/*******************************************************************************
    Copyright (c) 2015-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be
    included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_HAL_H__
#define __UVM_HAL_H__

#include "uvm_types.h"
#include "uvm_common.h"
#include "uvm_forward_decl.h"
#include "uvm_hal_types.h"
#include "uvm_push.h"
#include "uvm_gpu.h"
#include "uvm_test_ioctl.h"

// A dummy method validation that always returns true; it can be used to skip
// CE/Host/SW method validations for a given architecture
bool uvm_hal_method_validate_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data);

typedef void (*uvm_hal_init_t)(uvm_push_t *push);
void uvm_hal_maxwell_ce_init(uvm_push_t *push);
void uvm_hal_maxwell_host_init_noop(uvm_push_t *push);
void uvm_hal_pascal_host_init(uvm_push_t *push);

// Host method validation
typedef bool (*uvm_hal_host_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data);

// SW method validation
typedef bool (*uvm_hal_host_sw_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
bool uvm_hal_ampere_host_sw_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data);

// Wait for idle
typedef void (*uvm_hal_wait_for_idle_t)(uvm_push_t *push);
void uvm_hal_maxwell_host_wait_for_idle(uvm_push_t *push);

// Membar SYS
typedef void (*uvm_hal_membar_sys_t)(uvm_push_t *push);
void uvm_hal_maxwell_host_membar_sys(uvm_push_t *push);
void uvm_hal_pascal_host_membar_sys(uvm_push_t *push);

// Membar GPU
typedef void (*uvm_hal_membar_gpu_t)(uvm_push_t *push);
void uvm_hal_pascal_host_membar_gpu(uvm_push_t *push);

// Put a noop in the pushbuffer of the given size in bytes.
// The size needs to be a multiple of 4.
typedef void (*uvm_hal_noop_t)(uvm_push_t *push, NvU32 size);
void uvm_hal_maxwell_host_noop(uvm_push_t *push, NvU32 size);

// Host-generated interrupt method. This will generate a call to
// uvm_isr_top_half_entry.
//
// This is a non-stalling interrupt, which means that it's fire-and-forget. Host
// will not stall method processing nor stop channel switching, which means that
// we cannot directly identify in software which channel generated the
// interrupt.
//
// We must set up software state before pushing the interrupt, and check any
// possible interrupt condition on receiving an interrupt callback.
typedef void (*uvm_hal_interrupt_t)(uvm_push_t *push);
void uvm_hal_maxwell_host_interrupt(uvm_push_t *push);

// Issue a TLB invalidate applying to all VAs in a PDB.
//
// The PTE caches (TLBs) are always invalidated. The PDE caches for all VAs in
// the PDB are invalidated from the specified depth down to the PTEs. This
// allows for optimizations if the caller isn't writing all levels of the PDEs.
// Depth follows the MMU code convention where depth 0 is the top level and here
// means to invalidate everything. See uvm_pascal_mmu.c for an example of depth
// mapping to HW PDE levels. Notably 2M PTEs are considered PDEs as far as the
// TLBs are concerned and hence on Pascal the depth needs to be at most 3 for
// them to be included in the invalidation.
//
// If the membar parameter is not UVM_MEMBAR_NONE, the specified membar is
// performed logically after the TLB invalidate such that all physical memory
// accesses using the old translations are ordered to the scope of the membar.
typedef void (*uvm_hal_host_tlb_invalidate_all_t)(uvm_push_t *push,
                                                  uvm_gpu_phys_address_t pdb,
                                                  NvU32 depth,
                                                  uvm_membar_t membar);
void uvm_hal_maxwell_host_tlb_invalidate_all_a16f(uvm_push_t *push,
                                                  uvm_gpu_phys_address_t pdb,
                                                  NvU32 depth,
                                                  uvm_membar_t membar);
void uvm_hal_maxwell_host_tlb_invalidate_all_b06f(uvm_push_t *push,
                                                  uvm_gpu_phys_address_t pdb,
                                                  NvU32 depth,
                                                  uvm_membar_t membar);
void uvm_hal_pascal_host_tlb_invalidate_all(uvm_push_t *push,
                                            uvm_gpu_phys_address_t pdb,
                                            NvU32 depth,
                                            uvm_membar_t membar);
void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
                                            uvm_gpu_phys_address_t pdb,
                                            NvU32 depth,
                                            uvm_membar_t membar);
void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
                                            uvm_gpu_phys_address_t pdb,
                                            NvU32 depth,
                                            uvm_membar_t membar);
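
// Example (illustrative sketch, not part of this interface): callers normally
// reach these operations through the per-GPU host HAL table. The pdb value is
// assumed to come from the page tree code; depth 0 invalidates every PDE level
// and the membar orders the flush to system scope.
//
//     uvm_gpu_t *gpu = uvm_push_get_gpu(push);
//     gpu->parent->host_hal->tlb_invalidate_all(push, pdb, 0, UVM_MEMBAR_SYS);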

// Issue a TLB invalidate applying to the specified VA range in a PDB.
//
// The PTE caches (TLBs) for each page size aligned VA within the VA range
// are always invalidated. The PDE caches covering the specified VA
// range in the PDB are invalidated from the specified depth down to the PTEs.
// Specifying the depth allows for optimizations if the caller isn't writing all
// levels of the PDEs. Specifying the page size allows for optimizations if
// the caller can guarantee caches for smaller page sizes don't need to be
// invalidated.
//
// Depth follows the MMU code convention where depth 0 is the top level and here
// means to invalidate all levels. See uvm_pascal_mmu.c for an example of depth
// mapping to HW PDE levels. Notably 2M PTEs are considered PDEs as far as the
// TLBs are concerned and hence on Pascal the depth needs to be at most 3 for
// them to be included in the invalidation.
//
// If the membar parameter is not UVM_MEMBAR_NONE, the specified membar is
// performed logically after the TLB invalidate such that all physical memory
// accesses using the old translations are ordered to the scope of the membar.
//
// Note that this can end up pushing a lot of methods for big ranges so it's
// better not to use it directly. Instead, uvm_tlb_batch* APIs should be used
// that automatically switch between targeted VA invalidates and invalidate all.
typedef void (*uvm_hal_host_tlb_invalidate_va_t)(uvm_push_t *push,
                                                 uvm_gpu_phys_address_t pdb,
                                                 NvU32 depth,
                                                 NvU64 base,
                                                 NvU64 size,
                                                 NvU32 page_size,
                                                 uvm_membar_t membar);
void uvm_hal_maxwell_host_tlb_invalidate_va(uvm_push_t *push,
                                            uvm_gpu_phys_address_t pdb,
                                            NvU32 depth,
                                            NvU64 base,
                                            NvU64 size,
                                            NvU32 page_size,
                                            uvm_membar_t membar);
void uvm_hal_pascal_host_tlb_invalidate_va(uvm_push_t *push,
                                           uvm_gpu_phys_address_t pdb,
                                           NvU32 depth,
                                           NvU64 base,
                                           NvU64 size,
                                           NvU32 page_size,
                                           uvm_membar_t membar);
void uvm_hal_volta_host_tlb_invalidate_va(uvm_push_t *push,
                                          uvm_gpu_phys_address_t pdb,
                                          NvU32 depth,
                                          NvU64 base,
                                          NvU64 size,
                                          NvU32 page_size,
                                          uvm_membar_t membar);
void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
                                           uvm_gpu_phys_address_t pdb,
                                           NvU32 depth,
                                           NvU64 base,
                                           NvU64 size,
                                           NvU32 page_size,
                                           uvm_membar_t membar);
void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
                                           uvm_gpu_phys_address_t pdb,
                                           NvU32 depth,
                                           NvU64 base,
                                           NvU64 size,
                                           NvU32 page_size,
                                           uvm_membar_t membar);

typedef void (*uvm_hal_host_tlb_invalidate_test_t)(uvm_push_t *push,
                                                   uvm_gpu_phys_address_t pdb,
                                                   UVM_TEST_INVALIDATE_TLB_PARAMS *params);
void uvm_hal_maxwell_host_tlb_invalidate_test(uvm_push_t *push,
                                              uvm_gpu_phys_address_t pdb,
                                              UVM_TEST_INVALIDATE_TLB_PARAMS *params);
void uvm_hal_pascal_host_tlb_invalidate_test(uvm_push_t *push,
                                             uvm_gpu_phys_address_t pdb,
                                             UVM_TEST_INVALIDATE_TLB_PARAMS *params);
void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
                                             uvm_gpu_phys_address_t pdb,
                                             UVM_TEST_INVALIDATE_TLB_PARAMS *params);
void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
                                             uvm_gpu_phys_address_t pdb,
                                             UVM_TEST_INVALIDATE_TLB_PARAMS *params);

// By default all semaphore release operations include a membar sys before the
// operation. This can be affected by using UVM_PUSH_FLAG_NEXT_* flags with
// uvm_push_set_flag().
typedef void (*uvm_hal_semaphore_release_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_pascal_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_turing_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_hopper_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
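
// Example (illustrative sketch, not part of this interface): dropping the
// default membar sys on the next release. The UVM_PUSH_FLAG_NEXT_MEMBAR_NONE
// flag name is assumed from uvm_push.h, and gpu_va/payload are assumed to
// belong to the caller's tracking semaphore.
//
//     uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
//     uvm_push_get_gpu(push)->parent->host_hal->semaphore_release(push, gpu_va, payload);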

// Release a semaphore including a timestamp at the specific GPU VA.
//
// This operation writes 16 bytes of memory and the VA needs to be 16-byte
// aligned. The value of the released payload is unspecified and shouldn't be
// relied on, only the timestamp should be of interest.
typedef void (*uvm_hal_semaphore_timestamp_t)(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_maxwell_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_pascal_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_hopper_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);

void uvm_hal_maxwell_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_volta_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_hopper_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);

typedef void (*uvm_hal_semaphore_acquire_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_turing_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_hopper_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);

typedef void (*uvm_hal_host_set_gpfifo_entry_t)(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
void uvm_hal_maxwell_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
void uvm_hal_turing_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
void uvm_hal_hopper_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);

typedef void (*uvm_hal_host_set_gpfifo_noop_t)(NvU64 *fifo_entry);
void uvm_hal_maxwell_host_set_gpfifo_noop(NvU64 *fifo_entry);

typedef void (*uvm_hal_host_set_gpfifo_pushbuffer_segment_base_t)(NvU64 *fifo_entry, NvU64 pushbuffer_va);
void uvm_hal_maxwell_host_set_gpfifo_pushbuffer_segment_base_unsupported(NvU64 *fifo_entry, NvU64 pushbuffer_va);
void uvm_hal_hopper_host_set_gpfifo_pushbuffer_segment_base(NvU64 *fifo_entry, NvU64 pushbuffer_va);

typedef void (*uvm_hal_host_write_gpu_put_t)(uvm_channel_t *channel, NvU32 gpu_put);
void uvm_hal_maxwell_host_write_gpu_put(uvm_channel_t *channel, NvU32 gpu_put);
void uvm_hal_volta_host_write_gpu_put(uvm_channel_t *channel, NvU32 gpu_put);

// Return the current GPU time in nanoseconds
typedef NvU64 (*uvm_hal_get_time_t)(uvm_gpu_t *gpu);
NvU64 uvm_hal_maxwell_get_time(uvm_gpu_t *gpu);

// Internal helpers used by the CE hal
// Used to handle the offset encoding differences between architectures
typedef void (*uvm_hal_ce_offset_out_t)(uvm_push_t *push, NvU64 offset);
void uvm_hal_maxwell_ce_offset_out(uvm_push_t *push, NvU64 offset);
void uvm_hal_pascal_ce_offset_out(uvm_push_t *push, NvU64 offset);
void uvm_hal_hopper_ce_offset_out(uvm_push_t *push, NvU64 offset);

typedef void (*uvm_hal_ce_offset_in_out_t)(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
void uvm_hal_maxwell_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
void uvm_hal_pascal_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
void uvm_hal_hopper_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);

typedef NvU32 (*uvm_hal_ce_phys_mode_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_maxwell_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_ampere_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);

typedef NvU32 (*uvm_hal_ce_plc_mode_t)(void);
NvU32 uvm_hal_maxwell_ce_plc_mode(void);
NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void);

// CE method validation
typedef bool (*uvm_hal_ce_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
bool uvm_hal_ampere_ce_method_validate_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data);

// Memcopy validation.
// The validation happens at the start of the memcopy (uvm_hal_memcopy_t)
// execution. Use uvm_hal_ce_memcopy_validate_stub to skip the validation for
// a given architecture.
typedef bool (*uvm_hal_ce_memcopy_validate)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_ce_memcopy_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);

// Patching of the memcopy source; if not needed for a given architecture use
// the (empty) uvm_hal_ce_memcopy_patch_src_stub implementation
typedef void (*uvm_hal_ce_memcopy_patch_src)(uvm_push_t *push, uvm_gpu_address_t *src);
void uvm_hal_ce_memcopy_patch_src_stub(uvm_push_t *push, uvm_gpu_address_t *src);
void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_t *src);

// Memcopy size bytes from src to dst.
//
// By default all CE transfer operations include a membar sys after the
// operation and are not pipelined. This can be affected by using
// UVM_PUSH_FLAG_NEXT_CE_* flags with uvm_push_set_flag().
typedef void (*uvm_hal_memcopy_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size);
void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size);
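
// Example (illustrative sketch, not part of this interface): a copy that is
// pipelined behind the previous CE operation and skips the default membar sys.
// The flag names are assumed from uvm_push.h; dst and src are
// uvm_gpu_address_t values built by the caller.
//
//     uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
//     uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
//     uvm_push_get_gpu(push)->parent->ce_hal->memcopy(push, dst, src, size);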

// Simple wrapper for uvm_hal_memcopy_t with both addresses being virtual
typedef void (*uvm_hal_memcopy_v_to_v_t)(uvm_push_t *push, NvU64 dst, NvU64 src, size_t size);
void uvm_hal_maxwell_ce_memcopy_v_to_v(uvm_push_t *push, NvU64 dst, NvU64 src, size_t size);

// Memset validation.
// The validation happens at the start of the memset (uvm_hal_memset_*_t)
// execution. Use uvm_hal_ce_memset_validate_stub to skip the validation for
// a given architecture.
typedef bool (*uvm_hal_ce_memset_validate)(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
bool uvm_hal_ce_memset_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
bool uvm_hal_ampere_ce_memset_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);

// Memset size bytes at dst to a given N-byte input value.
//
// Size has to be a multiple of the element size. For example, the size passed
// to uvm_hal_memset_4_t must be a multiple of 4 bytes.
//
// By default all CE transfer operations include a membar sys after the
// operation and are not pipelined. This can be affected by using
// UVM_PUSH_FLAG_NEXT_CE_* flags with uvm_push_set_flag().
typedef void (*uvm_hal_memset_1_t)(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
typedef void (*uvm_hal_memset_4_t)(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
typedef void (*uvm_hal_memset_8_t)(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);

// Simple wrapper for uvm_hal_memset_4_t with the address being virtual.
typedef void (*uvm_hal_memset_v_4_t)(uvm_push_t *push, NvU64 dst_va, NvU32 value, size_t size);

void uvm_hal_maxwell_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
void uvm_hal_maxwell_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
void uvm_hal_maxwell_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value, size_t size);

void uvm_hal_hopper_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
void uvm_hal_hopper_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
void uvm_hal_hopper_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
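
// Example (illustrative sketch, not part of this interface): zero a 64KB
// buffer with the 4-byte memset; the size must stay a multiple of the element
// size and dst is assumed to be a uvm_gpu_address_t built by the caller.
//
//     uvm_push_get_gpu(push)->parent->ce_hal->memset_4(push, dst, 0, 64 * 1024);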

// Increments the semaphore by 1, or resets to 0 if the incremented value would
// exceed the payload.
//
// By default all CE semaphore operations include a membar sys before the
// semaphore operation. This can be affected by using UVM_PUSH_FLAG_NEXT_CE_*
// flags with uvm_push_set_flag().
typedef void (*uvm_hal_semaphore_reduction_inc_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_pascal_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
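
// Example (illustrative sketch, not part of this interface): bump a tracking
// semaphore with only GPU-local ordering instead of the default membar sys,
// assuming the UVM_PUSH_FLAG_NEXT_MEMBAR_GPU flag declared in uvm_push.h.
//
//     uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
//     uvm_push_get_gpu(push)->parent->ce_hal->semaphore_reduction_inc(push, gpu_va, payload);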

// Initialize GPU architecture dependent properties
typedef void (*uvm_hal_arch_init_properties_t)(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu);

// Retrieve the page-tree HAL for a given big page size
typedef uvm_mmu_mode_hal_t *(*uvm_hal_lookup_mode_hal_t)(NvU32 big_page_size);
typedef void (*uvm_hal_mmu_enable_prefetch_faults_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_mmu_disable_prefetch_faults_t)(uvm_parent_gpu_t *parent_gpu);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU32 big_page_size);
void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_mmu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_mmu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);

// Convert a faulted MMU engine ID to a UVM engine type. Only engines which have
// faults serviced by UVM are handled. On Pascal the only such engine is
// GRAPHICS, so no translation is provided.
typedef uvm_mmu_engine_type_t (*uvm_hal_mmu_engine_id_to_type_t)(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_maxwell_mmu_engine_id_to_type_unsupported(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_volta_mmu_engine_id_to_type(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_turing_mmu_engine_id_to_type(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_ampere_mmu_engine_id_to_type(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id);

typedef NvU16 (*uvm_hal_mmu_client_id_to_utlb_id_t)(NvU16 client_id);
NvU16 uvm_hal_maxwell_mmu_client_id_to_utlb_id_unsupported(NvU16 client_id);
NvU16 uvm_hal_pascal_mmu_client_id_to_utlb_id(NvU16 client_id);
NvU16 uvm_hal_volta_mmu_client_id_to_utlb_id(NvU16 client_id);
NvU16 uvm_hal_ampere_mmu_client_id_to_utlb_id(NvU16 client_id);
NvU16 uvm_hal_hopper_mmu_client_id_to_utlb_id(NvU16 client_id);

// Replayable faults
typedef void (*uvm_hal_enable_replayable_faults_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_disable_replayable_faults_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_clear_replayable_faults_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
typedef NvU32 (*uvm_hal_fault_buffer_read_put_t)(uvm_parent_gpu_t *parent_gpu);
typedef NvU32 (*uvm_hal_fault_buffer_read_get_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_fault_buffer_write_get_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
typedef NvU8 (*uvm_hal_fault_buffer_get_ve_id_t)(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);

// Parse the entry on the given buffer index. This also clears the valid bit of
// the entry in the buffer.
typedef void (*uvm_hal_fault_buffer_parse_entry_t)(uvm_parent_gpu_t *gpu,
                                                   NvU32 index,
                                                   uvm_fault_buffer_entry_t *buffer_entry);
typedef bool (*uvm_hal_fault_buffer_entry_is_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef void (*uvm_hal_fault_buffer_entry_clear_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef NvU32 (*uvm_hal_fault_buffer_entry_size_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_fault_buffer_replay_t)(uvm_push_t *push, uvm_fault_replay_type_t type);
typedef void (*uvm_hal_fault_cancel_global_t)(uvm_push_t *push, uvm_gpu_phys_address_t instance_ptr);
typedef void (*uvm_hal_fault_cancel_targeted_t)(uvm_push_t *push,
                                                uvm_gpu_phys_address_t instance_ptr,
                                                NvU32 gpc_id,
                                                NvU32 client_id);

void uvm_hal_maxwell_enable_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_disable_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_clear_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get);
NvU32 uvm_hal_maxwell_fault_buffer_read_put_unsupported(uvm_parent_gpu_t *parent_gpu);
NvU32 uvm_hal_maxwell_fault_buffer_read_get_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_fault_buffer_write_get_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU8 uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
void uvm_hal_maxwell_fault_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
                                                          NvU32 index,
                                                          uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_pascal_enable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32 get);
NvU32 uvm_hal_pascal_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu);
NvU32 uvm_hal_pascal_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_pascal_fault_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
                                             NvU32 index,
                                             uvm_fault_buffer_entry_t *buffer_entry);
NvU32 uvm_hal_volta_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu);
NvU32 uvm_hal_volta_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU8 uvm_hal_volta_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
void uvm_hal_volta_fault_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
                                            NvU32 index,
                                            uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32 get);
NvU8 uvm_hal_hopper_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);

bool uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU32 uvm_hal_maxwell_fault_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu);
bool uvm_hal_pascal_fault_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_pascal_fault_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU32 uvm_hal_pascal_fault_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);

typedef void (*uvm_hal_fault_buffer_parse_non_replayable_entry_t)(uvm_parent_gpu_t *parent_gpu,
                                                                  void *fault_packet,
                                                                  uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_maxwell_fault_buffer_parse_non_replayable_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
                                                                         void *fault_packet,
                                                                         uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_volta_fault_buffer_parse_non_replayable_entry(uvm_parent_gpu_t *parent_gpu,
                                                           void *fault_packet,
                                                           uvm_fault_buffer_entry_t *buffer_entry);

void uvm_hal_maxwell_cancel_faults_global_unsupported(uvm_push_t *push, uvm_gpu_phys_address_t instance_ptr);
void uvm_hal_pascal_cancel_faults_global(uvm_push_t *push, uvm_gpu_phys_address_t instance_ptr);

// Trigger fault replay on the GPU where the given pushbuffer is located.
void uvm_hal_maxwell_replay_faults_unsupported(uvm_push_t *push, uvm_fault_replay_type_t type);
void uvm_hal_maxwell_cancel_faults_targeted_unsupported(uvm_push_t *push,
                                                        uvm_gpu_phys_address_t instance_ptr,
                                                        NvU32 gpc_id,
                                                        NvU32 client_id);
void uvm_hal_pascal_replay_faults(uvm_push_t *push, uvm_fault_replay_type_t type);
void uvm_hal_pascal_cancel_faults_targeted(uvm_push_t *push,
                                           uvm_gpu_phys_address_t instance_ptr,
                                           NvU32 gpc_id,
                                           NvU32 client_id);

typedef void (*uvm_hal_fault_cancel_va_t)(uvm_push_t *push,
                                          uvm_gpu_phys_address_t pdb,
                                          const uvm_fault_buffer_entry_t *fault_entry,
                                          uvm_fault_cancel_va_mode_t cancel_va_mode);

void uvm_hal_maxwell_cancel_faults_va_unsupported(uvm_push_t *push,
                                                  uvm_gpu_phys_address_t pdb,
                                                  const uvm_fault_buffer_entry_t *fault_entry,
                                                  uvm_fault_cancel_va_mode_t cancel_va_mode);

void uvm_hal_volta_replay_faults(uvm_push_t *push, uvm_fault_replay_type_t type);
void uvm_hal_volta_cancel_faults_va(uvm_push_t *push,
                                    uvm_gpu_phys_address_t pdb,
                                    const uvm_fault_buffer_entry_t *fault_entry,
                                    uvm_fault_cancel_va_mode_t cancel_va_mode);

void uvm_hal_hopper_cancel_faults_va(uvm_push_t *push,
                                     uvm_gpu_phys_address_t pdb,
                                     const uvm_fault_buffer_entry_t *fault_entry,
                                     uvm_fault_cancel_va_mode_t cancel_va_mode);

typedef void (*uvm_hal_host_clear_faulted_channel_method_t)(uvm_push_t *push,
                                                            uvm_user_channel_t *user_channel,
                                                            const uvm_fault_buffer_entry_t *buffer_entry);

void uvm_hal_maxwell_host_clear_faulted_channel_method_unsupported(uvm_push_t *push,
                                                                   uvm_user_channel_t *user_channel,
                                                                   const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_volta_host_clear_faulted_channel_method(uvm_push_t *push,
                                                     uvm_user_channel_t *user_channel,
                                                     const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_turing_host_clear_faulted_channel_method(uvm_push_t *push,
                                                      uvm_user_channel_t *user_channel,
                                                      const uvm_fault_buffer_entry_t *buffer_entry);
typedef void (*uvm_hal_host_clear_faulted_channel_register_t)(uvm_user_channel_t *user_channel,
                                                              const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_maxwell_host_clear_faulted_channel_register_unsupported(uvm_user_channel_t *user_channel,
                                                                     const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user_channel,
                                                        const uvm_fault_buffer_entry_t *buffer_entry);

typedef void (*uvm_hal_host_clear_faulted_channel_sw_method_t)(uvm_push_t *push,
                                                               uvm_user_channel_t *user_channel,
                                                               const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_maxwell_host_clear_faulted_channel_sw_method_unsupported(uvm_push_t *push,
                                                                      uvm_user_channel_t *user_channel,
                                                                      const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_ampere_host_clear_faulted_channel_sw_method(uvm_push_t *push,
                                                         uvm_user_channel_t *user_channel,
                                                         const uvm_fault_buffer_entry_t *buffer_entry);

void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry);
void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry);

// Access counters
typedef void (*uvm_hal_enable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_disable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_clear_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);

// Parse the entry on the given buffer index. This also clears the valid bit of
// the entry in the buffer.
typedef void (*uvm_hal_access_counter_buffer_parse_entry_t)(uvm_parent_gpu_t *parent_gpu,
                                                            NvU32 index,
                                                            uvm_access_counter_buffer_entry_t *buffer_entry);
typedef bool (*uvm_hal_access_counter_buffer_entry_is_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef void (*uvm_hal_access_counter_buffer_entry_clear_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef NvU32 (*uvm_hal_access_counter_buffer_entry_size_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_access_counter_clear_all_t)(uvm_push_t *push);
typedef void (*uvm_hal_access_counter_clear_type_t)(uvm_push_t *push, uvm_access_counter_type_t type);
typedef void (*uvm_hal_access_counter_clear_targeted_t)(uvm_push_t *push,
                                                        const uvm_access_counter_buffer_entry_t *buffer_entry);

void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get);
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
                                                                   NvU32 index,
                                                                   uvm_access_counter_buffer_entry_t *buffer_entry);
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push);
void uvm_hal_maxwell_access_counter_clear_type_unsupported(uvm_push_t *push, uvm_access_counter_type_t type);
void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
                                                               const uvm_access_counter_buffer_entry_t *buffer_entry);

void uvm_hal_volta_enable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
void uvm_hal_volta_access_counter_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
                                                     NvU32 index,
                                                     uvm_access_counter_buffer_entry_t *buffer_entry);
bool uvm_hal_volta_access_counter_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_volta_access_counter_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU32 uvm_hal_volta_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);

void uvm_hal_volta_access_counter_clear_all(uvm_push_t *push);
void uvm_hal_volta_access_counter_clear_type(uvm_push_t *push, uvm_access_counter_type_t type);
void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
                                                 const uvm_access_counter_buffer_entry_t *buffer_entry);

void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);

struct uvm_host_hal_struct
{
    uvm_hal_init_t init;
    uvm_hal_host_method_validate method_validate;
    uvm_hal_host_sw_method_validate sw_method_validate;
    uvm_hal_wait_for_idle_t wait_for_idle;
    uvm_hal_membar_sys_t membar_sys;
    uvm_hal_membar_gpu_t membar_gpu;
    uvm_hal_noop_t noop;
    uvm_hal_interrupt_t interrupt;
    uvm_hal_semaphore_release_t semaphore_release;
    uvm_hal_semaphore_acquire_t semaphore_acquire;
    uvm_hal_semaphore_timestamp_t semaphore_timestamp;
    uvm_hal_host_set_gpfifo_entry_t set_gpfifo_entry;
    uvm_hal_host_set_gpfifo_noop_t set_gpfifo_noop;
    uvm_hal_host_set_gpfifo_pushbuffer_segment_base_t set_gpfifo_pushbuffer_segment_base;
    uvm_hal_host_write_gpu_put_t write_gpu_put;
    uvm_hal_host_tlb_invalidate_all_t tlb_invalidate_all;
    uvm_hal_host_tlb_invalidate_va_t tlb_invalidate_va;
    uvm_hal_host_tlb_invalidate_test_t tlb_invalidate_test;
    uvm_hal_fault_buffer_replay_t replay_faults;
    uvm_hal_fault_cancel_global_t cancel_faults_global;
    uvm_hal_fault_cancel_targeted_t cancel_faults_targeted;
    uvm_hal_fault_cancel_va_t cancel_faults_va;
    uvm_hal_host_clear_faulted_channel_method_t clear_faulted_channel_sw_method;
    uvm_hal_host_clear_faulted_channel_method_t clear_faulted_channel_method;
    uvm_hal_host_clear_faulted_channel_register_t clear_faulted_channel_register;
    uvm_hal_access_counter_clear_all_t access_counter_clear_all;
    uvm_hal_access_counter_clear_type_t access_counter_clear_type;
    uvm_hal_access_counter_clear_targeted_t access_counter_clear_targeted;
    uvm_hal_get_time_t get_time;
};

struct uvm_ce_hal_struct
{
    uvm_hal_init_t init;
    uvm_hal_ce_method_validate method_validate;
    uvm_hal_semaphore_release_t semaphore_release;
    uvm_hal_semaphore_timestamp_t semaphore_timestamp;
    uvm_hal_ce_offset_out_t offset_out;
    uvm_hal_ce_offset_in_out_t offset_in_out;
    uvm_hal_ce_phys_mode_t phys_mode;
    uvm_hal_ce_plc_mode_t plc_mode;
    uvm_hal_ce_memcopy_validate memcopy_validate;
    uvm_hal_ce_memcopy_patch_src memcopy_patch_src;
    uvm_hal_memcopy_t memcopy;
    uvm_hal_memcopy_v_to_v_t memcopy_v_to_v;
    uvm_hal_ce_memset_validate memset_validate;
    uvm_hal_memset_1_t memset_1;
    uvm_hal_memset_4_t memset_4;
    uvm_hal_memset_8_t memset_8;
    uvm_hal_memset_v_4_t memset_v_4;
    uvm_hal_semaphore_reduction_inc_t semaphore_reduction_inc;
};

struct uvm_arch_hal_struct
{
    uvm_hal_arch_init_properties_t init_properties;
    uvm_hal_lookup_mode_hal_t mmu_mode_hal;
    uvm_hal_mmu_enable_prefetch_faults_t enable_prefetch_faults;
    uvm_hal_mmu_disable_prefetch_faults_t disable_prefetch_faults;
    uvm_hal_mmu_engine_id_to_type_t mmu_engine_id_to_type;
    uvm_hal_mmu_client_id_to_utlb_id_t mmu_client_id_to_utlb_id;
};

struct uvm_fault_buffer_hal_struct
{
    uvm_hal_enable_replayable_faults_t enable_replayable_faults;
    uvm_hal_disable_replayable_faults_t disable_replayable_faults;
    uvm_hal_clear_replayable_faults_t clear_replayable_faults;
    uvm_hal_fault_buffer_read_put_t read_put;
    uvm_hal_fault_buffer_read_get_t read_get;
    uvm_hal_fault_buffer_write_get_t write_get;
    uvm_hal_fault_buffer_get_ve_id_t get_ve_id;
    uvm_hal_fault_buffer_parse_entry_t parse_entry;
    uvm_hal_fault_buffer_entry_is_valid_t entry_is_valid;
    uvm_hal_fault_buffer_entry_clear_valid_t entry_clear_valid;
    uvm_hal_fault_buffer_entry_size_t entry_size;
    uvm_hal_fault_buffer_parse_non_replayable_entry_t parse_non_replayable_entry;
};

struct uvm_access_counter_buffer_hal_struct
{
    uvm_hal_enable_access_counter_notifications_t enable_access_counter_notifications;
    uvm_hal_disable_access_counter_notifications_t disable_access_counter_notifications;
    uvm_hal_clear_access_counter_notifications_t clear_access_counter_notifications;
    uvm_hal_access_counter_buffer_parse_entry_t parse_entry;
    uvm_hal_access_counter_buffer_entry_is_valid_t entry_is_valid;
    uvm_hal_access_counter_buffer_entry_clear_valid_t entry_clear_valid;
    uvm_hal_access_counter_buffer_entry_size_t entry_size;
};

typedef struct
{
    // id is either a hardware class or GPU architecture
    NvU32 id;
    NvU32 parent_id;
    union
    {
        // host_ops: id is a hardware class
        uvm_host_hal_t host_ops;

        // ce_ops: id is a hardware class
        uvm_ce_hal_t ce_ops;

        // arch_ops: id is an architecture
        uvm_arch_hal_t arch_ops;

        // fault_buffer_ops: id is an architecture
        uvm_fault_buffer_hal_t fault_buffer_ops;

        // access_counter_buffer_ops: id is an architecture
        uvm_access_counter_buffer_hal_t access_counter_buffer_ops;

    } u;
} uvm_hal_class_ops_t;

NV_STATUS uvm_hal_init_table(void);
NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu);

// Helper to push a SYS or GPU membar based on the membar type
//
// Notably this doesn't just get the GPU from the push object to support the
// test mode of the page tree code that doesn't do real pushes.
static void uvm_hal_membar(uvm_gpu_t *gpu, uvm_push_t *push, uvm_membar_t membar)
{
    switch (membar) {
        case UVM_MEMBAR_SYS:
            gpu->parent->host_hal->membar_sys(push);
            break;
        case UVM_MEMBAR_GPU:
            gpu->parent->host_hal->membar_gpu(push);
            break;
        case UVM_MEMBAR_NONE:
            break;
    }
}

static void uvm_hal_wfi_membar(uvm_push_t *push, uvm_membar_t membar)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    gpu->parent->host_hal->wait_for_idle(push);
    uvm_hal_membar(gpu, push, membar);
}

// Internal helper used by the TLB invalidate hal functions. This issues the
// appropriate Host membar(s) after a TLB invalidate.
void uvm_hal_tlb_invalidate_membar(uvm_push_t *push, uvm_membar_t membar);

#endif // __UVM_HAL_H__