Browse Source

545.29.03

pull/567/head 545.29.03
Maneet Singh 1 year ago
parent
commit
f364378a65
Failed to extract signature
  1. 6
      CHANGELOG.md
  2. 10
      README.md
  3. 2
      kernel-open/Kbuild
  4. 20
      kernel-open/nvidia-drm/nvidia-drm-drv.c
  5. 5
      kernel-open/nvidia-drm/nvidia-drm-ioctl.h
  6. 2
      kernel-open/nvidia/nv-msi.c
  7. 5
      kernel-open/nvidia/nv-p2p.c
  8. 6
      src/common/displayport/src/dp_deviceimpl.cpp
  9. 20
      src/common/inc/nvBldVer.h
  10. 2
      src/common/inc/nvUnixVersion.h
  11. 55
      src/common/sdk/nvidia/inc/ctrl/ctrl30f1.h
  12. 32
      src/nvidia-modeset/src/nvkms-evo.c
  13. 4
      src/nvidia/generated/g_nv_name_released.h
  14. 27
      src/nvidia/inc/kernel/gpu/mem_mgr/phys_mem_allocator/addrtree.h
  15. 10
      src/nvidia/inc/kernel/gpu/mem_mgr/phys_mem_allocator/phys_mem_allocator.h
  16. 51
      src/nvidia/inc/kernel/gpu/mem_mgr/phys_mem_allocator/regmap.h
  17. 2
      src/nvidia/src/kernel/gpu/bus/arch/maxwell/kern_bus_gm107.c
  18. 4
      src/nvidia/src/kernel/gpu/fifo/kernel_channel.c
  19. 12
      src/nvidia/src/kernel/gpu/gr/kernel_graphics.c
  20. 4
      src/nvidia/src/kernel/gpu/mem_mgr/mem_mgr.c
  21. 37
      src/nvidia/src/kernel/gpu/mem_mgr/phys_mem_allocator/addrtree.c
  22. 2
      src/nvidia/src/kernel/gpu/mem_mgr/phys_mem_allocator/numa.c
  23. 29
      src/nvidia/src/kernel/gpu/mem_mgr/phys_mem_allocator/phys_mem_allocator.c
  24. 1134
      src/nvidia/src/kernel/gpu/mem_mgr/phys_mem_allocator/regmap.c
  25. 2
      src/nvidia/src/kernel/rmapi/event.c
  26. 6
      src/nvidia/src/libraries/mmu/mmu_walk.c
  27. 8
      utils.mk
  28. 2
      version.mk

6
CHANGELOG.md

@ -2,6 +2,8 @@
## Release 545 Entries
### [545.29.03] 2023-10-31
### [545.23.06] 2023-10-17
#### Fixed
@ -62,6 +64,10 @@
## Release 525 Entries
#### Fixed
- Fix double-free in the register-callback error path of nvidia_p2p_get_pages(), [#557](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/557) by @BrendanCunningham
### [525.116.04] 2023-05-09
### [525.116.03] 2023-04-25

10
README.md

@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source
This is the source release of the NVIDIA Linux open GPU kernel modules,
version 545.23.06.
version 545.29.03.
## How to Build
@ -17,7 +17,7 @@ as root:
Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
545.23.06 driver release. This can be achieved by installing
545.29.03 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
@ -188,7 +188,7 @@ encountered specific to them.
For details on feature support and limitations, see the NVIDIA GPU driver
end user README here:
https://us.download.nvidia.com/XFree86/Linux-x86_64/545.23.06/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/545.29.03/README/kernel_open.html
In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@ -658,6 +658,7 @@ Subsystem Device ID.
| NVIDIA A100-SXM4-80GB | 20B2 10DE 147F |
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1622 |
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1623 |
| NVIDIA PG509-210 | 20B2 10DE 1625 |
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A7 |
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A8 |
| NVIDIA A100 80GB PCIe | 20B5 10DE 1533 |
@ -665,6 +666,7 @@ Subsystem Device ID.
| NVIDIA PG506-232 | 20B6 10DE 1492 |
| NVIDIA A30 | 20B7 10DE 1532 |
| NVIDIA A30 | 20B7 10DE 1804 |
| NVIDIA A30 | 20B7 10DE 1852 |
| NVIDIA A800-SXM4-40GB | 20BD 10DE 17F4 |
| NVIDIA A100-PCIE-40GB | 20F1 10DE 145F |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179B |
@ -748,6 +750,8 @@ Subsystem Device ID.
| NVIDIA H100 PCIe | 2331 10DE 1626 |
| NVIDIA H100 | 2339 10DE 17FC |
| NVIDIA H800 NVL | 233A 10DE 183A |
| GH200 120GB | 2342 10DE 16EB |
| GH200 480GB | 2342 10DE 1809 |
| NVIDIA GeForce RTX 3060 Ti | 2414 |
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2420 |
| NVIDIA RTX A5500 Laptop GPU | 2438 |

2
kernel-open/Kbuild

@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"545.23.06\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"545.29.03\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

20
kernel-open/nvidia-drm/nvidia-drm-drv.c

@ -729,6 +729,7 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
params->gpu_id = nv_dev->gpu_info.gpu_id;
params->primary_index = dev->primary->index;
params->supports_alloc = false;
params->generic_page_kind = 0;
params->page_kind_generation = 0;
params->sector_layout = 0;
@ -736,15 +737,20 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
params->supports_semsurf = false;
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
params->generic_page_kind = nv_dev->genericPageKind;
params->page_kind_generation = nv_dev->pageKindGeneration;
params->sector_layout = nv_dev->sectorLayout;
/* Semaphore surfaces are only supported if the modeset = 1 parameter is set */
if ((nv_dev->pDevice) != NULL && (nv_dev->semsurf_stride != 0)) {
params->supports_semsurf = true;
/* Memory allocation and semaphore surfaces are only supported
* if the modeset = 1 parameter is set */
if (nv_dev->pDevice != NULL) {
params->supports_alloc = true;
params->generic_page_kind = nv_dev->genericPageKind;
params->page_kind_generation = nv_dev->pageKindGeneration;
params->sector_layout = nv_dev->sectorLayout;
if (nv_dev->semsurf_stride != 0) {
params->supports_semsurf = true;
#if defined(NV_SYNC_FILE_GET_FENCE_PRESENT)
params->supports_sync_fd = true;
params->supports_sync_fd = true;
#endif /* defined(NV_SYNC_FILE_GET_FENCE_PRESENT) */
}
}
#endif /* defined(NV_DRM_ATOMIC_MODESET_AVAILABLE) */

5
kernel-open/nvidia-drm/nvidia-drm-ioctl.h

@ -178,7 +178,10 @@ struct drm_nvidia_get_dev_info_params {
uint32_t gpu_id; /* OUT */
uint32_t primary_index; /* OUT; the "card%d" value */
/* See DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D definitions of these */
uint32_t supports_alloc; /* OUT */
/* The generic_page_kind, page_kind_generation, and sector_layout
* fields are only valid if supports_alloc is true.
* See DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D definitions of these. */
uint32_t generic_page_kind; /* OUT */
uint32_t page_kind_generation; /* OUT */
uint32_t sector_layout; /* OUT */

2
kernel-open/nvidia/nv-msi.c

@ -156,7 +156,7 @@ NvS32 NV_API_CALL nv_request_msix_irq(nv_linux_state_t *nvl)
{
for( j = 0; j < i; j++)
{
free_irq(nvl->msix_entries[i].vector, (void *)nvl);
free_irq(nvl->msix_entries[j].vector, (void *)nvl);
}
break;
}

5
kernel-open/nvidia/nv-p2p.c

@ -506,8 +506,13 @@ static int nv_p2p_get_pages(
(*page_table)->page_size = page_size_index;
os_free_mem(physical_addresses);
physical_addresses = NULL;
os_free_mem(wreqmb_h);
wreqmb_h = NULL;
os_free_mem(rreqmb_h);
rreqmb_h = NULL;
if (free_callback != NULL)
{

6
src/common/displayport/src/dp_deviceimpl.cpp

@ -2068,11 +2068,13 @@ void DeviceImpl::setDscDecompressionDevice(bool bDscCapBasedOnParent)
}
}
}
else if (this->parent && this->parent->isDSCDecompressionSupported())
else if (this->parent && this->parent->isDSCDecompressionSupported() &&
!(this->isLogical()))
{
//
// This condition takes care of sink devices not capable of DSC
// but parent is capable of DSC decompression.
// but parent is capable of DSC decompression. Skip this case when the
// sink is at a logical port.
//
this->bDSCPossible = true;
this->devDoingDscDecompression = this->parent;

20
src/common/inc/nvBldVer.h

@ -36,25 +36,25 @@
// and then checked back in. You cannot make changes to these sections without
// corresponding changes to the buildmeister script
#ifndef NV_BUILD_BRANCH
#define NV_BUILD_BRANCH r545_74
#define NV_BUILD_BRANCH r545_96
#endif
#ifndef NV_PUBLIC_BRANCH
#define NV_PUBLIC_BRANCH r545_74
#define NV_PUBLIC_BRANCH r545_96
#endif
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r545/r545_74-96"
#define NV_BUILD_CHANGELIST_NUM (33409679)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r545/r545_96-121"
#define NV_BUILD_CHANGELIST_NUM (33470109)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "rel/gpu_drv/r545/r545_74-96"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33409679)
#define NV_BUILD_NAME "rel/gpu_drv/r545/r545_96-121"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33470109)
#else /* Windows builds */
#define NV_BUILD_BRANCH_VERSION "r545_74-8"
#define NV_BUILD_CHANGELIST_NUM (33409679)
#define NV_BUILD_BRANCH_VERSION "r545_96-3"
#define NV_BUILD_CHANGELIST_NUM (33467912)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "545.87"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33409679)
#define NV_BUILD_NAME "546.04"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33467912)
#define NV_BUILD_BRANCH_BASE_VERSION R545
#endif
// End buildmeister python edited section

2
src/common/inc/nvUnixVersion.h

@ -4,7 +4,7 @@
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
#define NV_VERSION_STRING "545.23.06"
#define NV_VERSION_STRING "545.29.03"
#else

55
src/common/sdk/nvidia/inc/ctrl/ctrl30f1.h

@ -887,10 +887,6 @@ typedef struct NV30F1_CTRL_GSYNC_GET_CONTROL_SWAP_LOCK_WINDOW_PARAMS {
* This parameter is set by the client to indicate the
* gpuId of the GPU to which the display to be optimized
* is attached.
* display
* This parameter is not used by RM currently.
* Clients can ignore this parameter. Note that this
* parameter will be removed in future.
* output
* This parameter is set by the client to indicate the
* output resource type of the display to be optimized.
@ -1033,6 +1029,12 @@ typedef struct NV30F1_CTRL_GSYNC_GET_CONTROL_SWAP_LOCK_WINDOW_PARAMS {
* optimal pixel clock to use with the adjusted mode,
* in units of Hz.
*
*
* bOptimized[out]
* This is set to NV_TRUE if the timings were successfully optimized, and
* NV_FALSE otherwise.
*
*
* Progressive Raster Structure
*
* hSyncEnd hTotal
@ -1145,28 +1147,29 @@ typedef struct NV30F1_CTRL_GSYNC_GET_CONTROL_SWAP_LOCK_WINDOW_PARAMS {
#define NV30F1_CTRL_GSYNC_GET_OPTIMIZED_TIMING_PARAMS_MESSAGE_ID (0x60U)
typedef struct NV30F1_CTRL_GSYNC_GET_OPTIMIZED_TIMING_PARAMS {
NvU32 gpuId;
NvU32 display;
NvU32 output;
NvU32 protocol;
NvU32 structure;
NvU32 adjust;
NvU32 hDeltaStep;
NvU32 hDeltaMax;
NvU32 vDeltaStep;
NvU32 vDeltaMax;
NvU32 hSyncEnd;
NvU32 hBlankEnd;
NvU32 hBlankStart;
NvU32 hTotal;
NvU32 vSyncEnd;
NvU32 vBlankEnd;
NvU32 vBlankStart;
NvU32 vInterlacedBlankEnd;
NvU32 vInterlacedBlankStart;
NvU32 vTotal;
NvU32 refreshX10K;
NvU32 pixelClockHz;
NvU32 gpuId;
NvU32 output;
NvU32 protocol;
NvU32 structure;
NvU32 adjust;
NvU32 hDeltaStep;
NvU32 hDeltaMax;
NvU32 vDeltaStep;
NvU32 vDeltaMax;
NvU32 hSyncEnd;
NvU32 hBlankEnd;
NvU32 hBlankStart;
NvU32 hTotal;
NvU32 vSyncEnd;
NvU32 vBlankEnd;
NvU32 vBlankStart;
NvU32 vInterlacedBlankEnd;
NvU32 vInterlacedBlankStart;
NvU32 vTotal;
NvU32 refreshX10K;
NvU32 pixelClockHz;
NvBool bOptimized;
} NV30F1_CTRL_GSYNC_GET_OPTIMIZED_TIMING_PARAMS;
/* output values */

32
src/nvidia-modeset/src/nvkms-evo.c

@ -786,6 +786,9 @@ static void TweakTimingsForGsync(const NVDpyEvoRec *pDpyEvo,
return;
}
nvEvoLogInfoString(pInfoString,
"Adjusting Mode Timings for Quadro Sync Compatibility");
ret = nvRmApiControl(nvEvoGlobal.clientHandle,
pDispEvo->pFrameLockEvo->device,
NV30F1_CTRL_CMD_GSYNC_GET_OPTIMIZED_TIMING,
@ -796,12 +799,13 @@ static void TweakTimingsForGsync(const NVDpyEvoRec *pDpyEvo,
nvAssert(!"Failed to convert to Quadro Sync safe timing");
/* do not apply the timings returned by RM if the call failed */
return;
} else if (!gsyncOptTimingParams.bOptimized) {
nvEvoLogInfoString(pInfoString, " Timings Unchanged.");
return;
}
nvConstructNvModeTimingsFromHwModeTimings(pTimings, &modeTimings);
nvEvoLogInfoString(pInfoString,
"Adjusting Mode Timings for Quadro Sync Compatibility");
nvEvoLogInfoString(pInfoString, " Old Timings:");
nvEvoLogModeValidationModeTimings(pInfoString, &modeTimings);
@ -5923,13 +5927,6 @@ NvBool nvConstructHwModeTimingsImpCheckEvo(
NVKMS_MODE_VALIDATION_REQUIRE_BOOT_CLOCKS);
NvU32 ret;
/* bypass this checking if the user disabled IMP */
if ((pParams->overrides &
NVKMS_MODE_VALIDATION_NO_EXTENDED_GPU_CAPABILITIES_CHECK) != 0) {
return TRUE;
}
activeRmId = nvRmAllocDisplayId(pConnectorEvo->pDispEvo,
nvAddDpyIdToEmptyDpyIdList(pConnectorEvo->displayId));
if (activeRmId == 0x0) {
@ -5954,11 +5951,18 @@ NvBool nvConstructHwModeTimingsImpCheckEvo(
timingsParams[head].pUsage = &timings[head].viewPort.guaranteedUsage;
}
ret = nvValidateImpOneDispDowngrade(pConnectorEvo->pDispEvo, timingsParams,
requireBootClocks,
NV_EVO_REALLOCATE_BANDWIDTH_MODE_NONE,
/* downgradePossibleHeadsBitMask */
(NVBIT(NVKMS_MAX_HEADS_PER_DISP) - 1UL));
/* bypass this checking if the user disabled IMP */
if ((pParams->overrides &
NVKMS_MODE_VALIDATION_NO_EXTENDED_GPU_CAPABILITIES_CHECK) != 0) {
ret = TRUE;
} else {
ret = nvValidateImpOneDispDowngrade(pConnectorEvo->pDispEvo, timingsParams,
requireBootClocks,
NV_EVO_REALLOCATE_BANDWIDTH_MODE_NONE,
/* downgradePossibleHeadsBitMask */
(NVBIT(NVKMS_MAX_HEADS_PER_DISP) - 1UL));
}
if (ret) {
*pNumHeads = numHeads;
} else {

4
src/nvidia/generated/g_nv_name_released.h

@ -808,6 +808,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x20B2, 0x147f, 0x10de, "NVIDIA A100-SXM4-80GB" },
{ 0x20B2, 0x1622, 0x10de, "NVIDIA A100-SXM4-80GB" },
{ 0x20B2, 0x1623, 0x10de, "NVIDIA A100-SXM4-80GB" },
{ 0x20B2, 0x1625, 0x10de, "NVIDIA PG509-210" },
{ 0x20B3, 0x14a7, 0x10de, "NVIDIA A100-SXM-64GB" },
{ 0x20B3, 0x14a8, 0x10de, "NVIDIA A100-SXM-64GB" },
{ 0x20B5, 0x1533, 0x10de, "NVIDIA A100 80GB PCIe" },
@ -815,6 +816,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x20B6, 0x1492, 0x10de, "NVIDIA PG506-232" },
{ 0x20B7, 0x1532, 0x10de, "NVIDIA A30" },
{ 0x20B7, 0x1804, 0x10de, "NVIDIA A30" },
{ 0x20B7, 0x1852, 0x10de, "NVIDIA A30" },
{ 0x20BD, 0x17f4, 0x10de, "NVIDIA A800-SXM4-40GB" },
{ 0x20F1, 0x145f, 0x10de, "NVIDIA A100-PCIE-40GB" },
{ 0x20F3, 0x179b, 0x10de, "NVIDIA A800-SXM4-80GB" },
@ -899,6 +901,8 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x2331, 0x1626, 0x10de, "NVIDIA H100 PCIe" },
{ 0x2339, 0x17fc, 0x10de, "NVIDIA H100" },
{ 0x233A, 0x183a, 0x10de, "NVIDIA H800 NVL" },
{ 0x2342, 0x16eb, 0x10de, "GH200 120GB" },
{ 0x2342, 0x1809, 0x10de, "GH200 480GB" },
{ 0x2414, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060 Ti" },
{ 0x2420, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080 Ti Laptop GPU" },
{ 0x2438, 0x0000, 0x0000, "NVIDIA RTX A5500 Laptop GPU" },

27
src/nvidia/inc/kernel/gpu/mem_mgr/phys_mem_allocator/addrtree.h

@ -151,11 +151,28 @@ void pmaAddrtreePrintTree(void *pMap, const char* str);
*
* @return void
*/
void pmaAddrtreeChangeState(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState);
void pmaAddrtreeChangeStateAttrib(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState, NvBool writeAttrib);
void pmaAddrtreeChangeStateAttribEx(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState,PMA_PAGESTATUS newStateMask);
void pmaAddrtreeChangePageStateAttrib(void * pMap, NvU64 startFrame, NvU64 pageSize,
PMA_PAGESTATUS newState, NvBool writeAttrib);
void pmaAddrtreeChangeStateAttribEx(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState, PMA_PAGESTATUS newStateMask);
void pmaAddrtreeChangePageStateAttribEx(void * pMap, NvU64 startFrame, NvU64 pageSize,
PMA_PAGESTATUS newState, PMA_PAGESTATUS newStateMask);
/*!
* @brief Changes the state & attrib bits specified by mask
*
* Changes the state of the bits given the physical frame number
* and the number of frames to change
*
* @param[in] pMap The addrtree to change
* @param[in] frameNum The first frame number of the range to change
* @param[in] numFrames The number of frames to change
* @param[in] newState The new state to change to
* @param[in] newStateMask Specific bits to write
*
* @return void
*/
void pmaAddrtreeChangeBlockStateAttrib(void *pMap, NvU64 frameNum,
NvU64 numFrames,
PMA_PAGESTATUS newState,
PMA_PAGESTATUS newStateMask);
/*!
* @brief Read the page state & attrib bits

10
src/nvidia/inc/kernel/gpu/mem_mgr/phys_mem_allocator/phys_mem_allocator.h

@ -178,10 +178,9 @@ typedef NV_STATUS (*pmaEvictRangeCb_t)(void *ctxPtr, NvU64 physBegin, NvU64 phys
*/
typedef void *(*pmaMapInit_t)(NvU64 numFrames, NvU64 addrBase, PMA_STATS *pPmaStats, NvBool bProtected);
typedef void (*pmaMapDestroy_t)(void *pMap);
typedef void (*pmaMapChangeState_t)(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState);
typedef void (*pmaMapChangeStateAttrib_t)(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState, NvBool writeAttrib);
typedef void (*pmaMapChangeStateAttribEx_t)(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState, PMA_PAGESTATUS newStateMask);
typedef void (*pmaMapChangePageStateAttrib_t)(void *pMap, NvU64 startFrame, NvU64 pageSize, PMA_PAGESTATUS newState, NvBool writeAttrib);
typedef void (*pmaMapChangePageStateAttribEx_t)(void *pMap, NvU64 startFrame, NvU64 pageSize, PMA_PAGESTATUS newState, PMA_PAGESTATUS newStateMask);
typedef void (*pmaMapChangeBlockStateAttrib_t)(void *pMap, NvU64 frameNum, NvU64 numFrames, PMA_PAGESTATUS newState, PMA_PAGESTATUS newStateMask);
typedef PMA_PAGESTATUS (*pmaMapRead_t)(void *pMap, NvU64 frameNum, NvBool readAttrib);
typedef NV_STATUS (*pmaMapScanContiguous_t)(void *pMap, NvU64 addrBase, NvU64 rangeStart, NvU64 rangeEnd,
NvU64 numPages, NvU64 *freelist, NvU64 pageSize, NvU64 alignment,
@ -201,10 +200,9 @@ struct _PMA_MAP_INFO
NvU32 mode;
pmaMapInit_t pmaMapInit;
pmaMapDestroy_t pmaMapDestroy;
pmaMapChangeState_t pmaMapChangeState;
pmaMapChangeStateAttrib_t pmaMapChangeStateAttrib;
pmaMapChangeStateAttribEx_t pmaMapChangeStateAttribEx;
pmaMapChangePageStateAttrib_t pmaMapChangePageStateAttrib;
pmaMapChangePageStateAttribEx_t pmaMapChangePageStateAttribEx;
pmaMapChangeBlockStateAttrib_t pmaMapChangeBlockStateAttrib;
pmaMapRead_t pmaMapRead;
pmaMapScanContiguous_t pmaMapScanContiguous;
pmaMapScanDiscontiguous_t pmaMapScanDiscontiguous;

51
src/nvidia/inc/kernel/gpu/mem_mgr/phys_mem_allocator/regmap.h

@ -89,34 +89,6 @@ void pmaRegmapDestroy(void *pMap);
NvU64 pmaRegmapGetEvictingFrames(void *pMap);
void pmaRegmapSetEvictingFrames(void *pMap, NvU64 frameEvictionsInProcess);
/*!
* @brief Changes the recorded state bits
*
* Changes the state of the bits given the physical frame number
*
* @param[in] pMap The regmap to change
* @param[in] frameNum The frame number to change
* @param[in] newState The new state to change to
*
* @return void
*/
void pmaRegmapChangeState(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState);
/*!
* @brief Changes the recorded state & attrib bits
*
* Changes the state of the bits given the physical frame number
*
* @param[in] pMap The regmap to change
* @param[in] frameNum The frame number to change
* @param[in] newState The new state to change to
* @param[in] writeAttrib Write attribute bits as well
*
* @return void
*/
void pmaRegmapChangeStateAttrib(void *pMap, NvU64 frameNum,
PMA_PAGESTATUS newState, NvBool writeAttrib);
/*!
* @brief Changes the recorded state & attrib bits for an entire page
*
@ -131,8 +103,8 @@ void pmaRegmapChangeStateAttrib(void *pMap, NvU64 frameNum,
*
* @return void
*/
void pmaRegmapChangePageStateAttrib(void * pMap, NvU64 frameNumStart, NvU64 pageSize,
PMA_PAGESTATUS newState, NvBool writeAttrib);
void pmaRegmapChangePageStateAttribEx(void * pMap, NvU64 frameNumStart, NvU64 pageSize,
PMA_PAGESTATUS newState, PMA_PAGESTATUS newStateMask);
/*!
* @brief Changes the state & attrib bits specified by mask
@ -150,6 +122,25 @@ void pmaRegmapChangeStateAttribEx(void *pMap, NvU64 frameNum,
PMA_PAGESTATUS newState,
PMA_PAGESTATUS newStateMask);
/*!
* @brief Changes the state & attrib bits specified by mask
*
* Changes the state of the bits given the physical frame number
* and the number of frames to change
*
* @param[in] pMap The regmap to change
* @param[in] frameNum The first frame number of the range to change
* @param[in] numFrames The number of frames to change
* @param[in] newState The new state to change to
* @param[in] newStateMask Specific bits to write
*
* @return void
*/
void pmaRegmapChangeBlockStateAttrib(void *pMap, NvU64 frameNum,
NvU64 numFrames,
PMA_PAGESTATUS newState,
PMA_PAGESTATUS newStateMask);
/*!
* @brief Read the page state & attrib bits
*

2
src/nvidia/src/kernel/gpu/bus/arch/maxwell/kern_bus_gm107.c

@ -2404,6 +2404,8 @@ kbusUpdateRmAperture_GM107
{
pFmt = pKernelBus->bar2[gfid].pFmt;
NV_CHECK_OR_RETURN(LEVEL_ERROR, pFmt != NULL, NV_ERR_INVALID_ARGUMENT);
// MMU_MAP_CTX
mapTarget.pLevelFmt = mmuFmtFindLevelWithPageShift(pFmt->pRoot,
BIT_IDX_64(pageSize));

4
src/nvidia/src/kernel/gpu/fifo/kernel_channel.c

@ -2776,6 +2776,10 @@ kchannelCtrlCmdGetClassEngineid_IMPL
return NV_ERR_OBJECT_NOT_FOUND;
}
NV_CHECK_OR_RETURN(LEVEL_ERROR,
pParams->hObject != RES_GET_CLIENT_HANDLE(pKernelChannel),
NV_ERR_INVALID_ARGUMENT);
if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
(IS_VIRTUAL_WITH_SRIOV(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu)))
{

12
src/nvidia/src/kernel/gpu/gr/kernel_graphics.c

@ -1738,8 +1738,9 @@ kgraphicsCreateGoldenImageChannel_IMPL
NvU32 classNum;
MIG_INSTANCE_REF ref;
NvU32 objectType;
NvU32 primarySliSubDeviceInstance;
// XXX This should be removed when braodcast SLI support is deprecated
// XXX This should be removed when broadcast SLI support is deprecated
if (!gpumgrIsParentGPU(pGpu))
{
return NV_OK;
@ -1750,6 +1751,8 @@ kgraphicsCreateGoldenImageChannel_IMPL
// FIXME these allocations corrupt BC state
NV_ASSERT_OK_OR_RETURN(
rmapiutilAllocClientAndDeviceHandles(pRmApi, pGpu, &hClientId, &hDeviceId, &hSubdeviceId));
// rmapiutilAllocClientAndDeviceHandles allocates a subdevice object for this subDeviceInstance
primarySliSubDeviceInstance = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
NV_ASSERT_OK_OR_RETURN(serverGetClientUnderLock(&g_resServ, hClientId, &pClientId));
@ -1765,6 +1768,11 @@ kgraphicsCreateGoldenImageChannel_IMPL
{
NvHandle hSecondary;
NV2080_ALLOC_PARAMETERS nv2080AllocParams;
NvU32 thisSubDeviceInstance = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
// Skip if already allocated by rmapiutilAllocClientAndDeviceHandles()
if (thisSubDeviceInstance == primarySliSubDeviceInstance)
SLI_LOOP_CONTINUE;
// Allocate a subDevice
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
@ -1772,7 +1780,7 @@ kgraphicsCreateGoldenImageChannel_IMPL
cleanup);
portMemSet(&nv2080AllocParams, 0, sizeof(nv2080AllocParams));
nv2080AllocParams.subDeviceId = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
nv2080AllocParams.subDeviceId = thisSubDeviceInstance;
NV_CHECK_OK(status, LEVEL_SILENT,
pRmApi->AllocWithHandle(pRmApi,

4
src/nvidia/src/kernel/gpu/mem_mgr/mem_mgr.c

@ -390,8 +390,8 @@ memmgrInitInternalChannels_IMPL
IS_MIG_ENABLED(pGpu) ||
gpuIsCCorApmFeatureEnabled(pGpu) ||
IsSLIEnabled(pGpu) ||
RMCFG_FEATURE_ARCH_PPC64LE ||
RMCFG_FEATURE_ARCH_AARCH64)
NVCPU_IS_PPC64LE ||
NVCPU_IS_AARCH64)
{
// BUG 4167899: Temporarily skip CeUtils creation on platforms where it fails
NV_PRINTF(LEVEL_INFO, "Skipping global CeUtils creation\n");

37
src/nvidia/src/kernel/gpu/mem_mgr/phys_mem_allocator/addrtree.c

@ -1890,8 +1890,8 @@ __pmaAddrtreeChangePageStateAttribEx
// This function wraps the real __pmaAddrtreeChangePageStateAttribEx
// to allow addrtree to set 128KB page size
//
static void
_pmaAddrtreeChangePageStateAttribEx
void
pmaAddrtreeChangePageStateAttribEx
(
void *pMap,
NvU64 frameNumStart,
@ -1939,35 +1939,24 @@ pmaAddrtreeChangeStateAttribEx
PMA_PAGESTATUS newStateMask
)
{
_pmaAddrtreeChangePageStateAttribEx(pMap, frameNum, _PMA_64KB, newState, newStateMask);
}
// TODO: merge this on PMA level
void pmaAddrtreeChangeState(void *pTree, NvU64 frameNum, PMA_PAGESTATUS newState)
{
pmaAddrtreeChangeStateAttribEx(pTree, frameNum, newState, STATE_MASK);
}
// TODO: merge this on PMA level
void pmaAddrtreeChangeStateAttrib(void *pTree, NvU64 frameNum, PMA_PAGESTATUS newState, NvBool writeAttrib)
{
PMA_PAGESTATUS mask = writeAttrib ? MAP_MASK : STATE_MASK;
pmaAddrtreeChangeStateAttribEx(pTree, frameNum, newState, mask);
pmaAddrtreeChangePageStateAttribEx(pMap, frameNum, _PMA_64KB, newState, newStateMask);
}
// TODO: merge this on PMA level
void
pmaAddrtreeChangePageStateAttrib
pmaAddrtreeChangeBlockStateAttrib
(
void * pTree,
NvU64 frameNumStart,
NvU64 pageSize,
void *pMap,
NvU64 frame,
NvU64 len,
PMA_PAGESTATUS newState,
NvBool writeAttrib
PMA_PAGESTATUS writeMask
)
{
PMA_PAGESTATUS mask = writeAttrib ? MAP_MASK : STATE_MASK;
_pmaAddrtreeChangePageStateAttribEx(pTree, frameNumStart, pageSize, newState, mask);
while (len != 0)
{
len--;
pmaAddrtreeChangeStateAttribEx(pMap, frame + len, newState, writeMask);
}
}
PMA_PAGESTATUS pmaAddrtreeRead

2
src/nvidia/src/kernel/gpu/mem_mgr/phys_mem_allocator/numa.c

@ -651,7 +651,7 @@ NV_STATUS pmaNumaAllocate
status = NV_ERR_NO_MEMORY;
break;
}
pPma->pMapInfo->pmaMapChangeStateAttrib(pMap, frameOffset, allocOption, NV_TRUE);
pPma->pMapInfo->pmaMapChangeStateAttribEx(pMap, frameOffset, allocOption, MAP_MASK);
}
if (status != NV_OK)
break;

29
src/nvidia/src/kernel/gpu/mem_mgr/phys_mem_allocator/phys_mem_allocator.c

@ -97,7 +97,7 @@ _pmaRollback
for (j = 0; j < framesPerPage; j++)
{
pPma->pMapInfo->pmaMapChangeState(pPma->pRegions[regId], (frameNum + j), oldState);
pPma->pMapInfo->pmaMapChangeStateAttribEx(pPma->pRegions[regId], (frameNum + j), oldState, STATE_MASK);
}
}
}
@ -110,7 +110,7 @@ _pmaRollback
frameNum = PMA_ADDR2FRAME(pPages[failCount], addrBase);
for(i = 0; i < failFrame; i++)
{
pPma->pMapInfo->pmaMapChangeState(pPma->pRegions[regId], (frameNum + i), oldState);
pPma->pMapInfo->pmaMapChangeStateAttribEx(pPma->pRegions[regId], (frameNum + i), oldState, STATE_MASK);
}
}
}
@ -208,10 +208,9 @@ pmaInitialize(PMA *pPma, NvU32 initFlags)
//
pMapInfo->pmaMapInit = pmaRegmapInit;
pMapInfo->pmaMapDestroy = pmaRegmapDestroy;
pMapInfo->pmaMapChangeState = pmaRegmapChangeState;
pMapInfo->pmaMapChangeStateAttrib = pmaRegmapChangeStateAttrib;
pMapInfo->pmaMapChangeStateAttribEx = pmaRegmapChangeStateAttribEx;
pMapInfo->pmaMapChangePageStateAttrib = pmaRegmapChangePageStateAttrib;
pMapInfo->pmaMapChangePageStateAttribEx = pmaRegmapChangePageStateAttribEx;
pMapInfo->pmaMapChangeBlockStateAttrib = pmaRegmapChangeBlockStateAttrib;
pMapInfo->pmaMapRead = pmaRegmapRead;
pMapInfo->pmaMapScanContiguous = pmaRegmapScanContiguous;
pMapInfo->pmaMapScanDiscontiguous = pmaRegmapScanDiscontiguous;
@ -246,10 +245,9 @@ pmaInitialize(PMA *pPma, NvU32 initFlags)
{
pMapInfo->pmaMapInit = pmaAddrtreeInit;
pMapInfo->pmaMapDestroy = pmaAddrtreeDestroy;
pMapInfo->pmaMapChangeState = pmaAddrtreeChangeState;
pMapInfo->pmaMapChangeStateAttrib = pmaAddrtreeChangeStateAttrib;
pMapInfo->pmaMapChangeStateAttribEx = pmaAddrtreeChangeStateAttribEx;
pMapInfo->pmaMapChangePageStateAttrib = pmaAddrtreeChangePageStateAttrib;
pMapInfo->pmaMapChangePageStateAttribEx = pmaAddrtreeChangePageStateAttribEx;
pMapInfo->pmaMapChangeBlockStateAttrib = pmaAddrtreeChangeBlockStateAttrib;
pMapInfo->pmaMapRead = pmaAddrtreeRead;
pMapInfo->pmaMapScanContiguous = pmaAddrtreeScanContiguous;
pMapInfo->pmaMapScanDiscontiguous = pmaAddrtreeScanDiscontiguous;
@ -1084,11 +1082,8 @@ pmaAllocatePages_retry:
frameBase,
frameBase + numFramesAllocated - 1);
for (i = 0; i < numPagesAllocatedSoFar; i++)
{
pPma->pMapInfo->pmaMapChangePageStateAttrib(pMap, frameBase + (i * framesPerPage),
pageSize, pinOption, NV_TRUE);
}
pPma->pMapInfo->pmaMapChangeBlockStateAttrib(pMap, frameBase, numPagesAllocatedSoFar * framesPerPage,
pinOption, MAP_MASK);
if (blacklistOffFlag && blacklistOffPerRegion[regId])
{
@ -1134,8 +1129,8 @@ pmaAllocatePages_retry:
}
lastFrameRangeEnd = frameBase + framesPerPage - 1;
pPma->pMapInfo->pmaMapChangePageStateAttrib(pMap, PMA_ADDR2FRAME(pPages[i], addrBase),
pageSize, pinOption, NV_TRUE);
pPma->pMapInfo->pmaMapChangePageStateAttribEx(pMap, PMA_ADDR2FRAME(pPages[i], addrBase),
pageSize, pinOption, MAP_MASK);
}
NV_PRINTF(LEVEL_INFO, "0x%llx through 0x%llx \n",
@ -1267,7 +1262,7 @@ pmaPinPages
}
else
{
pPma->pMapInfo->pmaMapChangeState(pPma->pRegions[regId], (frameNum + j), STATE_PIN);
pPma->pMapInfo->pmaMapChangeStateAttribEx(pPma->pRegions[regId], (frameNum + j), STATE_PIN, STATE_MASK);
}
}
}
@ -1320,7 +1315,7 @@ pmaUnpinPages
}
else
{
pPma->pMapInfo->pmaMapChangeState(pPma->pRegions[regId], (frameNum + j), STATE_UNPIN);
pPma->pMapInfo->pmaMapChangeStateAttribEx(pPma->pRegions[regId], (frameNum + j), STATE_UNPIN, STATE_MASK);
}
}
}

1134
src/nvidia/src/kernel/gpu/mem_mgr/phys_mem_allocator/regmap.c

File diff suppressed because it is too large

2
src/nvidia/src/kernel/rmapi/event.c

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a

6
src/nvidia/src/libraries/mmu/mmu_walk.c

@ -177,6 +177,7 @@ mmuWalkSetUserCtx
MMU_WALK_USER_CTX *pUserCtx
)
{
NV_ASSERT_OR_RETURN(NULL != pWalk, NV_ERR_INVALID_STATE);
pWalk->pUserCtx = pUserCtx;
return NV_OK;
@ -223,9 +224,12 @@ mmuWalkFindLevel
)
{
const MMU_WALK_LEVEL *pLevel = &pWalk->root;
while (pLevel->pFmt != pLevelFmt)
while (pLevel != NULL && pLevel->pFmt != pLevelFmt)
{
NvU32 subLevel;
NV_ASSERT_OR_RETURN(pLevel->pFmt != NULL, NULL);
// Single sub-level always continues.
if (1 == pLevel->pFmt->numSubLevels)
{

8
utils.mk

@ -575,8 +575,14 @@ LD_TARGET_EMULATION_FLAG_SunOS_x86_64 = elf_x86_64_sol2
LD_TARGET_EMULATION_FLAG_FreeBSD_x86 = elf_i386_fbsd
LD_TARGET_EMULATION_FLAG_FreeBSD_x86_64 = elf_x86_64_fbsd
# Different linkers (GNU ld versus ld.lld versus ld.gold) expect different
# target architecture values for '-m'. Empirically, only ld.lld appears to
# actually need it, so only add the option when linking with ld.lld. Example
# `ld.lld -v` output: "LLD 15.0.7 (compatible with GNU linkers)".
LD_IS_LLD := $(if $(filter LLD,$(shell $(LD) -v)),1)
ifdef LD_TARGET_EMULATION_FLAG_$(TARGET_OS)_$(TARGET_ARCH)
LD_TARGET_EMULATION_FLAG = -m $(LD_TARGET_EMULATION_FLAG_$(TARGET_OS)_$(TARGET_ARCH))
LD_TARGET_EMULATION_FLAG = $(if $(LD_IS_LLD), -m $(LD_TARGET_EMULATION_FLAG_$(TARGET_OS)_$(TARGET_ARCH)))
endif
define READ_ONLY_OBJECT_FROM_FILE_RULE

2
version.mk

@ -1,4 +1,4 @@
NVIDIA_VERSION = 545.23.06
NVIDIA_VERSION = 545.29.03
# This file.
VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))

Loading…
Cancel
Save