545.29.03

1 year ago · f364378a65
28 changed files with 870 additions and 629 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,8 @@

 ## Release 545 Entries

+### [545.29.03] 2023-10-31
+
 ### [545.23.06] 2023-10-17

 #### Fixed
@@ -62,6 +64,10 @@

 ## Release 525 Entries

+#### Fixed
+
+- Fix nvidia_p2p_get_pages(): Fix double-free in register-callback error path, [#557](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/557) by @BrendanCunningham
+
 ### [525.116.04] 2023-05-09

 ### [525.116.03] 2023-04-25
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # NVIDIA Linux Open GPU Kernel Module Source

 This is the source release of the NVIDIA Linux open GPU kernel modules,
-version 545.23.06.
+version 545.29.03.


 ## How to Build
@@ -17,7 +17,7 @@ as root:

 Note that the kernel modules built here must be used with GSP
 firmware and user-space NVIDIA GPU driver components from a corresponding
-545.23.06 driver release.  This can be achieved by installing
+545.29.03 driver release.  This can be achieved by installing
 the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
 option.  E.g.,

@@ -188,7 +188,7 @@ encountered specific to them.
 For details on feature support and limitations, see the NVIDIA GPU driver
 end user README here:

-https://us.download.nvidia.com/XFree86/Linux-x86_64/545.23.06/README/kernel_open.html
+https://us.download.nvidia.com/XFree86/Linux-x86_64/545.29.03/README/kernel_open.html

 In the below table, if three IDs are listed, the first is the PCI Device 
 ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@@ -658,6 +658,7 @@ Subsystem Device ID.
 | NVIDIA A100-SXM4-80GB                           | 20B2 10DE 147F |
 | NVIDIA A100-SXM4-80GB                           | 20B2 10DE 1622 |
 | NVIDIA A100-SXM4-80GB                           | 20B2 10DE 1623 |
+| NVIDIA PG509-210                                | 20B2 10DE 1625 |
 | NVIDIA A100-SXM-64GB                            | 20B3 10DE 14A7 |
 | NVIDIA A100-SXM-64GB                            | 20B3 10DE 14A8 |
 | NVIDIA A100 80GB PCIe                           | 20B5 10DE 1533 |
@@ -665,6 +666,7 @@ Subsystem Device ID.
 | NVIDIA PG506-232                                | 20B6 10DE 1492 |
 | NVIDIA A30                                      | 20B7 10DE 1532 |
 | NVIDIA A30                                      | 20B7 10DE 1804 |
+| NVIDIA A30                                      | 20B7 10DE 1852 |
 | NVIDIA A800-SXM4-40GB                           | 20BD 10DE 17F4 |
 | NVIDIA A100-PCIE-40GB                           | 20F1 10DE 145F |
 | NVIDIA A800-SXM4-80GB                           | 20F3 10DE 179B |
@@ -748,6 +750,8 @@ Subsystem Device ID.
 | NVIDIA H100 PCIe                                | 2331 10DE 1626 |
 | NVIDIA H100                                     | 2339 10DE 17FC |
 | NVIDIA H800 NVL                                 | 233A 10DE 183A |
+| GH200 120GB                                     | 2342 10DE 16EB |
+| GH200 480GB                                     | 2342 10DE 1809 |
 | NVIDIA GeForce RTX 3060 Ti                      | 2414           |
 | NVIDIA GeForce RTX 3080 Ti Laptop GPU           | 2420           |
 | NVIDIA RTX A5500 Laptop GPU                     | 2438           |
--- a/kernel-open/Kbuild
+++ b/kernel-open/Kbuild
@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
 EXTRA_CFLAGS += -I$(src)
 EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
 EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
-EXTRA_CFLAGS += -DNV_VERSION_STRING=\"545.23.06\"
+EXTRA_CFLAGS += -DNV_VERSION_STRING=\"545.29.03\"

 ifneq ($(SYSSRCHOST1X),)
 EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
--- a/kernel-open/nvidia-drm/nvidia-drm-drv.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-drv.c
@@ -729,6 +729,7 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,

    params->gpu_id = nv_dev->gpu_info.gpu_id;
    params->primary_index = dev->primary->index;
+    params->supports_alloc = false;
    params->generic_page_kind = 0;
    params->page_kind_generation = 0;
    params->sector_layout = 0;
@@ -736,15 +737,20 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
    params->supports_semsurf = false;

 #if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
-    params->generic_page_kind = nv_dev->genericPageKind;
-    params->page_kind_generation = nv_dev->pageKindGeneration;
-    params->sector_layout = nv_dev->sectorLayout;
-    /* Semaphore surfaces are only supported if the modeset = 1 parameter is set */
-    if ((nv_dev->pDevice) != NULL && (nv_dev->semsurf_stride != 0)) {
-        params->supports_semsurf = true;
+    /* Memory allocation and semaphore surfaces are only supported
+     * if the modeset = 1 parameter is set */
+    if (nv_dev->pDevice != NULL) {
+        params->supports_alloc = true;
+        params->generic_page_kind = nv_dev->genericPageKind;
+        params->page_kind_generation = nv_dev->pageKindGeneration;
+        params->sector_layout = nv_dev->sectorLayout;
+
+        if (nv_dev->semsurf_stride != 0) {
+            params->supports_semsurf = true;
 #if defined(NV_SYNC_FILE_GET_FENCE_PRESENT)
-        params->supports_sync_fd = true;
+            params->supports_sync_fd = true;
 #endif /* defined(NV_SYNC_FILE_GET_FENCE_PRESENT) */
+        }
    }
 #endif /* defined(NV_DRM_ATOMIC_MODESET_AVAILABLE) */

--- a/kernel-open/nvidia-drm/nvidia-drm-ioctl.h
+++ b/kernel-open/nvidia-drm/nvidia-drm-ioctl.h
@@ -178,7 +178,10 @@ struct drm_nvidia_get_dev_info_params {
    uint32_t gpu_id;             /* OUT */
    uint32_t primary_index;      /* OUT; the "card%d" value */

-    /* See DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D definitions of these */
+    uint32_t supports_alloc;     /* OUT */
+    /* The generic_page_kind, page_kind_generation, and sector_layout
+     * fields are only valid if supports_alloc is true.
+     * See DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D definitions of these. */
    uint32_t generic_page_kind;    /* OUT */
    uint32_t page_kind_generation; /* OUT */
    uint32_t sector_layout;        /* OUT */
--- a/kernel-open/nvidia/nv-msi.c
+++ b/kernel-open/nvidia/nv-msi.c
@@ -156,7 +156,7 @@ NvS32 NV_API_CALL nv_request_msix_irq(nv_linux_state_t *nvl)
        {
            for( j = 0; j < i; j++)
            {
-                free_irq(nvl->msix_entries[i].vector, (void *)nvl);
+                free_irq(nvl->msix_entries[j].vector, (void *)nvl);
            }
            break;
        }
--- a/kernel-open/nvidia/nv-p2p.c
+++ b/kernel-open/nvidia/nv-p2p.c
@@ -506,8 +506,13 @@ static int nv_p2p_get_pages(
    (*page_table)->page_size = page_size_index;

    os_free_mem(physical_addresses);
+    physical_addresses = NULL;
+
    os_free_mem(wreqmb_h);
+    wreqmb_h = NULL;
+
    os_free_mem(rreqmb_h);
+    rreqmb_h = NULL;

    if (free_callback != NULL)
    {
--- a/src/common/displayport/src/dp_deviceimpl.cpp
+++ b/src/common/displayport/src/dp_deviceimpl.cpp
@@ -2068,11 +2068,13 @@ void DeviceImpl::setDscDecompressionDevice(bool bDscCapBasedOnParent)
                    }
                }
            }
-            else if (this->parent && this->parent->isDSCDecompressionSupported())
+            else if (this->parent && this->parent->isDSCDecompressionSupported() &&
+                     !(this->isLogical()))
            {
                //
                // This condition takes care of sink devices not capable of DSC
-                // but parent is capable of DSC decompression.
+                // but parent is capable of DSC decompression. We need to skip this
+                // if sink is at logical port.
                //
                this->bDSCPossible = true;
                this->devDoingDscDecompression = this->parent;
--- a/src/common/inc/nvBldVer.h
+++ b/src/common/inc/nvBldVer.h
@@ -36,25 +36,25 @@
 // and then checked back in. You cannot make changes to these sections without
 // corresponding changes to the buildmeister script
 #ifndef NV_BUILD_BRANCH
-    #define NV_BUILD_BRANCH             r545_74
+    #define NV_BUILD_BRANCH             r545_96
 #endif
 #ifndef NV_PUBLIC_BRANCH
-    #define NV_PUBLIC_BRANCH             r545_74
+    #define NV_PUBLIC_BRANCH             r545_96
 #endif

 #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
-#define NV_BUILD_BRANCH_VERSION         "rel/gpu_drv/r545/r545_74-96"
-#define NV_BUILD_CHANGELIST_NUM         (33409679)
+#define NV_BUILD_BRANCH_VERSION         "rel/gpu_drv/r545/r545_96-121"
+#define NV_BUILD_CHANGELIST_NUM         (33470109)
 #define NV_BUILD_TYPE                   "Official"
-#define NV_BUILD_NAME                   "rel/gpu_drv/r545/r545_74-96"
-#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33409679)
+#define NV_BUILD_NAME                   "rel/gpu_drv/r545/r545_96-121"
+#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33470109)

 #else     /* Windows builds */
-#define NV_BUILD_BRANCH_VERSION         "r545_74-8"
-#define NV_BUILD_CHANGELIST_NUM         (33409679)
+#define NV_BUILD_BRANCH_VERSION         "r545_96-3"
+#define NV_BUILD_CHANGELIST_NUM         (33467912)
 #define NV_BUILD_TYPE                   "Official"
-#define NV_BUILD_NAME                   "545.87"
-#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33409679)
+#define NV_BUILD_NAME                   "546.04"
+#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33467912)
 #define NV_BUILD_BRANCH_BASE_VERSION    R545
 #endif
 // End buildmeister python edited section
--- a/src/common/inc/nvUnixVersion.h
+++ b/src/common/inc/nvUnixVersion.h
@@ -4,7 +4,7 @@
 #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
    (defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)

-#define NV_VERSION_STRING               "545.23.06"
+#define NV_VERSION_STRING               "545.29.03"

 #else

--- a/src/common/sdk/nvidia/inc/ctrl/ctrl30f1.h
+++ b/src/common/sdk/nvidia/inc/ctrl/ctrl30f1.h
@@ -887,10 +887,6 @@ typedef struct NV30F1_CTRL_GSYNC_GET_CONTROL_SWAP_LOCK_WINDOW_PARAMS {
 *   This parameter is set by the client to indicate the 
 *   gpuId of the GPU to which the display to be optimized
 *   is attached.
- * display
- *   This parameter is not used by RM currently.
- *   Clients can ignore this parameter.  Note that this
- *   parameter will be removed in future.
 * output
 *   This parameter is set by the client to indicate the 
 *   output resource type of the display to be optimized.
@@ -1033,6 +1029,12 @@ typedef struct NV30F1_CTRL_GSYNC_GET_CONTROL_SWAP_LOCK_WINDOW_PARAMS {
 *   optimal pixel clock to use with the adjusted mode, 
 *   in units of Hz. 
 *
+ *
+ * bOptimized[out]
+ *   This is set to NV_TRUE if the timings were successfully optimized, and
+ *   NV_FALSE otherwise.
+ *
+ *
 * Progressive Raster Structure
 *
 *                 hSyncEnd                            hTotal
@@ -1145,28 +1147,29 @@ typedef struct NV30F1_CTRL_GSYNC_GET_CONTROL_SWAP_LOCK_WINDOW_PARAMS {
 #define NV30F1_CTRL_GSYNC_GET_OPTIMIZED_TIMING_PARAMS_MESSAGE_ID (0x60U)

 typedef struct NV30F1_CTRL_GSYNC_GET_OPTIMIZED_TIMING_PARAMS {
-    NvU32 gpuId;
-    NvU32 display;
-    NvU32 output;
-    NvU32 protocol;
-    NvU32 structure;
-    NvU32 adjust;
-    NvU32 hDeltaStep;
-    NvU32 hDeltaMax;
-    NvU32 vDeltaStep;
-    NvU32 vDeltaMax;
-    NvU32 hSyncEnd;
-    NvU32 hBlankEnd;
-    NvU32 hBlankStart;
-    NvU32 hTotal;
-    NvU32 vSyncEnd;
-    NvU32 vBlankEnd;
-    NvU32 vBlankStart;
-    NvU32 vInterlacedBlankEnd;
-    NvU32 vInterlacedBlankStart;
-    NvU32 vTotal;
-    NvU32 refreshX10K;
-    NvU32 pixelClockHz;
+    NvU32  gpuId;
+    NvU32  output;
+    NvU32  protocol;
+    NvU32  structure;
+    NvU32  adjust;
+    NvU32  hDeltaStep;
+    NvU32  hDeltaMax;
+    NvU32  vDeltaStep;
+    NvU32  vDeltaMax;
+    NvU32  hSyncEnd;
+    NvU32  hBlankEnd;
+    NvU32  hBlankStart;
+    NvU32  hTotal;
+    NvU32  vSyncEnd;
+    NvU32  vBlankEnd;
+    NvU32  vBlankStart;
+    NvU32  vInterlacedBlankEnd;
+    NvU32  vInterlacedBlankStart;
+    NvU32  vTotal;
+    NvU32  refreshX10K;
+    NvU32  pixelClockHz;
+
+    NvBool bOptimized;
 } NV30F1_CTRL_GSYNC_GET_OPTIMIZED_TIMING_PARAMS;

 /* output values */
--- a/src/nvidia-modeset/src/nvkms-evo.c
+++ b/src/nvidia-modeset/src/nvkms-evo.c
@@ -786,6 +786,9 @@ static void TweakTimingsForGsync(const NVDpyEvoRec *pDpyEvo,
            return;
    }

+    nvEvoLogInfoString(pInfoString,
+            "Adjusting Mode Timings for Quadro Sync Compatibility");
+
    ret = nvRmApiControl(nvEvoGlobal.clientHandle,
                         pDispEvo->pFrameLockEvo->device,
                         NV30F1_CTRL_CMD_GSYNC_GET_OPTIMIZED_TIMING,
@@ -796,12 +799,13 @@ static void TweakTimingsForGsync(const NVDpyEvoRec *pDpyEvo,
        nvAssert(!"Failed to convert to Quadro Sync safe timing");
        /* do not apply the timings returned by RM if the call failed */
        return;
+    } else if (!gsyncOptTimingParams.bOptimized) {
+        nvEvoLogInfoString(pInfoString, " Timings Unchanged.");
+        return;
    }

    nvConstructNvModeTimingsFromHwModeTimings(pTimings, &modeTimings);

-    nvEvoLogInfoString(pInfoString,
-            "Adjusting Mode Timings for Quadro Sync Compatibility");
    nvEvoLogInfoString(pInfoString, " Old Timings:");
    nvEvoLogModeValidationModeTimings(pInfoString, &modeTimings);

@@ -5923,13 +5927,6 @@ NvBool nvConstructHwModeTimingsImpCheckEvo(
                                  NVKMS_MODE_VALIDATION_REQUIRE_BOOT_CLOCKS);
    NvU32 ret;

-    /* bypass this checking if the user disabled IMP */
-
-    if ((pParams->overrides &
-         NVKMS_MODE_VALIDATION_NO_EXTENDED_GPU_CAPABILITIES_CHECK) != 0) {
-        return TRUE;
-    }
-
    activeRmId = nvRmAllocDisplayId(pConnectorEvo->pDispEvo,
                    nvAddDpyIdToEmptyDpyIdList(pConnectorEvo->displayId));
    if (activeRmId == 0x0) {
@@ -5954,11 +5951,18 @@ NvBool nvConstructHwModeTimingsImpCheckEvo(
        timingsParams[head].pUsage = &timings[head].viewPort.guaranteedUsage;
    }

-    ret = nvValidateImpOneDispDowngrade(pConnectorEvo->pDispEvo, timingsParams,
-                                        requireBootClocks,
-                                        NV_EVO_REALLOCATE_BANDWIDTH_MODE_NONE,
-                                        /* downgradePossibleHeadsBitMask */
-                                        (NVBIT(NVKMS_MAX_HEADS_PER_DISP) - 1UL));
+    /* bypass this checking if the user disabled IMP */
+    if ((pParams->overrides &
+         NVKMS_MODE_VALIDATION_NO_EXTENDED_GPU_CAPABILITIES_CHECK) != 0) {
+        ret = TRUE;
+    } else {
+        ret = nvValidateImpOneDispDowngrade(pConnectorEvo->pDispEvo, timingsParams,
+                                            requireBootClocks,
+                                            NV_EVO_REALLOCATE_BANDWIDTH_MODE_NONE,
+                                            /* downgradePossibleHeadsBitMask */
+                                            (NVBIT(NVKMS_MAX_HEADS_PER_DISP) - 1UL));
+    }
+
    if (ret) {
        *pNumHeads = numHeads;
    } else {
--- a/src/nvidia/generated/g_nv_name_released.h
+++ b/src/nvidia/generated/g_nv_name_released.h
@@ -808,6 +808,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
    { 0x20B2, 0x147f, 0x10de, "NVIDIA A100-SXM4-80GB" },
    { 0x20B2, 0x1622, 0x10de, "NVIDIA A100-SXM4-80GB" },
    { 0x20B2, 0x1623, 0x10de, "NVIDIA A100-SXM4-80GB" },
+    { 0x20B2, 0x1625, 0x10de, "NVIDIA PG509-210" },
    { 0x20B3, 0x14a7, 0x10de, "NVIDIA A100-SXM-64GB" },
    { 0x20B3, 0x14a8, 0x10de, "NVIDIA A100-SXM-64GB" },
    { 0x20B5, 0x1533, 0x10de, "NVIDIA A100 80GB PCIe" },
@@ -815,6 +816,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
    { 0x20B6, 0x1492, 0x10de, "NVIDIA PG506-232" },
    { 0x20B7, 0x1532, 0x10de, "NVIDIA A30" },
    { 0x20B7, 0x1804, 0x10de, "NVIDIA A30" },
+    { 0x20B7, 0x1852, 0x10de, "NVIDIA A30" },
    { 0x20BD, 0x17f4, 0x10de, "NVIDIA A800-SXM4-40GB" },
    { 0x20F1, 0x145f, 0x10de, "NVIDIA A100-PCIE-40GB" },
    { 0x20F3, 0x179b, 0x10de, "NVIDIA A800-SXM4-80GB" },
@@ -899,6 +901,8 @@ static const CHIPS_RELEASED sChipsReleased[] = {
    { 0x2331, 0x1626, 0x10de, "NVIDIA H100 PCIe" },
    { 0x2339, 0x17fc, 0x10de, "NVIDIA H100" },
    { 0x233A, 0x183a, 0x10de, "NVIDIA H800 NVL" },
+    { 0x2342, 0x16eb, 0x10de, "GH200 120GB" },
+    { 0x2342, 0x1809, 0x10de, "GH200 480GB" },
    { 0x2414, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060 Ti" },
    { 0x2420, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080 Ti Laptop GPU" },
    { 0x2438, 0x0000, 0x0000, "NVIDIA RTX A5500 Laptop GPU" },
--- a/src/nvidia/inc/kernel/gpu/mem_mgr/phys_mem_allocator/addrtree.h
+++ b/src/nvidia/inc/kernel/gpu/mem_mgr/phys_mem_allocator/addrtree.h
@@ -151,11 +151,28 @@ void pmaAddrtreePrintTree(void *pMap, const char* str);
 *
 * @return void
 */
-void pmaAddrtreeChangeState(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState);
-void pmaAddrtreeChangeStateAttrib(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState, NvBool writeAttrib);
-void pmaAddrtreeChangeStateAttribEx(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState,PMA_PAGESTATUS newStateMask);
-void pmaAddrtreeChangePageStateAttrib(void * pMap, NvU64 startFrame, NvU64 pageSize,
-                                      PMA_PAGESTATUS newState, NvBool writeAttrib);
+void pmaAddrtreeChangeStateAttribEx(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState, PMA_PAGESTATUS newStateMask);
+void pmaAddrtreeChangePageStateAttribEx(void * pMap, NvU64 startFrame, NvU64 pageSize,
+                                        PMA_PAGESTATUS newState, PMA_PAGESTATUS newStateMask);
+
+/*!
+ * @brief Changes the state & attrib bits specified by mask
+ *
+ * Changes the state of the bits given the physical frame number
+ * and the number of frames to change
+ *
+ * @param[in]   pMap         The addrtree to change
+ * @param[in]   frameNum     The frame number to change
+ * @param[in]   numFrames    The number of frames to change
+ * @param[in]   newState     The new state to change to
+ * @param[in]   newStateMask Specific bits to write
+ *
+ * @return void
+ */
+void pmaAddrtreeChangeBlockStateAttrib(void *pMap, NvU64 frameNum,
+                                     NvU64 numFrames,
+                                     PMA_PAGESTATUS newState,
+                                     PMA_PAGESTATUS newStateMask);

 /*!
 * @brief Read the page state & attrib bits
--- a/src/nvidia/inc/kernel/gpu/mem_mgr/phys_mem_allocator/phys_mem_allocator.h
+++ b/src/nvidia/inc/kernel/gpu/mem_mgr/phys_mem_allocator/phys_mem_allocator.h
@@ -178,10 +178,9 @@ typedef NV_STATUS (*pmaEvictRangeCb_t)(void *ctxPtr, NvU64 physBegin, NvU64 phys
 */
 typedef void *(*pmaMapInit_t)(NvU64 numFrames, NvU64 addrBase, PMA_STATS *pPmaStats, NvBool bProtected);
 typedef void  (*pmaMapDestroy_t)(void *pMap);
-typedef void  (*pmaMapChangeState_t)(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState);
-typedef void  (*pmaMapChangeStateAttrib_t)(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState, NvBool writeAttrib);
 typedef void  (*pmaMapChangeStateAttribEx_t)(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState, PMA_PAGESTATUS newStateMask);
-typedef void  (*pmaMapChangePageStateAttrib_t)(void *pMap, NvU64 startFrame, NvU64 pageSize, PMA_PAGESTATUS newState, NvBool writeAttrib);
+typedef void  (*pmaMapChangePageStateAttribEx_t)(void *pMap, NvU64 startFrame, NvU64 pageSize, PMA_PAGESTATUS newState, PMA_PAGESTATUS newStateMask);
+typedef void  (*pmaMapChangeBlockStateAttrib_t)(void *pMap, NvU64 frameNum, NvU64 numFrames, PMA_PAGESTATUS newState, PMA_PAGESTATUS newStateMask);
 typedef PMA_PAGESTATUS (*pmaMapRead_t)(void *pMap, NvU64 frameNum, NvBool readAttrib);
 typedef NV_STATUS (*pmaMapScanContiguous_t)(void *pMap, NvU64 addrBase, NvU64 rangeStart, NvU64 rangeEnd,
                                            NvU64 numPages, NvU64 *freelist, NvU64 pageSize, NvU64 alignment,
@@ -201,10 +200,9 @@ struct _PMA_MAP_INFO
    NvU32                       mode;
    pmaMapInit_t                pmaMapInit;
    pmaMapDestroy_t             pmaMapDestroy;
-    pmaMapChangeState_t         pmaMapChangeState;
-    pmaMapChangeStateAttrib_t   pmaMapChangeStateAttrib;
    pmaMapChangeStateAttribEx_t pmaMapChangeStateAttribEx;
-    pmaMapChangePageStateAttrib_t pmaMapChangePageStateAttrib;
+    pmaMapChangePageStateAttribEx_t pmaMapChangePageStateAttribEx;
+    pmaMapChangeBlockStateAttrib_t pmaMapChangeBlockStateAttrib;
    pmaMapRead_t                pmaMapRead;
    pmaMapScanContiguous_t      pmaMapScanContiguous;
    pmaMapScanDiscontiguous_t   pmaMapScanDiscontiguous;
--- a/src/nvidia/inc/kernel/gpu/mem_mgr/phys_mem_allocator/regmap.h
+++ b/src/nvidia/inc/kernel/gpu/mem_mgr/phys_mem_allocator/regmap.h
@@ -89,34 +89,6 @@ void pmaRegmapDestroy(void *pMap);
 NvU64 pmaRegmapGetEvictingFrames(void *pMap);
 void pmaRegmapSetEvictingFrames(void *pMap, NvU64 frameEvictionsInProcess);

-/*!
- * @brief Changes the recorded state bits
- *
- * Changes the state of the bits given the physical frame number
- *
- * @param[in]   pMap         The regmap to change
- * @param[in]   frameNum     The frame number to change
- * @param[in]   newState     The new state to change to
- *
- * @return void
- */
-void pmaRegmapChangeState(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState);
-
-/*!
- * @brief Changes the recorded state & attrib bits
- *
- * Changes the state of the bits given the physical frame number
- *
- * @param[in]   pMap         The regmap to change
- * @param[in]   frameNum     The frame number to change
- * @param[in]   newState     The new state to change to
- * @param[in]   writeAttrib  Write attribute bits as well
- *
- * @return void
- */
-void pmaRegmapChangeStateAttrib(void *pMap, NvU64 frameNum,
-                                PMA_PAGESTATUS newState, NvBool writeAttrib);
-
 /*!
 * @brief Changes the recorded state & attrib bits for an entire page
 *
@@ -131,8 +103,8 @@ void pmaRegmapChangeStateAttrib(void *pMap, NvU64 frameNum,
 *
 * @return void
 */
-void pmaRegmapChangePageStateAttrib(void * pMap, NvU64 frameNumStart, NvU64 pageSize,
-                                    PMA_PAGESTATUS newState, NvBool writeAttrib);
+void pmaRegmapChangePageStateAttribEx(void * pMap, NvU64 frameNumStart, NvU64 pageSize,
+                                      PMA_PAGESTATUS newState, PMA_PAGESTATUS newStateMask);

 /*!
 * @brief Changes the state & attrib bits specified by mask
@@ -150,6 +122,25 @@ void pmaRegmapChangeStateAttribEx(void *pMap, NvU64 frameNum,
                                  PMA_PAGESTATUS newState,
                                  PMA_PAGESTATUS newStateMask);

+/*!
+ * @brief Changes the state & attrib bits specified by mask
+ *
+ * Changes the state of the bits given the physical frame number
+ * and the number of frames to change
+ *
+ * @param[in]   pMap         The regmap to change
+ * @param[in]   frameNum     The frame number to change
+ * @param[in]   numFrames    The number of frames to change
+ * @param[in]   newState     The new state to change to
+ * @param[in]   newStateMask Specific bits to write
+ *
+ * @return void
+ */
+void pmaRegmapChangeBlockStateAttrib(void *pMap, NvU64 frameNum,
+                                     NvU64 numFrames,
+                                     PMA_PAGESTATUS newState,
+                                     PMA_PAGESTATUS newStateMask);
+
 /*!
 * @brief Read the page state & attrib bits
 *
--- a/src/nvidia/src/kernel/gpu/bus/arch/maxwell/kern_bus_gm107.c
+++ b/src/nvidia/src/kernel/gpu/bus/arch/maxwell/kern_bus_gm107.c
@@ -2404,6 +2404,8 @@ kbusUpdateRmAperture_GM107
    {
        pFmt = pKernelBus->bar2[gfid].pFmt;

+        NV_CHECK_OR_RETURN(LEVEL_ERROR, pFmt != NULL, NV_ERR_INVALID_ARGUMENT);
+
        // MMU_MAP_CTX
        mapTarget.pLevelFmt      = mmuFmtFindLevelWithPageShift(pFmt->pRoot,
                                                                BIT_IDX_64(pageSize));
--- a/src/nvidia/src/kernel/gpu/fifo/kernel_channel.c
+++ b/src/nvidia/src/kernel/gpu/fifo/kernel_channel.c
@@ -2776,6 +2776,10 @@ kchannelCtrlCmdGetClassEngineid_IMPL
        return NV_ERR_OBJECT_NOT_FOUND;
    }

+    NV_CHECK_OR_RETURN(LEVEL_ERROR,
+                       pParams->hObject != RES_GET_CLIENT_HANDLE(pKernelChannel),
+                       NV_ERR_INVALID_ARGUMENT);
+
    if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
        (IS_VIRTUAL_WITH_SRIOV(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu)))
    {
--- a/src/nvidia/src/kernel/gpu/gr/kernel_graphics.c
+++ b/src/nvidia/src/kernel/gpu/gr/kernel_graphics.c
@@ -1738,8 +1738,9 @@ kgraphicsCreateGoldenImageChannel_IMPL
    NvU32                                  classNum;
    MIG_INSTANCE_REF                       ref;
    NvU32                                  objectType;
+    NvU32                                  primarySliSubDeviceInstance;

-    // XXX This should be removed when braodcast SLI support is deprecated
+    // XXX This should be removed when broadcast SLI support is deprecated
    if (!gpumgrIsParentGPU(pGpu))
    {
        return NV_OK;
@@ -1750,6 +1751,8 @@ kgraphicsCreateGoldenImageChannel_IMPL
    // FIXME these allocations corrupt BC state
    NV_ASSERT_OK_OR_RETURN(
        rmapiutilAllocClientAndDeviceHandles(pRmApi, pGpu, &hClientId, &hDeviceId, &hSubdeviceId));
+    // rmapiutilAllocClientAndDeviceHandles allocates a subdevice object for this subDeviceInstance
+    primarySliSubDeviceInstance = gpumgrGetSubDeviceInstanceFromGpu(pGpu);

    NV_ASSERT_OK_OR_RETURN(serverGetClientUnderLock(&g_resServ, hClientId, &pClientId));

@@ -1765,6 +1768,11 @@ kgraphicsCreateGoldenImageChannel_IMPL
    {
        NvHandle hSecondary;
        NV2080_ALLOC_PARAMETERS nv2080AllocParams;
+        NvU32 thisSubDeviceInstance = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
+
+        // Skip if already allocated by rmapiutilAllocClientAndDeviceHandles()
+        if (thisSubDeviceInstance == primarySliSubDeviceInstance)
+            SLI_LOOP_CONTINUE;

        // Allocate a subDevice
        NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
@@ -1772,7 +1780,7 @@ kgraphicsCreateGoldenImageChannel_IMPL
            cleanup);

        portMemSet(&nv2080AllocParams, 0, sizeof(nv2080AllocParams));
-        nv2080AllocParams.subDeviceId = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
+        nv2080AllocParams.subDeviceId = thisSubDeviceInstance;

        NV_CHECK_OK(status, LEVEL_SILENT,
            pRmApi->AllocWithHandle(pRmApi,
--- a/src/nvidia/src/kernel/gpu/mem_mgr/mem_mgr.c
+++ b/src/nvidia/src/kernel/gpu/mem_mgr/mem_mgr.c
@@ -390,8 +390,8 @@ memmgrInitInternalChannels_IMPL
        IS_MIG_ENABLED(pGpu) ||
        gpuIsCCorApmFeatureEnabled(pGpu) ||
        IsSLIEnabled(pGpu) ||
-        RMCFG_FEATURE_ARCH_PPC64LE ||
-        RMCFG_FEATURE_ARCH_AARCH64)
+        NVCPU_IS_PPC64LE ||
+        NVCPU_IS_AARCH64)
    {
        // BUG 4167899: Temporarily skip CeUtils creation on platforms where it fails
        NV_PRINTF(LEVEL_INFO, "Skipping global CeUtils creation\n");
--- a/src/nvidia/src/kernel/gpu/mem_mgr/phys_mem_allocator/addrtree.c
+++ b/src/nvidia/src/kernel/gpu/mem_mgr/phys_mem_allocator/addrtree.c
@@ -1890,8 +1890,8 @@ __pmaAddrtreeChangePageStateAttribEx
 // This function wraps the real __pmaAddrtreeChangePageStateAttribEx
 // to allow addrtree to set 128KB page size
 //
-static void
-_pmaAddrtreeChangePageStateAttribEx
+void
+pmaAddrtreeChangePageStateAttribEx
 (
    void           *pMap,
    NvU64           frameNumStart,
@@ -1939,35 +1939,24 @@ pmaAddrtreeChangeStateAttribEx
    PMA_PAGESTATUS  newStateMask
 )
 {
-    _pmaAddrtreeChangePageStateAttribEx(pMap, frameNum, _PMA_64KB, newState, newStateMask);
-}
-
-// TODO: merge this on PMA level
-void pmaAddrtreeChangeState(void *pTree, NvU64 frameNum, PMA_PAGESTATUS newState)
-{
-    pmaAddrtreeChangeStateAttribEx(pTree, frameNum, newState, STATE_MASK);
-}
-
-// TODO: merge this on PMA level
-void pmaAddrtreeChangeStateAttrib(void *pTree, NvU64 frameNum, PMA_PAGESTATUS newState, NvBool writeAttrib)
-{
-    PMA_PAGESTATUS mask = writeAttrib ? MAP_MASK : STATE_MASK;
-    pmaAddrtreeChangeStateAttribEx(pTree, frameNum, newState, mask);
+    pmaAddrtreeChangePageStateAttribEx(pMap, frameNum, _PMA_64KB, newState, newStateMask);
 }

-// TODO: merge this on PMA level
 void
-pmaAddrtreeChangePageStateAttrib
+pmaAddrtreeChangeBlockStateAttrib
 (
-    void * pTree,
-    NvU64 frameNumStart,
-    NvU64 pageSize,
+    void *pMap,
+    NvU64 frame,
+    NvU64 len,
    PMA_PAGESTATUS newState,
-    NvBool writeAttrib
+    PMA_PAGESTATUS writeMask
 )
 {
-    PMA_PAGESTATUS mask = writeAttrib ? MAP_MASK : STATE_MASK;
-    _pmaAddrtreeChangePageStateAttribEx(pTree, frameNumStart, pageSize, newState, mask);
+    while (len != 0)
+    {
+        len--;
+        pmaAddrtreeChangeStateAttribEx(pMap, frame + len, newState, writeMask);
+    }
 }

 PMA_PAGESTATUS pmaAddrtreeRead
--- a/src/nvidia/src/kernel/gpu/mem_mgr/phys_mem_allocator/numa.c
+++ b/src/nvidia/src/kernel/gpu/mem_mgr/phys_mem_allocator/numa.c
@@ -651,7 +651,7 @@ NV_STATUS pmaNumaAllocate
                    status = NV_ERR_NO_MEMORY;
                    break;
                }
-                pPma->pMapInfo->pmaMapChangeStateAttrib(pMap, frameOffset, allocOption, NV_TRUE);
+                pPma->pMapInfo->pmaMapChangeStateAttribEx(pMap, frameOffset, allocOption, MAP_MASK);
            }
            if (status != NV_OK)
                break;
--- a/src/nvidia/src/kernel/gpu/mem_mgr/phys_mem_allocator/phys_mem_allocator.c
+++ b/src/nvidia/src/kernel/gpu/mem_mgr/phys_mem_allocator/phys_mem_allocator.c
@@ -97,7 +97,7 @@ _pmaRollback

            for (j = 0; j < framesPerPage; j++)
            {
-                pPma->pMapInfo->pmaMapChangeState(pPma->pRegions[regId], (frameNum + j), oldState);
+                pPma->pMapInfo->pmaMapChangeStateAttribEx(pPma->pRegions[regId], (frameNum + j), oldState, STATE_MASK);
            }
        }
    }
@@ -110,7 +110,7 @@ _pmaRollback
        frameNum = PMA_ADDR2FRAME(pPages[failCount], addrBase);
        for(i = 0; i < failFrame; i++)
        {
-            pPma->pMapInfo->pmaMapChangeState(pPma->pRegions[regId], (frameNum + i), oldState);
+            pPma->pMapInfo->pmaMapChangeStateAttribEx(pPma->pRegions[regId], (frameNum + i), oldState, STATE_MASK);
        }
    }
 }
@@ -208,10 +208,9 @@ pmaInitialize(PMA *pPma, NvU32 initFlags)
    //
    pMapInfo->pmaMapInit = pmaRegmapInit;
    pMapInfo->pmaMapDestroy = pmaRegmapDestroy;
-    pMapInfo->pmaMapChangeState = pmaRegmapChangeState;
-    pMapInfo->pmaMapChangeStateAttrib = pmaRegmapChangeStateAttrib;
    pMapInfo->pmaMapChangeStateAttribEx = pmaRegmapChangeStateAttribEx;
-    pMapInfo->pmaMapChangePageStateAttrib = pmaRegmapChangePageStateAttrib;
+    pMapInfo->pmaMapChangePageStateAttribEx = pmaRegmapChangePageStateAttribEx;
+    pMapInfo->pmaMapChangeBlockStateAttrib = pmaRegmapChangeBlockStateAttrib;
    pMapInfo->pmaMapRead = pmaRegmapRead;
    pMapInfo->pmaMapScanContiguous = pmaRegmapScanContiguous;
    pMapInfo->pmaMapScanDiscontiguous = pmaRegmapScanDiscontiguous;
@@ -246,10 +245,9 @@ pmaInitialize(PMA *pPma, NvU32 initFlags)
        {
            pMapInfo->pmaMapInit = pmaAddrtreeInit;
            pMapInfo->pmaMapDestroy = pmaAddrtreeDestroy;
-            pMapInfo->pmaMapChangeState = pmaAddrtreeChangeState;
-            pMapInfo->pmaMapChangeStateAttrib = pmaAddrtreeChangeStateAttrib;
            pMapInfo->pmaMapChangeStateAttribEx = pmaAddrtreeChangeStateAttribEx;
-            pMapInfo->pmaMapChangePageStateAttrib = pmaAddrtreeChangePageStateAttrib;
+            pMapInfo->pmaMapChangePageStateAttribEx = pmaAddrtreeChangePageStateAttribEx;
+            pMapInfo->pmaMapChangeBlockStateAttrib = pmaAddrtreeChangeBlockStateAttrib;
            pMapInfo->pmaMapRead = pmaAddrtreeRead;
            pMapInfo->pmaMapScanContiguous = pmaAddrtreeScanContiguous;
            pMapInfo->pmaMapScanDiscontiguous = pmaAddrtreeScanDiscontiguous;
@ -1084,11 +1082,8 @@ pmaAllocatePages_retry:
                                  frameBase,
                                  frameBase + numFramesAllocated - 1);

-            for (i = 0; i < numPagesAllocatedSoFar; i++)
-            {
-                pPma->pMapInfo->pmaMapChangePageStateAttrib(pMap, frameBase + (i * framesPerPage),
-                                                            pageSize, pinOption, NV_TRUE);
-            }
+            pPma->pMapInfo->pmaMapChangeBlockStateAttrib(pMap, frameBase, numPagesAllocatedSoFar * framesPerPage,
+                                                         pinOption, MAP_MASK);

            if (blacklistOffFlag && blacklistOffPerRegion[regId])
            {
@ -1134,8 +1129,8 @@ pmaAllocatePages_retry:
                }
                lastFrameRangeEnd = frameBase + framesPerPage - 1;

-                pPma->pMapInfo->pmaMapChangePageStateAttrib(pMap, PMA_ADDR2FRAME(pPages[i], addrBase),
-                                                            pageSize, pinOption, NV_TRUE);
+                pPma->pMapInfo->pmaMapChangePageStateAttribEx(pMap, PMA_ADDR2FRAME(pPages[i], addrBase),
+                                                            pageSize, pinOption, MAP_MASK);

            }
            NV_PRINTF(LEVEL_INFO, "0x%llx through 0x%llx \n",
@ -1267,7 +1262,7 @@ pmaPinPages
            }
            else
            {
-                pPma->pMapInfo->pmaMapChangeState(pPma->pRegions[regId], (frameNum + j), STATE_PIN);
+                pPma->pMapInfo->pmaMapChangeStateAttribEx(pPma->pRegions[regId], (frameNum + j), STATE_PIN, STATE_MASK);
            }
        }
    }
@ -1320,7 +1315,7 @@ pmaUnpinPages
            }
            else
            {
-                pPma->pMapInfo->pmaMapChangeState(pPma->pRegions[regId], (frameNum + j), STATE_UNPIN);
+                pPma->pMapInfo->pmaMapChangeStateAttribEx(pPma->pRegions[regId], (frameNum + j), STATE_UNPIN, STATE_MASK);
            }
        }
    }
--- a/src/nvidia/src/kernel/gpu/mem_mgr/phys_mem_allocator/regmap.c
+++ b/src/nvidia/src/kernel/gpu/mem_mgr/phys_mem_allocator/regmap.c
--- a/src/nvidia/src/kernel/rmapi/event.c
+++ b/src/nvidia/src/kernel/rmapi/event.c
@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
--- a/src/nvidia/src/libraries/mmu/mmu_walk.c
+++ b/src/nvidia/src/libraries/mmu/mmu_walk.c
@ -177,6 +177,7 @@ mmuWalkSetUserCtx
    MMU_WALK_USER_CTX *pUserCtx
 )
 {
+    NV_ASSERT_OR_RETURN(NULL != pWalk, NV_ERR_INVALID_STATE);

    pWalk->pUserCtx = pUserCtx;
    return NV_OK;
@ -223,9 +224,12 @@ mmuWalkFindLevel
 )
 {
    const MMU_WALK_LEVEL *pLevel = &pWalk->root;
-    while (pLevel->pFmt != pLevelFmt)
+    while (pLevel != NULL && pLevel->pFmt != pLevelFmt)
    {
        NvU32 subLevel;
+
+        NV_ASSERT_OR_RETURN(pLevel->pFmt != NULL, NULL);
+
        // Single sub-level always continues.
        if (1 == pLevel->pFmt->numSubLevels)
        {
--- a/utils.mk
+++ b/utils.mk
@ -575,8 +575,14 @@ LD_TARGET_EMULATION_FLAG_SunOS_x86_64   = elf_x86_64_sol2
 LD_TARGET_EMULATION_FLAG_FreeBSD_x86    = elf_i386_fbsd
 LD_TARGET_EMULATION_FLAG_FreeBSD_x86_64 = elf_x86_64_fbsd

+# Different linkers (GNU ld versus ld.lld versus ld.gold) expect different
+# target architecture values for '-m'.  Empirically, only ld.lld appears to
+# actually need it, so only add the option when linking with ld.lld.  Example
+# `ld.lld -v` output: "LLD 15.0.7 (compatible with GNU linkers)".
+LD_IS_LLD := $(if $(filter LLD,$(shell $(LD) -v)),1)
+
 ifdef LD_TARGET_EMULATION_FLAG_$(TARGET_OS)_$(TARGET_ARCH)
-  LD_TARGET_EMULATION_FLAG = -m $(LD_TARGET_EMULATION_FLAG_$(TARGET_OS)_$(TARGET_ARCH))
+  LD_TARGET_EMULATION_FLAG = $(if $(LD_IS_LLD), -m $(LD_TARGET_EMULATION_FLAG_$(TARGET_OS)_$(TARGET_ARCH)))
 endif

 define READ_ONLY_OBJECT_FROM_FILE_RULE
--- a/version.mk
+++ b/version.mk
@ -1,4 +1,4 @@
-NVIDIA_VERSION = 545.23.06
+NVIDIA_VERSION = 545.29.03

 # This file.
 VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))