Browse Source

feat(plat/arm): firmware first error handling support for base RAMs

RD-N2 platform variants support base element RAM. The RAMs implement
ECC that detects 1-bit and 2-bit errors and reports them via interrupts.
The error information is reported as part of the error record frames
defined for base element RAMs.

This patch provides a reference error handler implementation to handle
1/2-bit RAS errors that occur on base element RAMs. On an error event,
the error handler reads the error record information and forwards the
event to the secure partition. The secure partition creates a CPER record
from this error information. Finally, the handler notifies the OS about
the RAS error using the SDEI notification mechanism.

Signed-off-by: Omkar Anand Kulkarni <omkar.kulkarni@arm.com>
Change-Id: Ic209c714de6cd2d4c845198b03724940a2e1c240
pull/1999/head
Omkar Anand Kulkarni 1 year ago
parent
commit
5b77a0e675
  1. 5
      plat/arm/css/sgi/include/sgi_ras.h
  2. 5
      plat/arm/css/sgi/include/sgi_soc_css_def_v2.h
  3. 112
      plat/arm/css/sgi/ras/sgi_ras_sram.c

5
plat/arm/css/sgi/include/sgi_ras.h

@ -55,4 +55,9 @@ struct sgi_ras_ev_map *sgi_find_ras_event_map_by_intr(uint32_t intr_num);
*/ */
int sgi_ras_platform_setup(struct plat_sgi_ras_config *config); int sgi_ras_platform_setup(struct plat_sgi_ras_config *config);
/*
 * Base element RAM RAS interrupt handler function.
 *
 * err_rec:    error record information for the reporting error record.
 * probe_data: probe data associated with the error record.
 * data:       error handler data; carries the number of the interrupt that
 *             was raised.
 *
 * Returns 0 on success, a non-zero value otherwise.
 */
int sgi_ras_sram_intr_handler(const struct err_record_info *err_rec,
int probe_data,
const struct err_handler_data *const data);
#endif /* SGI_RAS_H */ #endif /* SGI_RAS_H */

5
plat/arm/css/sgi/include/sgi_soc_css_def_v2.h

@ -58,6 +58,11 @@
#define END_KEY_BASE (SOC_KEYS_BASE + 0x0044) #define END_KEY_BASE (SOC_KEYS_BASE + 0x0044)
#define END_KEY_SIZE U(32) #define END_KEY_SIZE U(32)
/*
 * Base Element RAM error definitions:
 * - SOC_NS_RAM_ERR_REC_BASE: base address of the RAM error record frame.
 * - NS_RAM_ECC_CE_INT, NS_RAM_ECC_UE_INT: interrupt numbers on which RAM ECC
 *   errors are reported.
 * NOTE(review): CE/UE presumably stand for correctable/uncorrectable error and
 * NS for the non-secure RAM - confirm against the platform documentation.
 */
#define SOC_NS_RAM_ERR_REC_BASE UL(0x2A4C0000)
#define NS_RAM_ECC_CE_INT U(87)
#define NS_RAM_ECC_UE_INT U(88)
#define SOC_PLATFORM_PERIPH_MAP_DEVICE MAP_REGION_FLAT( \ #define SOC_PLATFORM_PERIPH_MAP_DEVICE MAP_REGION_FLAT( \
SOC_PLATFORM_PERIPH_BASE, \ SOC_PLATFORM_PERIPH_BASE, \
SOC_PLATFORM_PERIPH_SIZE, \ SOC_PLATFORM_PERIPH_SIZE, \

112
plat/arm/css/sgi/ras/sgi_ras_sram.c

@ -0,0 +1,112 @@
/*
* Copyright (c) 2023, ARM Limited and Contributors. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include <bl31/interrupt_mgmt.h>
#include <lib/el3_runtime/context_mgmt.h>
#include <plat/common/platform.h>
#include <services/sdei.h>
#include <services/spm_mm_svc.h>
#include <platform_def.h>
#include <sgi_ras.h>
/*
 * Base Element RAM Error Record offsets.
 *
 * Byte offsets that are added to the error record base address to access the
 * individual 32-bit registers of the record.
 */
#define ERRSTATUS U(0)
#define ERRCODE U(8)
#define ERRADDR U(12)
/*
 * Base Element RAM error information data structure communicated as part of MM
 * Communication data payload.
 *
 * Each field holds the raw 32-bit value read from the register of the same
 * name in the error record. The structure is copied byte-for-byte into the
 * payload, so its layout must match what the receiving side expects.
 */
typedef struct sgi_sram_err_info {
/* Value of the ERRSTATUS register. */
uint32_t err_status;
/* Value of the ERRCODE register. */
uint32_t err_code;
/* Value of the ERRADDR register. */
uint32_t err_addr;
} sgi_sram_err_info_t;
/*
 * MM Communicate message header GUID to indicate the payload is intended for
 * base element RAM MM driver.
 *
 * The value is copied verbatim into the header_guid field of the MM
 * communicate header by the interrupt handler.
 */
struct efi_guid sram_ecc_event_guid = {
0x7312db4f, 0xd0c4, 0x4fb5,
{ 0x81, 0x2c, 0xb7, 0x4b, 0xc6, 0xc4, 0xa9, 0x38 }
};
/* Base element RAM RAS error interrupt handler */
int sgi_ras_sram_intr_handler(const struct err_record_info *err_rec,
int probe_data,
const struct err_handler_data *const data)
{
struct sgi_ras_ev_map *ras_map;
mm_communicate_header_t *header;
sgi_sram_err_info_t sram_info;
uintptr_t base_addr;
uint32_t clear_status, intr;
int ret;
cm_el1_sysregs_context_save(NON_SECURE);
intr = data->interrupt;
INFO("SGI: Base element RAM interrupt [%d] handler\n", intr);
/* Determine error record base address to read. */
base_addr = 0;
if (intr == NS_RAM_ECC_CE_INT || intr == NS_RAM_ECC_UE_INT) {
base_addr = SOC_NS_RAM_ERR_REC_BASE;
}
sram_info.err_status = mmio_read_32(base_addr + ERRSTATUS);
sram_info.err_code = mmio_read_32(base_addr + ERRCODE);
sram_info.err_addr = mmio_read_32(base_addr + ERRADDR);
/* Clear the interrupt. */
clear_status = mmio_read_32(base_addr + ERRSTATUS);
mmio_write_32((base_addr + ERRSTATUS), clear_status);
/*
* Prepare the MM Communication buffer to pass the base element RAM
* error information to Secure Partition.
*/
header = (void *)PLAT_SPM_BUF_BASE;
memset(header, 0, sizeof(*header));
memcpy(&header->data, &sram_info, sizeof(sram_info));
header->message_len = sizeof(sram_info);
memcpy(&header->header_guid, (void *)&sram_ecc_event_guid,
sizeof(struct efi_guid));
spm_mm_sp_call(MM_COMMUNICATE_AARCH64, (uint64_t)header, 0,
plat_my_core_pos());
plat_ic_end_of_interrupt(intr);
/*
* Find if this is a RAS interrupt. There must be an event against
* this interrupt
*/
ras_map = sgi_find_ras_event_map_by_intr(intr);
if (ras_map == NULL) {
ERROR("SGI: RAS error info for interrupt id: %d not found\n",
intr);
return -1;
}
/* Dispatch the event to the SDEI client */
ret = sdei_dispatch_event(ras_map->sdei_ev_num);
if (ret != 0) {
/*
* sdei_dispatch_event() may return failing result in some
* cases, for example kernel may not have registered a handler
* or RAS event may happen early during boot. We restore the NS
* context when sdei_dispatch_event() returns failing result.
*/
ERROR("SDEI dispatch failed: %d", ret);
cm_el1_sysregs_context_restore(NON_SECURE);
cm_set_next_eret_context(NON_SECURE);
}
return ret;
}
Loading…
Cancel
Save