-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for fatal error monitoring
- The application monitors APML_ALERT_L and, upon assertion of the GPIO pin, the BMC collects the MCA MSR dump and creates a CPER record. - Depending on the user configuration, the application initiates system recovery. - ras-config.json contains the configuration parameters with their default values. Users can get and set the configuration parameters using D-Bus calls to the methods getAttribute and setAttribute. - The application also harvests the last transaction address and the debug log IDs. - The application is intended to be supported on 1P and 2P platforms. Signed-off-by: Abinaya Dhandapani <[email protected]>
- Loading branch information
Abinaya Dhandapani
committed
Nov 21, 2024
1 parent
bf04998
commit d274822
Showing
15 changed files
with
3,078 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
{ | ||
"Configuration": [ | ||
{ | ||
"ApmlRetries": { | ||
"Description": "Number of APML retry count", | ||
"Value": 10, | ||
"MaxBoundLimit": "50" | ||
} | ||
}, | ||
{ | ||
"SystemRecovery": { | ||
"Description": "System recovery mode", | ||
"Value": "NO_RESET" | ||
} | ||
}, | ||
{ | ||
"HarvestMicrocode": { | ||
"Description": "Harvest microcode version", | ||
"Value": true | ||
} | ||
}, | ||
{ | ||
"HarvestPPIN": { | ||
"Description": "Harvest PPIN", | ||
"Value": true | ||
} | ||
}, | ||
{ | ||
"ResetSignal": { | ||
"Description": "Reset Signal Type", | ||
"Value": "SYS_RST" | ||
} | ||
}, | ||
{ | ||
"SigIdOffset": { | ||
"Description": "List of Signature ID offsets", | ||
"Value": [ | ||
"0x30", | ||
"0x34", | ||
"0x28", | ||
"0x2c", | ||
"0x08", | ||
"0x0c", | ||
"null", | ||
"null" | ||
] | ||
} | ||
}, | ||
{ | ||
"AifsArmed": { | ||
"Description": "If this field is true, AIFS flow is triggered", | ||
"Value": false | ||
} | ||
}, | ||
{ | ||
"AifsSignatureId": { | ||
"Description": "List of signature Id to check if Aifs is triggered", | ||
"Value": { | ||
"EX-WDT": "0xaea0000000000108000500b020009a00000000004d000000" | ||
} | ||
} | ||
}, | ||
{ | ||
"DisableAifsResetOnSyncfloodCounter": { | ||
"Description": "Disable AIFS Reset on syncflood counter", | ||
"Value": true | ||
} | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#include "cper_generator.hpp" | ||
#include "interface_manager_base.hpp" | ||
|
||
class ApmlInterfaceManager : public RasManagerBase | ||
{ | ||
public: | ||
virtual void init(); | ||
|
||
virtual void configure(); | ||
|
||
ApmlInterfaceManager( | ||
sdbusplus::asio::object_server& objectServer, | ||
std::shared_ptr<sdbusplus::asio::connection>& systemBus, | ||
boost::asio::io_service& io) : | ||
RasManagerBase(objectServer, systemBus, io) | ||
{} | ||
|
||
protected: | ||
std::vector<uint8_t> blockId; | ||
uint32_t familyId; | ||
std::mutex harvest_in_progress_mtx; | ||
bool p0AlertProcessed = false; | ||
bool p1AlertProcessed = false; | ||
uint64_t recordId = 1; | ||
uint16_t debugLogIdOffset; | ||
uint32_t SignatureID[8]; | ||
virtual void interfaceActiveMonitor(); | ||
virtual void getCpuId(); | ||
virtual void findProgramId(); | ||
virtual void harvestFatalError(uint8_t); | ||
void triggerWarmReset() override; | ||
void clearSbrmiAlertMask(uint8_t); | ||
|
||
void performPlatformInitialization(); | ||
|
||
oob_status_t readRegister(uint8_t, uint32_t, uint8_t*); | ||
|
||
void writeRegister(uint8_t, uint32_t, uint32_t); | ||
bool compare_with_bitwise_AND(const uint32_t*, const std::string&); | ||
bool checkSignatureIdMatch(); | ||
std::vector<uint32_t> hexstring_to_vector(const std::string&); | ||
bool harvestMcaValidityCheck(uint8_t, uint16_t*, uint16_t*); | ||
template <typename T> | ||
void harvestMcaDataBanks(uint8_t, uint16_t, uint16_t, CperGenerator<T>&); | ||
void getLastTransAddr(EFI_AMD_FATAL_ERROR_DATA*, uint8_t); | ||
void dumpContextInfo(EFI_AMD_FATAL_ERROR_DATA*, uint8_t); | ||
void harvestDebugLogDump(EFI_AMD_FATAL_ERROR_DATA*, uint8_t, uint8_t); | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
#pragma once | ||
|
||
#include <com/amd/RAS/Configuration/common.hpp> | ||
#include <com/amd/RAS/Configuration/server.hpp> | ||
#include <sdbusplus/asio/object_server.hpp> | ||
#include <sdbusplus/server.hpp> | ||
static constexpr auto objectPath = "/com/amd/RAS"; | ||
|
||
using AttributeType = | ||
sdbusplus::common::com::amd::ras::Configuration::AttributeType; | ||
using Base = sdbusplus::com::amd::RAS::server::Configuration; | ||
|
||
using AttributeName = std::string; | ||
using AttributeValue = | ||
std::variant<bool, std::string, int64_t, std::vector<std::string>, | ||
std::map<std::string, std::string>>; | ||
using ConfigTable = | ||
std::map<std::string, | ||
std::tuple<AttributeType, std::string, | ||
std::variant<bool, std::string, int64_t, | ||
std::vector<std::string>, | ||
std::map<std::string, std::string>>, | ||
int64_t>>; | ||
|
||
struct EventDeleter | ||
{ | ||
void operator()(sd_event* event) const | ||
{ | ||
event = sd_event_unref(event); | ||
} | ||
}; | ||
using EventPtr = std::unique_ptr<sd_event, EventDeleter>; | ||
|
||
/** | ||
* @brief Declaration of the RasConfiguration class. | ||
* | ||
* RasConfiguration is a plain (non-template) class deriving from Base, the | ||
* Configuration server alias declared earlier in this header. It overrides | ||
* setAttribute() and getAttribute(), which operate on the AttributeName and | ||
* AttributeValue aliases declared earlier in this header. | ||
*/ | ||
|
||
class RasConfiguration : public Base | ||
{ | ||
public: | ||
/** | ||
* Constructor for Configuration. | ||
* | ||
* @param objectServer Reference to the object server. | ||
* @param systemBus Reference to the system D-Bus connection. | ||
*/ | ||
RasConfiguration(sdbusplus::asio::object_server& objectServer, | ||
std::shared_ptr<sdbusplus::asio::connection>& systemBus); | ||
|
||
/** | ||
* Set the value of an attribute. | ||
* | ||
* @param attribute The name of the attribute. | ||
* @param value The value to set. | ||
*/ | ||
void setAttribute(AttributeName attribute, AttributeValue value) override; | ||
|
||
/** | ||
* Get the value of an attribute. | ||
* | ||
* @param attribute The name of the attribute. | ||
* @return The value of the attribute. | ||
*/ | ||
AttributeValue getAttribute(AttributeName attribute) override; | ||
|
||
private: | ||
// Both members are references, not copies: whatever they are bound to
// must outlive this object.
// NOTE(review): presumably bound to the constructor arguments; the
// constructor body is not visible in this header - confirm.
sdbusplus::asio::object_server& objServer; | ||
std::shared_ptr<sdbusplus::asio::connection>& systemBus; | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
#pragma once | ||
|
||
#include "ras.hpp" | ||
|
||
extern "C" | ||
{ | ||
#include "apml.h" | ||
#include "apml_common.h" | ||
#include "esmi_cpuid_msr.h" | ||
#include "esmi_mailbox.h" | ||
#include "esmi_rmi.h" | ||
} | ||
|
||
/** @class CperGenerator | ||
* @brief Implementation of CPER record creation | ||
*/ | ||
template <typename T> | ||
class CperGenerator | ||
{ | ||
protected: | ||
uint32_t boardId; | ||
uint8_t numOfCpu; | ||
uint64_t recordId; | ||
uint8_t progId; | ||
uint32_t familyId; | ||
int errCount; | ||
|
||
public: | ||
CperGenerator(uint8_t numOfCpu, uint8_t progId, uint32_t familyId, | ||
int errCount) : | ||
numOfCpu(numOfCpu), recordId(1), progId(progId), familyId(familyId), | ||
errCount(errCount) | ||
{} | ||
|
||
void dumpCperHeaderSection(const std::shared_ptr<T>& data, | ||
uint16_t sectionCount, uint32_t errorSeverity, | ||
std::string errorType); | ||
|
||
void calculateTimeStamp(const std::shared_ptr<T>& data); | ||
|
||
void dumpErrorDescriptorSection(const std::shared_ptr<T>&, uint16_t, | ||
std::string); | ||
|
||
void dumpProcessorErrorSection(const std::shared_ptr<FatalCperRecord>&, | ||
uint8_t, CpuId*); | ||
|
||
void cperFileWrite(const std::shared_ptr<T>&, std::string, uint16_t); | ||
|
||
void dumpContextInfo(const std::shared_ptr<FatalCperRecord>&, uint16_t, | ||
uint16_t, uint8_t, std::vector<uint8_t>, uint64_t*, | ||
uint32_t*, int64_t*); | ||
|
||
void getLastTransAddr(const std::shared_ptr<FatalCperRecord>&, uint8_t); | ||
|
||
void harvestDebugLogDump(const std::shared_ptr<FatalCperRecord>&, uint8_t, | ||
uint8_t, int64_t*, uint16_t&); | ||
|
||
std::string getCperFilename(int); | ||
|
||
void dumpProcErrorSection(const std::shared_ptr<T>&, uint8_t, | ||
struct ras_rt_valid_err_inst, uint8_t, uint16_t, | ||
uint32_t*, uint64_t*, int64_t, | ||
std::map<std::string, std::string>*, | ||
std::map<std::string, std::string>*); | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
#include "config_manager.hpp" | ||
#include "ras.hpp" | ||
|
||
#include <boost/asio/io_service.hpp> | ||
#include <boost/asio/posix/stream_descriptor.hpp> | ||
#include <gpiod.hpp> | ||
#include <nlohmann/json.hpp> | ||
|
||
class RasManagerBase : public RasConfiguration | ||
{ | ||
public: | ||
RasManagerBase(sdbusplus::asio::object_server& objectServer, | ||
std::shared_ptr<sdbusplus::asio::connection>& systemBus, | ||
boost::asio::io_service& io) : | ||
RasConfiguration(objectServer, systemBus), io(io), | ||
p0_apmlAlertEvent(io), p1_apmlAlertEvent(io) | ||
{} | ||
|
||
virtual void init() = 0; | ||
|
||
virtual void configure() = 0; | ||
|
||
virtual ~RasManagerBase(); | ||
|
||
void p0AlertEventHandler(); | ||
void p1AlertEventHandler(); | ||
|
||
protected: | ||
boost::asio::io_service& io; | ||
uint8_t numOfCpu; | ||
CpuId* cpuId; | ||
uint32_t* uCode; | ||
uint64_t* ppin; | ||
std::string* inventoryPath; | ||
unsigned int boardId; | ||
uint8_t progId; | ||
int errCount = 0; | ||
std::shared_ptr<FatalCperRecord> rcd = NULL; | ||
boost::asio::posix::stream_descriptor p0_apmlAlertEvent; | ||
boost::asio::posix::stream_descriptor p1_apmlAlertEvent; | ||
gpiod::line p0_apmlAlertLine; | ||
gpiod::line p1_apmlAlertLine; | ||
|
||
void getNumberOfCpu(); | ||
void getBoardId(); | ||
void createIndexFile(); | ||
void createConfigFile(); | ||
void getMicrocodeRev(); | ||
void getPpinFuse(); | ||
template <typename T> | ||
T getProperty(sdbusplus::bus::bus&, const char*, const char*, const char*, | ||
const char*); | ||
void requestGPIOEvents(const std::string&, const std::function<void()>&, | ||
gpiod::line&, | ||
boost::asio::posix::stream_descriptor&); | ||
void rasRecoveryAction(uint8_t); | ||
void triggerColdReset(); | ||
void triggerRsmrstReset(); | ||
void triggerSysReset(); | ||
virtual void triggerWarmReset() = 0; | ||
void requestHostTransition(std::string); | ||
|
||
virtual void interfaceActiveMonitor() = 0; | ||
virtual void getCpuId() = 0; | ||
virtual void findProgramId() = 0; | ||
virtual void harvestFatalError(uint8_t) = 0; | ||
}; |
Oops, something went wrong.