



# ghosts in the machine check



{ domas / @xoreaxeaxeax / Black Hat 2025



(demo)





state disruption

- Interrupts and Exceptions



```
static void rtc_get_rtc_time(struct rtc_time *rtc_tm)
{
    unsigned long uip_watchdog = jiffies, flags;
    unsigned char ctrl;

    while (rtc_is_updating() != 0 &&
           time_before(jiffies, uip_watchdog + 2*HZ/100))
        cpu_relax();

    ...
}
```

CPU



```
static void rtc_get_rtc_time(struct rtc_time *rtc_tm)
{
    unsigned long uip_watchdog = jiffies, flags;
    unsigned char ctrl;

    while (rtc_is_updating() != 0 &&
           time_before(jiffies, uip_watchdog + 2*HZ/100))
        cpu_relax();

    ...
}
```

CPU

PCIe

```
static void rtc_get_rtc_time(struct rtc_time *rtc_tm)
{
    unsigned long uip_watchdog = jiffies, flags;
    unsigned char ctrl;

    while (rtc_is_updating() != 0 &&
           time_before(jiffies, uip_watchdog + 2*HZ/100))
        cpu_relax();

    ...
}
```





```
static void rtc_get_rtc_time(struct rtc_time *rtc_tm)
{
    unsigned long uip_watchdog = jiffies, flags;
    unsigned char ctrl;

    while (rtc_is_updating() != 0 &&
           time_before(jiffies, uip_watchdog + 2*HZ/100))
        cpu_relax();
    ...
}

static irqreturn_t dpc_irq(int irq, void *context)
{
    struct pci_dev *pdev = context;
    u16 cap = pdev->dpc_cap, status;

    pci_read_config_word(pdev, cap + ..., &status);
    ...
    pci_write_config_word(pdev, cap + ..., ...);
    ...
    return IRQ_HANDLED;
}
```

A red bracket on the right side of the code highlights the line "return IRQ\_HANDLED;" and points to a red downward-pointing arrow. This arrow originates from the text "! interrupt" located above the closing brace of the interrupt handler function.



```
static void rtc_get_rtc_time(struct rtc_time *rtc_tm)
{
    unsigned long uip_watchdog = jiffies, flags;
    unsigned char ctrl;

    while (rtc_is_updating() != 0 &&
           time_before(jiffies, uip_watchdog + 2*HZ/100))
        cpu_relax();
    ...
}

static irqreturn_t dpc_irq(int irq, void *context)
{
    struct pci_dev *pdev = context;
    u16 cap = pdev->dpc_cap, status;

    pci_read_config_word(pdev, cap + ..., &status);
    ...
    pci_write_config_word(pdev, cap + ..., ...);
    ...
    return IRQ_HANDLED;
}
```

A red bracket on the right side of the code highlights the line "return IRQ\_HANDLED;" and the line above it. A red arrow points downwards from the word "interrupt" in the code to the "return IRQ\_HANDLED;" line, indicating that the interrupt handling function returns after executing its main logic.



```
static void rtc_get_rtc_time(struct rtc_time *rtc_tm)
{
    unsigned long uip_watchdog = jiffies, flags;
    unsigned char ctrl;

    while (rtc_is_updating() != 0 &&
           time_before(jiffies, uip_watchdog + 2*HZ/100))
        cpu_relax();
    ...
}

static irqreturn_t dpc_irq(int irq, void *context)
{
    struct pci_dev *pdev = context;
    u16 cap = pdev->dpc_cap, status;

    pci_read_config_word(pdev, cap + ..., &status);
    ...
    pci_write_config_word(pdev, cap + ..., ...);
    ...
    return IRQ_HANDLED;
}
```

A red bracket on the right side of the code highlights the line "return IRQ\_HANDLED;" and points to a red downward-pointing arrow. This arrow originates from the text "! interrupt" located above the closing brace of the function definition.



```
static void rtc_get_rtc_time(struct rtc_time *rtc_tm)
{
    unsigned long uip_watchdog = jiffies, flags;
    unsigned char ctrl;

    while (rtc_is_updating() != 0 &&
           time_before(jiffies, uip_watchdog + 2*HZ/100)) ←
        cpu_relax();
```

...

---

```
static irqreturn_t dpc_irq(int irq, void *context)
{
    struct pci_dev *pdev = context;
    u16 cap = pdev->dpc_cap, status;

    pci_read_config_word(pdev, cap + ..., &status);
    ...
    pci_write_config_word(pdev, cap + ..., ...);
    ...
    return IRQ_HANDLED;
```

- Interrupts and Exceptions
  - Trigger a handler
  - Handler must save/restore system state
    - Not always easy / practical / possible

state disruption

```
static void rtc_get_rtc_time(struct rtc_time *rtc_tm)
{
    unsigned long uip_watchdog = jiffies, flags;
    unsigned char ctrl;

    while (rtc_is_updating() != 0 &&
           time_before(jiffies, uip_watchdog + 2*HZ/100))
        cpu_relax();

    rtc_tm->tm_sec = CMOS_READ(RTC_SECONDS);
    rtc_tm->tm_min = CMOS_READ(RTC_MINUTES);
    rtc_tm->tm_hour = CMOS_READ(RTC_HOURS);
    rtc_tm->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH);
    rtc_tm->tm_mon = CMOS_READ(RTC_MONTH);
    rtc_tm->tm_year = CMOS_READ(RTC_YEAR);
    rtc_tm->tm_wday = CMOS_READ(RTC_DAY_OF_WEEK);

    ctrl = CMOS_READ(RTC_CONTROL);
}
```

```
static void rtc_get_rtc_time(struct rtc_time *rtc_tm)
{
    unsigned long uip_watchdog = jiffies, flags;
    unsigned char ctrl;

    while (rtc_is_updating() != 0 &&
           time_before(jiffies, uip_watchdog + 2*HZ/100))
        cpu_relax();

    rtc_tm->tm_sec = CMOS_READ(RTC_SECONDS);
    rtc_tm->tm_min = CMOS_READ(RTC_MINUTES);
    rtc_tm->tm_hour = CMOS_READ(RTC_HOURS);
    rtc_tm->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH);
    rtc_tm->tm_mon = CMOS_READ(RTC_MONTH);
    rtc_tm->tm_year = CMOS_READ(RTC_YEAR);
    rtc_tm->tm_wday = CMOS_READ(RTC_DAY_OF_WEEK);

    ctrl = CMOS_READ(RTC_CONTROL);

}

outb(RTC_SECONDS, RTC_PORT(0));
val = inb(RTC_PORT(1));
```

```
static void rtc_get_rtc_time(struct rtc_time *rtc_tm)
{
    unsigned long uip_watchdog = jiffies, flags;
    unsigned char ctrl;

    while (rtc_is_updating() != 0 &&
           time_before(jiffies, uip_watchdog + 2*HZ/100))
        cpu_relax();

    rtc_tm->tm_sec = CMOS_READ(RTC_SECONDS);
    rtc_tm->tm_min = CMOS_READ(RTC_MINUTES);
    rtc_tm->tm_hour = CMOS_READ(RTC_HOURS);
    rtc_tm->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH);
    rtc_tm->tm_mon = CMOS_READ(RTC_MONTH);
    rtc_tm->tm_year = CMOS_READ(RTC_YEAR);
    rtc_tm->tm_wday = CMOS_READ(RTC_DAY_OF_WEEK);

    ctrl = CMOS_READ(RTC_CONTROL);
}
```

outb(RTC\_SECONDS, RTC\_PORT(0));  
val = inb(RTC\_PORT(1));

!interrupt

```
static irqreturn_t rtc_interrupt(int irq, void *dev_id)
{
    ...
    outb(RTC_YEAR, RTC_PORT(0));
    ...
}
```

```
static void rtc_get_rtc_time(struct rtc_time *rtc_tm)
{
    unsigned long uip_watchdog = jiffies, flags;
    unsigned char ctrl;

    while (rtc_is_updating() != 0 &&
           time_before(jiffies, uip_watchdog + 2*HZ/100))
        cpu_relax();

    rtc_tm->tm_sec = CMOS_READ(RTC_SECONDS);
    rtc_tm->tm_min = CMOS_READ(RTC_MINUTES);
    rtc_tm->tm_hour = CMOS_READ(RTC_HOURS);
    rtc_tm->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH);
    rtc_tm->tm_mon = CMOS_READ(RTC_MONTH);
    rtc_tm->tm_year = CMOS_READ(RTC_YEAR);
    rtc_tm->tm_wday = CMOS_READ(RTC_DAY_OF_WEEK);

    ctrl = CMOS_READ(RTC_CONTROL);
}
```

```
outb(RTC_SECONDS, RTC_PORT(0));
val = inb(RTC_PORT(1));
...
static irqreturn_t rtc_interrupt(int irq, void *dev_id)
{
    ...
    outb(RTC_YEAR, RTC_PORT(0));
    ...
}
```

- Some things shouldn't be interrupted
  - Privilege and mode transitions
  - Secure environments
  - Interrupt handlers, page-table updates, critical sections, etc.
- Solution: interrupt suppression
  - Keep interrupts pending temporarily, then service in new environment

state disruption

```
static void rtc_get_rtc_time(struct rtc_time *rtc_tm)
{
    unsigned long uip_watchdog = jiffies, flags;
    unsigned char ctrl;

    while (rtc_is_updating() != 0 &&
           time_before(jiffies, uip_watchdog + 2*HZ/100))
        cpu_relax();

    spin_lock_irqsave(&rtc_lock, flags);
    rtc_tm->tm_sec = CMOS_READ(RTC_SECONDS);
    rtc_tm->tm_min = CMOS_READ(RTC_MINUTES);
    rtc_tm->tm_hour = CMOS_READ(RTC_HOURS);
    rtc_tm->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH);
    rtc_tm->tm_mon = CMOS_READ(RTC_MONTH);
    rtc_tm->tm_year = CMOS_READ(RTC_YEAR);
    rtc_tm->tm_wday = CMOS_READ(RTC_DAY_OF_WEEK);

    ctrl = CMOS_READ(RTC_CONTROL);
    spin_unlock_irqrestore(&rtc_lock, flags);
}
```

- Transition code / secure environments must *carefully* accommodate unsuppressed interrupts/exceptions
- As long as everything is written perfectly, all the time, for every fringe case, there are no issues

state disruption



state disruption

- Difficulties arise

- Wojtczuk (2012) – (userland to kernel)  
Interrupts/exceptions in syscall handler on untrusted stack

```
diff -r 340062faf298 -r ad87903fdca1 xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S      Wed May 23 11:06:49 2012 +0100
+++ b/xen/arch/x86/x86_64/entry.S      Thu May 24 11:02:35 2012 +0100
@@ -40,6 +40,13 @@ restore_all_guest:
        testw $TRAP_syscall,4(%rsp)
        jz  iret_exit_to_guest

+       /* Don't use SYSRET path if the return address is not canonical. */
+       movq 8(%rsp),%rcx
+       sarq $47,%rcx
+       incl %ecx
+       cmpl $1,%ecx
+       ja   .Lforce_iret
+
        addq $8,%rsp
        popq %rcx          # RIP
        popq %r11          # CS
@@ -50,6 +57,10 @@ restore_all_guest:
        sysretq
1:   sysretl

+.Lforce_iret:
+   /* Mimic SYSRET behavior. */
+   movq 8(%rsp),%rcx      # RIP
+   movq 24(%rsp),%r11     # RFLAGS
     ALIGN
/* No special register assumptions. */
iret_exit_to_guest:
```

- Peterson/Mulasmajic (2018) – (userland to kernel)  
pop ss/mov ss Vulnerability

```
KiBreakpointTrap proc
sub rsp, 8
push rbp
sub rsp, 158h
lea rbp, [rsp+80h]
mov [rbp+TrapInfo.ExceptionActive], 1
mov [rbp+TrapInfo._Rax], rax
mov [rbp+TrapInfo._Rcx], rcx
mov [rbp+TrapInfo._Rdx], rdx
mov [rbp+TrapInfo._R8], r8
mov [rbp+TrapInfo._R9], r9
mov [rbp+TrapInfo._R10], r10
mov [rbp+TrapInfo._R11], r11
test byte ptr [rbp+TrapInfo.SegCs], 1
jz short ExecutingInKernelModeContext
swapgs
mov r10, gs:_KPCR.Prcb.CurrentThread
test [r10+_KTHREAD.Header.DebugActive], 80h
jz short DebugIsActive
mov ecx, 0C0000102h
rdmsr
```

- GPZ (2023) – (hypervisor to TEE)

Induce exception to compromise TDX SEAMLDR

```
lgdt FWORD PTR [rcx].SEAMLDR_COM64_DATA.OriginalGdtr  
mov rbx, QWORD PTR [rcx].SEAMLDR_COM64_DATA.ResumeRip  
mov r8, QWORD PTR [rcx].SEAMLDR_COM64_DATA.OriginalCR3  
mov r9, QWORD PTR [rcx].SEAMLDR_COM64_DATA.RetVal  
mov rdx, 0  
mov rax, EXITAC  
push 2  
popfq  
mov rcx, 0  
...  
GETSEC[EXITAC]
```

- Schluter et al. (2024) – (hypervisor to TEE)  
Inject malicious interrupts to break confidential VMs

%% Example: Leak secret

```
mov eax , 4 % write syscall number  
mov ebx ... % move shared memory fd  
mov ecx, [ebp - 4] % buf  
mov edx, 8 % count
```

... ; << malicious interrupt injection from hypervisor



- Interrupts are a sort of state disruptor

state disruption

- Solution: *heavy* interrupt suppression
  - Software
    - Interrupt flag (e.g. “cli”)
    - Task priority register
  - Microcode
    - Clear interrupt flag on entry to ISR
    - Mask NMI until “iret” to prevent nested NMIs
    - INIT/SIPI
    - Enclaves
  - Hardware
    - Mask interrupt lines at Programmable Interrupt Controller
    - C-states
    - Disable generation from various peripherals
  - e.g. clear IF, clear TF, clear DR7, latch NMI, latch SMI, mask INIT, clear DEBUGCTL, etc.

state disruption



state disruption

- Can we break through this?



- One interrupt that generally cannot be delayed, suppressed, latched, etc.:

the Machine Check Exception (MCE)

state disruption

- Unpredictable hardware failures
  - Memory corruptions
  - Cache errors
  - TLB failures
  - etc.
- Caused by aging devices, thermal limits, signal integrity, static electricity, heat, high energy particles, etc.
- CPU detects and generates #MC exception
- #MC transfers control to 18<sup>th</sup> interrupt handler in Interrupt Descriptor Table (IDT), installed by OS

# machine check exceptions



machine check exceptions



machine check exceptions



machine check exceptions

*“The CATERR# indicates that the system  
has experienced a catastrophic error  
and cannot continue to operate”*

CATERR#



# machine check exceptions



machine check exceptions



machine check exceptions



# machine check exceptions



machine check exceptions



machine check exceptions



machine check exceptions

*“The CATERR# indicates that the system has experienced a catastrophic error and cannot continue to operate”*



# machine check exceptions



machine check exceptions



# machine check exceptions



# machine check exceptions

- Hardware failure happened
- Machine check generated by CPU
- OS has control
- What should handler do?

machine check exceptions



"MCE can be **delivered** at any time"

“Machine check exceptions can trigger all the time, even in a critical section when all normal interrupts are disabled.”



"MCE can be delivered at any time"



“Machine check exceptions can trigger all the time, even in a critical section when all normal interrupts are disabled.”

Default return value:  
Action required, the error must be handled immediately.

“MCE can be delivered at any time”

A dark gray background featuring a faint, stylized circuit board pattern with various lines, nodes, and hexagonal shapes.

“Machine check exceptions can trigger all the time, even in a critical section when all normal interrupts are disabled.”

Default return value:

Action required, the error must be handled immediately.

"It is also important to handle the machine check quickly (because the machine may be already unstable after an hardware failure). When the handling is delayed to bring the kernel into a easier to handle state first there is a risk that the event cannot be handled at all."

"MCE can be delivered at any time"



“Machine check exceptions can trigger all the time, even in a critical section when all normal interrupts are disabled.”

"An uncorrectable error will cause a machine panic"

Default return value:  
Action required, the error must be handled immediately.

"It is also important to handle the machine check quickly (because the machine may be already unstable after an hardware failure). When the handling is delayed to bring the kernel into a easier to handle state first there is a risk that the event cannot be handled at all."

"MCE can be delivered at any time"



“Machine check exceptions can trigger all the time, even in a critical section when all normal interrupts are disabled.”

"An uncorrectable error will cause a machine panic"

Default return value:  
Action required, the error must be handled immediately.

"By default the kernel will always panic on a MC in the kernel to avoid this deadlock.  
The rationale is that a panic can be handled better than a deadlock..."

"It is also important to handle the machine check quickly (because the machine may be already unstable after an hardware failure). When the handling is delayed to bring the kernel into a easier to handle state first there is a risk that the event cannot be handled at all."

"MCE can be delivered at any time"

“Machine check exceptions can trigger all the time, even in a critical section when all normal interrupts are disabled.”

"An uncorrectable error will cause a machine panic"

"By default the kernel will always panic on a MC in the kernel to avoid this deadlock.  
The rationale is that a panic can be handled better than a deadlock..."

"It is also important to handle the machine check quickly (because the machine may be already unstable after an hardware failure). When the handling is delayed to bring the kernel into a easier to handle state first there is a risk that the event cannot be handled at all."

“It is a bad idea to continue when an uncorrectable error occurs – it is indeterminate what was uncorrected and the operating system context might be so mangled that continuing will lead to further corruption.”

Default return value:  
Action required, the error must be handled immediately.

"MCE can be delivered at any time"

“Machine check exceptions can trigger all the time, even in a critical section when all normal interrupts are disabled.”

"An uncorrectable error will cause a machine panic"

"By default the kernel will always panic on a MC in the kernel to avoid this deadlock.  
The rationale is that a panic can be handled better than a deadlock..."

"It is also important to handle the machine check quickly (because the machine may be already unstable after an hardware failure). When the handling is delayed to bring the kernel into a easier to handle state first there is a risk that the event cannot be handled at all."

“It is a bad idea to continue when an uncorrectable error occurs – it is indeterminate what was uncorrected and the operating system context might be so mangled that continuing will lead to further corruption.”

Default return value:  
Action required, the error must be handled immediately.

`no_way_out = worst >= MCE_PANIC_SEVERITY;`

"MCE can be delivered at any time"

“Machine check exceptions can trigger all the time, even in a critical section when all normal interrupts are disabled.”

"An uncorrectable error will cause a machine panic"

“no idea what we were executing when the machine check hit.”

"By default the kernel will always panic on a MC in the kernel to avoid this deadlock.  
The rationale is that a panic can be handled better than a deadlock..."

"It is also important to handle the machine check quickly (because the machine may be already unstable after an hardware failure). When the handling is delayed to bring the kernel into a easier to handle state first there is a risk that the event cannot be handled at all."

“It is a bad idea to continue when an uncorrectable error occurs – it is indeterminate what was uncorrected and the operating system context might be so mangled that continuing will lead to further corruption.”

Default return value:  
Action required, the error must be handled immediately.

`no_way_out = worst >= MCE_PANIC_SEVERITY;`

"MCE can be delivered at any time"

“Machine check exceptions can trigger all the time, even in a critical section when all normal interrupts are disabled.”

"An uncorrectable error will cause a machine panic"

“no idea what we were executing when the machine check hit.”

"By default the kernel will always panic on a MC in the kernel to avoid this deadlock.  
The rationale is that a panic can be handled better than a deadlock..."

"It is also important to handle the machine check quickly (because the machine may be already unstable after an hardware failure). When the handling is delayed to bring the kernel into a easier to handle state first there is a risk that the event cannot be handled at all."

“It is a bad idea to continue when an uncorrectable error occurs – it is indeterminate what was uncorrected and the operating system context might be so mangled that continuing will lead to further corruption.”

Default return value:  
Action required, the error must be handled immediately.

no chance to recover -> PANIC

`no_way_out = worst >= MCE_PANIC_SEVERITY;`

"MCE can be delivered at any time"

“Machine check exceptions can trigger all the time, even in a critical section when all normal interrupts are disabled.”

"An uncorrectable error will cause a machine panic"

“no idea what we were executing when the machine check hit.”

"By default the kernel will always panic on a MC in the kernel to avoid this deadlock.  
The rationale is that a panic can be handled better than a deadlock..."

/\* Must die if the interrupt is not recoverable \*/

"It is also important to handle the machine check quickly (because the machine may be already unstable after an hardware failure). When the handling is delayed to bring the kernel into a easier to handle state first there is a risk that the event cannot be handled at all."

“It is a bad idea to continue when an uncorrectable error occurs – it is indeterminate what was uncorrected and the operating system context might be so mangled that continuing will lead to further corruption.”

Default return value:  
Action required, the error must be handled immediately.

no chance to recover -> PANIC

no\_way\_out = worst >= MCE\_PANIC\_SEVERITY;

"MCE can be delivered at any time"

“Machine check exceptions can trigger all the time, even in a critical section when all normal interrupts are disabled.”

"An uncorrectable error will cause a machine panic"

“no idea what we were executing when the machine check hit.”

"By default the kernel will always panic on a MC in the kernel to avoid this deadlock.  
The rationale is that a panic can be handled better than a deadlock..."

/\* Must die if the interrupt is not recoverable \*/

"It is also important to handle the machine check quickly (because the machine may be already unstable after an hardware failure). When the handling is delayed to bring the kernel into a easier to handle state first there is a risk that the event cannot be handled at all."

“It is a bad idea to continue when an uncorrectable error occurs – it is indeterminate what was uncorrected and the operating system context might be so mangled that continuing will lead to further corruption.”

Processor Context Corrupt, no need to fumble too much, die!

Default return value:  
Action required, the error must be handled immediately.

no chance to recover -> PANIC

no\_way\_out = worst >= MCE\_PANIC\_SEVERITY;

"MCE can be delivered at any time"

# machine check exceptions

- MCE handler
  - Determine source of error
  - Print a message if possible
  - Shut down before things get worse

# machine check exceptions

- MCEs are unique
  - Demand immediacy
  - Represent an unexpected, critical hardware failure
  - Cannot be masked, delayed, deprioritized, or preempted

- Single way to avoid handling MCEs
  - Disable in CR4 register
  - If MCE is received while disabled in CR4, CPU resets
- CPU options are:
  - Handling MCEs immediately
  - Or be reset when one is received

machine check exceptions

- Solution: ~~heavy interrupt suppression~~
- MCEs hit the CPU unexpectedly,  
break through all other interrupt defenses

machine check exceptions



Let's build a hammer...



# generating MCEs

- Challenge
  - MCEs are exceedingly rare,  
sporadic, unpredictable hardware failures



generating MCEs

(demo)

Every 0.1s: cat /proc/interrupts | grep --color=always -e "^" -e "MCE.\*" ubuntu-usb-3: Sat Aug 2 21:03:46 2025

|      | CPU0    | CPU1   | CPU2   | CPU3   |                                     |              |
|------|---------|--------|--------|--------|-------------------------------------|--------------|
| 0:   | 48      | 0      | 0      | 0      | IO-APIC                             | 2-edge       |
| 8:   | 1       | 0      | 0      | 0      | IO-APIC                             | 8-edge       |
| 9:   | 0       | 0      | 0      | 0      | IO-APIC                             | 9-fasteoi    |
| 16:  | 0       | 446    | 0      | 0      | IO-APIC                             | 16-fasteoi   |
| 18:  | 0       | 0      | 0      | 536049 | IO-APIC                             | 18-fasteoi   |
| 25:  | 0       | 0      | 0      | 0      | PCI-MSI                             | 34816-edge   |
| 27:  | 0       | 0      | 0      | 0      | PCI-MSI                             | 36864-edge   |
| 29:  | 0       | 0      | 0      | 0      | PCI-MSI                             | 38912-edge   |
| 31:  | 0       | 0      | 0      | 0      | PCI-MSI                             | 40960-edge   |
| 32:  | 0       | 0      | 0      | 0      | PCI-MSI                             | 43008-edge   |
| 33:  | 0       | 220    | 1365   | 0      | PCI-MSI                             | 278528-edge  |
| 34:  | 0       | 0      | 31     | 0      | PCI-MSI                             | 262144-edge  |
| 35:  | 0       | 0      | 0      | 0      | PCI-MSI                             | 262145-edge  |
| 37:  | 0       | 0      | 0      | 0      | PCI-MSI                             | 262146-edge  |
| 38:  | 0       | 0      | 0      | 0      | PCI-MSI                             | 262147-edge  |
| 39:  | 0       | 0      | 0      | 0      | PCI-MSI                             | 262148-edge  |
| 40:  | 0       | 0      | 0      | 140590 | PCI-MSI                             | 2097152-edge |
| 42:  | 0       | 0      | 0      | 0      | PCI-MSI                             | 131073-edge  |
| 44:  | 54      | 0      | 0      | 0      | PCI-MSI                             | 18432-edge   |
| 46:  | 0       | 0      | 6      | 0      | PCI-MSI                             | 16384-edge   |
| NMI: | 18      | 16     | 10     | 12     | Non-maskable interrupts             |              |
| LOC: | 1328403 | 524713 | 484269 | 884136 | Local timer interrupts              |              |
| SPU: | 0       | 0      | 0      | 0      | Spurious interrupts                 |              |
| PMI: | 18      | 16     | 10     | 12     | Performance monitoring interrupts   |              |
| IWI: | 1167887 | 597638 | 473827 | 553122 | IRQ work interrupts                 |              |
| RTR: | 0       | 0      | 0      | 0      | APIC ICR read retries               |              |
| RES: | 6574    | 6263   | 6254   | 5074   | Rescheduling interrupts             |              |
| CAL: | 228571  | 472783 | 380810 | 324352 | Function call interrupts            |              |
| TLB: | 282     | 361    | 370    | 345    | TLB shootdowns                      |              |
| TRM: | 0       | 0      | 0      | 0      | Thermal event interrupts            |              |
| THR: | 0       | 0      | 0      | 0      | Threshold APIC interrupts           |              |
| DFR: | 0       | 0      | 0      | 0      | Deferred Error APIC interrupts      |              |
| MCE: | 0       | 0      | 0      | 0      | <b>Machine check exceptions</b>     |              |
| MCP: | 422     | 422    | 422    | 422    | Machine check polls                 |              |
| ERR: | 0       |        |        |        |                                     |              |
| MIS: | 0       |        |        |        |                                     |              |
| PIN: | 0       | 0      | 0      | 0      | Posted-interrupt notification event |              |
| NPI: | 0       | 0      | 0      | 0      | Nested posted-interrupt event       |              |
| PIW: | 0       | 0      | 0      | 0      | Posted-interrupt wakeup event       |              |



# generating MCEs

- Could we generate these on-demand?
  - Simulations won't work, need real, physical MCEs

- Machine check registers arranged in banks
- Different banks devoted to different sources
  - Changes across generations
  - LS, IF, L2, DE, EX, FP, L3, CS, PIE,  
UMC, PB, PSP, SMU, MP5, NBIO, PCIE, etc.
- Many, *many* options for MCE sources

generating MCEs





A dark gray background featuring a faint, stylized circuit board pattern with various lines, dots, and small circles.

```
static const char * const f15h_mc1_mce_desc[] = {
    "UC during a demand linefill from L2",
    "Parity error during data load from IC",
    "Parity error for IC valid bit",
    "Main tag parity error",
    "Parity error in prediction queue",
    "PFB data/address parity error",
    "Parity error in the branch status reg",
    "PFB promotion address error",
    "Tag error during probe/victimization",
    "Parity error for IC probe tag valid bit",
    "PFB non-cacheable bit parity error",
    "PFB valid bit parity error",           /* xec = 0xd */
    "Microcode Patch Buffer",             /* xec = 010 */
    "uop queue",
    "insn buffer",
    "decode buffer",
    "fetch address FIFO",
    "dispatch uop queue"
};
```

# generating MCEs

A dark gray background featuring a faint, stylized circuit board pattern with various lines, dots, and small circles representing components and connections.

```
static const char * const f15h_mc2_mce_desc[] = {
    "Fill ECC error on data fills",                                /* xec = 0x4 */
    "Fill parity error on insn fills",
    "Prefetcher request FIFO parity error",
    "PRQ address parity error",
    "PRQ data parity error",
    "WCC Tag ECC error",
    "WCC Data ECC error",
    "WCB Data parity error",
    "VB Data ECC or parity error",
    "L2 Tag ECC error",                                            /* xec = 0x10 */
    "Hard L2 Tag ECC error",
    "Multiple hits on L2 tag",
    "XAB parity error",
    "PRB address parity error"
};
```

# generating MCEs

A dark gray background featuring a faint, stylized circuit board pattern with various lines, dots, and small circles.

```
static const char * const mc4_mce_desc[] = {
    "DRAM ECC error detected on the NB",
    "CRC error detected on HT link",
    "Link-defined sync error packets detected on HT link",
    "HT Master abort",
    "HT Target abort",
    "Invalid GART PTE entry during GART table walk",
    "Unsupported atomic RMW received from an IO link",
    "Watchdog timeout due to lack of progress",
    "DRAM ECC error detected on the NB",
    "SVM DMA Exclusion Vector error",
    "HT data error detected on link",
    "Protocol error (link, L3, probe filter)",
    "NB internal arrays parity error",
    "DRAM addr/ctl signals parity error",
    "IO link transmission error",
    "L3 data cache ECC error", /* xec = 0x1c */
    "L3 cache tag error",
    "L3 LRU parity bits error",
    "ECC Error in the Probe Filter directory"
};
```

# generating MCEs

A dark gray background featuring a faint, stylized circuit board pattern with various lines, dots, and small circles.

```
static const char * const mc5_mce_desc[] = {
    "CPU Watchdog timer expire",
    "Wakeup array dest tag",
    "AG payload array",
    "EX payload array",
    "IDRF array",
    "Retire dispatch queue",
    "Mapper checkpoint array",
    "Physical register file EX0 port",
    "Physical register file EX1 port",
    "Physical register file AG0 port",
    "Physical register file AG1 port",
    "Flag register file",
    "DE error occurred",
    "Retire status queue"
};
```

# generating MCEs

# generating MCEs

```
static const char * const mc6_mce_desc[] = {  
    "Hardware Assertion",  
    "Free List",  
    "Physical Register File",  
    "Retire Queue",  
    "Scheduler table",  
    "Status Register File",  
};
```



- Need to start somewhere.
- Some platforms reserve MC4 register bank for logging errors from the northbridge
- NB seems more configurable than others
  - vs. DC, IC, BU, FR, etc.
- Start there, expand later
- Details vary across generations

# generating MCEs

**Table 232: MC4 Error Descriptions**

| Error Type            | Description | CTL <sup>1</sup>           | ETG <sup>2</sup> | EAC <sup>4</sup> |
|-----------------------|-------------|----------------------------|------------------|------------------|
| RMW Error             | An atomic read-modify-write (RMW) command was received from an IO link. Atomic RMW commands are not supported. An atomic RMW command results in a link error response being generated back to the requesting IO device. The generation of the link error response is not affected by the control bit. | AtomicRMWEn                | L                | D                |
| WDT Error             | NB WDT timeout due to lack of progress. The NB WDT monitors transaction completions. A transaction that exceeds the programmed time limit reports errors via the MCA. The cause of error may be another node or device which failed to respond. | WDTRptEn                   | L                | D                |
| DRAM ECC Error        | A DRAM ECC error detected. | CECCEn, UECCEn             | D                | D                |
| DRAM CRC Error        | A DRAM CRC error was detected. | CCRCEn, UCRCEn             | D                | D                |
| Link Data Error       | Data error detected on link.<br>If enabled for reporting and the request is sourced from a core, then PCC is set. (If not enabled for reporting, PCC is not set. If configured to allow an error response to be returned to the core, this could allow error containment to a scope smaller than the entire system.) | McaUsPwDatErrEn, CpktdatEn | L                | D                |
| Protocol Error        | Protocol error detected by link. These errors are distinguished from each other by the value in <a href="#">MSR0000_0412[ErrAddr]</a> . See <a href="#">Table 236</a> .<br><br>For protocol errors, the system cannot continue operation. Protocol errors can be caused by other subcomponents than the one reporting the error. For diagnosis, collect and examine MCA registers from other banks, cores, and processors in the system. | NbIntProtEn                | L<sup>3</sup>    | D                |
| NB Array Error        | A parity error was detected in the NB internal arrays. | NbArrayParEn               | -                | D                |
| L2 complex Data Error | NB received a data error from a core and this error could not be contained. For the cause of the data error, examine the core MCA registers for deferred errors. This error may occur for the following types of data writes: <ul style="list-style-type: none"><li>• APIC</li><li>• Configuration space (IO and MMIO)</li></ul> For these errors, sync flood will occur if <a href="#">D18F3x180[SyncFloodOnCpuLeakErr]</a> is set. | McaCpuDatErrEn             | -                | D                |

1. CTL: See [MSR0000\_0410](#).

2. ETG: error threshold group. See [2.16.1.7 \[Error Thresholding\]](#).

- L=Link.
- D=DRAM.

3. The error thresholding group is Link if link protocol error; none for non-link protocol error.

4. EAC: D=Error action taken if detected. E=Error action taken if MCA bank enabled.

- Datasheets suggest MCEs can be generated from Master Abort signals arriving from NB

**Table 232: MC4 Error Descriptions**

| Error Type   | Description                                                                                                                                                                                                                                           | CTL <sup>1</sup> | ETG <sup>2</sup> | EAC <sup>4</sup> |
|--------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------|------------------|------------------|
| Sync Error   | Link-defined sync error packets detected on link. The NB floods its outgoing links with sync packets after detecting a sync packet on an incoming link independent of the state of the control bits.                                                  | SyncPktEn        | L                | D                |
| Master Abort | Master abort seen as result of link operation. Reasons for this error include requests to non-existent addresses. The NB returns an error response back to the requestor with any associated data all 1s independent of the state of the control bit. | MstrAbortEn      | L                | D                |
| Target Abort | Target abort seen as result of link operation. The NB returns an error response back to the requestor with any associated data all 1s independent of the state of the control bit.                                                                    | TgtAbortEn       | L                | D                |

52740\_16h\_Models\_30h-3Fh\_BKDG.pdf

generating MCEs

- Master abort
  - Device initiating PCI request terminates transaction because target device failed to respond
  - Something we can control
- Easy! Access a non-existent PCI device:  
`sudo setpci -A linux-sysfs -s 0:1f.0 0.L`
- Nothing.

generating MCEs

- But datasheets suggest there is *some* way for a master abort to cause an MCE
- Dive into the northbridge configuration

generating MCEs

• Bits of interest in B/D/F 0/18/3:

|                                             |                                         |                                          |
|---------------------------------------------|-----------------------------------------|------------------------------------------|
| 0:18.3 0x180[3]:<br>ChgDatErrToTgtAbort     | 0:18.3 0x40[9]:<br>TgtAbortEn           | 0:18.3 0x44[28]:<br>DisTgtAbortCpuErrRsp |
| 0:18.3 0x180[5]:<br>DisPciCfgCpuMstAbortRsp | 0:18.3 0x44[1]:<br>CpuRdDatErrEn        | 0:18.3 0x44[2]:<br>SyncFloodOnDramUcEcc  |
| 0:18.3 0x180[6]:<br>SyncFloodOnDatErr       | 0:18.3 0x44[20]:<br>SyncFloodOnWDT      | 0:18.3 0x44[3]:<br>SyncPktGenDis         |
| 0:18.3 0x180[7]:<br>SyncFloodOnTgtAbortErr  | 0:18.3 0x44[21]:<br>SyncFloodOnAnyUcErr | 0:18.3 0x44[4]:<br>SyncPktPropDis        |
| 0:18.3 0x40[12]:<br>WDTRptEn                | 0:18.3 0x44[24]:<br>IoRdDatErrEn        | 0:18.3 0x44[5]:<br>IoMstAbortDis         |
| 0:18.3 0x40[31]:<br>McaCpuDatErrEn          | 0:18.3 0x44[25]:<br>DisPciCfgCpuErrRsp  | 0:18.3 0x44[6]:<br>CpuErrDis             |
| 0:18.3 0x40[5]:<br>SyncPktEn                | 0:18.3 0x44[26]:<br>FlagMcaCorrErr      | 0:18.3 0x44[7]:<br>IoErrDis              |
| 0:18.3 0x40[8]:<br>MstrAbortEn              | 0:18.3 0x44[27]:<br>NbMcaToMstCpuEn     | 0:18.3 0x44[8]:<br>WDTDis                |

generating MCEs

- No single bit gives the desired behavior
- Many configurations crash or hang
- Too many permutations, not enough information

generating MCEs

- A northbridge fuzzer
  - Specify the bits of interest (~24 plausible config bits identified)
  - Randomly flip a bit
  - If crash/hang:
    - Power cycle and try again
  - Access a non-existent device in the PCI space
  - Check MCA status registers for MCE logged
    - If platform resets,  
MCA status registers are sticky, check on boot
  - Repeat

generating MCEs



generating MCEs

(demo)

deltaop@ubuntu-usb-3:~/\_research\$

```
[ 1.237854] usb 1-1: New USB device strings: Mfr=0, Product=0, SerialNumber=0
[ 1.239084] hub 1-1:1.0: USB hub found
[ 1.239306] hub 1-1:1.0: 4 ports detected
[ 1.253738] usb 2-1: New USB device found, idVendor=0438, idProduct=7900, bcdDevice= 0.18
[ 1.253884] usb 2-1: New USB device strings: Mfr=0, Product=0, SerialNumber=0
[ 1.255088] hub 2-1:1.0: USB hub found
[ 1.255332] hub 2-1:1.0: 4 ports detected
[ 1.286843] Freeing initrd memory: 85444K
[ 1.302307] Segment Routing with IPv6
[ 1.302384] In-situ OAM (IOAM) with IPv6
[ 1.302474] NET: Registered PF_PACKET protocol family
[ 1.302615] Key type dns_resolver registered
[ 1.302656] x86/pm: family 0x16 cpu detected, MSR saving is needed during suspending.
[ 1.303316] microcode: CPU0: patch_level=0x07030105
[ 1.303383] microcode: CPU1: patch_level=0x07030105
[ 1.303436] microcode: CPU2: patch_level=0x07030105
[ 1.304778] microcode: CPU3: patch_level=0x07030105
[ 1.306110] microcode: Microcode Update Driver: v2.2.
[ 1.306121] IPI shorthand broadcast: enabled
[ 1.308865] mce: [Hardware Error]: Machine check events logged
[ 1.308960] registered taskstats version 1
[ 1.310145] mce: [Hardware Error]: CPU 1: Machine Check: 0 Bank 0: b60000000000083b
[ 1.312692] mce: [Hardware Error]: TSC 0 ADDR fdfc000cf0
[ 1.313970] mce: [Hardware Error]: PROCESSOR 2:730f01 TIME 1753409480 SOCKET 0 APIC 1 microcode 7030105
[ 1.315468] Loading compiled-in X.509 certificates
[ 1.318427] Loaded X.509 cert 'Build time autogenerated kernel key: d5862910adca7ee16194da1e1a805db529424367'
[ 1.321297] Loaded X.509 cert 'Canonical Ltd. Live Patch Signing: 14df34d1a87cf37625abec039ef2bf521249b969'
[ 1.323917] Loaded X.509 cert 'Canonical Ltd. Kernel Module Signing: 88f752e560a1e0737e31163a466ad7b70a850c19'
[ 1.325293] blacklist: Loading compiled-in revocation X.509 certificates
[ 1.326727] Loaded X.509 cert 'Canonical Ltd. Secure Boot Signing: 61482aa2830d0ab2ad5af10b7250da9033ddcef0'
[ 1.328228] Loaded X.509 cert 'Canonical Ltd. Secure Boot Signing (2017): 242ade75ac4a15e50d50c84b0d45ff3eae707a03'
[ 1.329770] Loaded X.509 cert 'Canonical Ltd. Secure Boot Signing (ESM 2018): 365188c1d374d6b07c3c8f240f8ef722433d6a8b'
[ 1.331339] Loaded X.509 cert 'Canonical Ltd. Secure Boot Signing (2019): c0746fd6c5da3ae827864651ad66ae47fe24b3e8'
[ 1.332919] Loaded X.509 cert 'Canonical Ltd. Secure Boot Signing (2021 v1): a8d54bbb3825cfb94fa13c9f8a594a195c107b8d'
[ 1.334496] Loaded X.509 cert 'Canonical Ltd. Secure Boot Signing (2021 v2): 4cf046892d6fd3c9a5b03f98d845f90851dc6a8c'
[ 1.336069] Loaded X.509 cert 'Canonical Ltd. Secure Boot Signing (2021 v3): 100437bb6de6e469b581e61cd66bce3ef4ed53af'
[ 1.337638] Loaded X.509 cert 'Canonical Ltd. Secure Boot Signing (Ubuntu Core 2019): c1d57b8f6b743f23ee41f4f7ee292f06eecadfb9'
[ 1.341052] zswap: loaded using pool lzo/zbud
[ 1.343135] Key type .fscrypt registered
[ 1.344773] Key type fscrypt-provisioning registered
[ 1.346446] Key type trusted registered
[ 1.354527] Key type encrypted registered
```

--- MC0 (load-store) ---

|   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 6 | 5 | 5 | 4 | 4 | 4 | 3 | 3 | 2 | 2 | 2 | 1 | 1 | 0 | 0 | 0 |
| 0 | 6 | 2 | 8 | 4 | 0 | 6 | 2 | 8 | 4 | 0 | 6 | 2 | 8 | 4 | 0 |

[core 0]

|                        |             |   |      |
|------------------------|-------------|---|------|
| mc0_ctl (00000400):    | xxxxxxxx xx | ( | fec) |
| mc0_status (00000401): |             | ( | 0)   |
| mc0_addr (00000402):   |             | ( | 0)   |
| mc0_misc (00000403):   |             | ( | 0)   |
| mc0_mask (c0010044):   |             | ( | 0)   |

[core 1]

|                        |                 |                    |              |
|------------------------|-----------------|--------------------|--------------|
| mc0_ctl (00000400):    | xxxxxxxx xx     | (                  | fec)         |
| mc0_status (00000401): | x xx xx         | (b60000000000083b) |              |
| mc0_addr (00000402):   | xxxxxx xxxxxxxx | (                  | fdfc000cfcc) |
| mc0_misc (00000403):   |                 | (                  | 0)           |
| mc0_mask (c0010044):   |                 | (                  | 0)           |

[core 2]

|                        |             |   |      |
|------------------------|-------------|---|------|
| mc0_ctl (00000400):    | xxxxxxxx xx | ( | fec) |
| mc0_status (00000401): |             | ( | 0)   |
| mc0_addr (00000402):   |             | ( | 0)   |
| mc0_misc (00000403):   |             | ( | 0)   |
| mc0_mask (c0010044):   |             | ( | 0)   |

[core 3]

|                        |             |   |      |
|------------------------|-------------|---|------|
| mc0_ctl (00000400):    | xxxxxxxx xx | ( | fec) |
| mc0_status (00000401): |             | ( | 0)   |
| mc0_addr (00000402):   |             | ( | 0)   |
| mc0_misc (00000403):   |             | ( | 0)   |
| mc0_mask (c0010044):   |             | ( | 0)   |

- Found a 2-bit northbridge combination that works
- But MCE delivered to core that generates the abort
- Not useful for core to target itself with an MCE
  - Can only interrupt our own code this way
- We have a hammer, but we can only hit ourselves
- Need ability for one core to target *different* core

generating cross-core MCEs



Let's build a hammer...  
Let's add a handle...

- Modify fuzzer
  - Search for more complex bit configurations
  - Generate PCI abort from one core,  
check MCEs on others

generating cross-core MCEs

```
[ 1.2335] usb 1-1: New USB device found, idVendor=0438, idProduct=7900, bcdDevice= 0.18
[ 1.234111] usb 1-1: New USB device strings: Mfr=0, Product=0, SerialNumber=0
[ 1.235420] hub 1-1:1.0: USB hub found
[ 1.236579] hub 1-1:1.0: 4 ports detected
[ 1.245835] usb 2-1: New USB device found, idVendor=0438, idProduct=7900, bcdDevice= 0.18
[ 1.245977] usb 2-1: New USB device strings: Mfr=0, Product=0, SerialNumber=0
[ 1.247335] hub 2-1:1.0: USB hub found
[ 1.248317] hub 2-1:1.0: 4 ports detected
[ 1.279300] Freeing initrd memory: 85444K
[ 1.293227] Segment Routing with IPv6
[ 1.293304] In-situ OAM (IOAM) with IPv6
[ 1.293388] NET: Registered PF_PACKET protocol family
[ 1.293539] Key type dns_resolver registered
[ 1.293580] x86/pm: family 0x16 cpu detected, MSR saving is needed during suspending.
[ 1.294209] microcode: CPU0: patch_level=0x07030105
[ 1.294261] microcode: CPU1: patch_level=0x07030105
[ 1.294314] microcode: CPU2: patch_level=0x07030105
[ 1.295677] microcode: CPU3: patch_level=0x07030105
[ 1.297001] microcode: Microcode Update Driver: v2.2.
[ 1.297012] IPI shorthand broadcast: enabled
[ 1.299767] mce: [Hardware Error]: Machine check events logged
[ 1.299871] registered taskstats version 1
[ 1.300976] mce: [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: b70000110003081b
[ 1.303337] mce: [Hardware Error]: TSC 0 ADDR fd0c000cf0
[ 1.304506] mce: [Hardware Error]: PROCESSOR 2:730f01 TIME 1753410695 SOCKET 0 APIC 0 microcode 7030105
[ 1.305953] Loading compiled-in X.509 certificates
[ 1.308733] Loaded X.509 cert 'Build time autogenerated kernel key: d5862910adca7ee16194da1e1a805db529424367'
[ 1.311337] Loaded X.509 cert 'Canonical Ltd. Live Patch Signing: 14df34d1a87cf37625abec039ef2bf521249b969'
[ 1.313937] Loaded X.509 cert 'Canonical Ltd. Kernel Module Signing: 88f752e560a1e0737e31163a466ad7b70a850c19'
[ 1.315301] blacklist: Loading compiled-in revocation X.509 certificates
[ 1.316739] Loaded X.509 cert 'Canonical Ltd. Secure Boot Signing: 61482aa2830d0ab2ad5af10b7250da9033ddcef0'
[ 1.318252] Loaded X.509 cert 'Canonical Ltd. Secure Boot Signing (2017): 242ade75ac4a15e50d50c84b0d45ff3eae707a03'
[ 1.319787] Loaded X.509 cert 'Canonical Ltd. Secure Boot Signing (ESM 2018): 365188c1d374d6b07c3c8f240f8ef722433d6a8b'
[ 1.321366] Loaded X.509 cert 'Canonical Ltd. Secure Boot Signing (2019): c0746fd6c5da3ae827864651ad66ae47fe24b3e8'
[ 1.322941] Loaded X.509 cert 'Canonical Ltd. Secure Boot Signing (2021 v1): a8d54bbb3825cfb94fa13c9f8a594a195c107b8d'
[ 1.324520] Loaded X.509 cert 'Canonical Ltd. Secure Boot Signing (2021 v2): 4cf046892d6fd3c9a5b03f98d845f90851dc6a8c'
[ 1.326093] Loaded X.509 cert 'Canonical Ltd. Secure Boot Signing (2021 v3): 100437bb6de6e469b581e61cd66bce3ef4ed53af'
[ 1.327659] Loaded X.509 cert 'Canonical Ltd. Secure Boot Signing (Ubuntu Core 2019): c1d57b8f6b743f23ee41f4f7ee292f06eecadfb9'
[ 1.331069] zswap: loaded using pool lzo/zbud
[ 1.333134] Key type .fscrypt registered
[ 1.334719] Key type fscrypt-provisioning registered
[ 1.336350] Key type trusted registered
[ 1.344522] Key type encrypted registered
[ 1.346149] AppArmor: AppArmor sha1 policy hashing enabled
[ 1.351789] integrity: Loading X.509 certificate: UEFI:db
```

--- MC4 (northbridge) ---

|   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 6 | 5 | 5 | 4 | 4 | 4 | 3 | 3 | 2 | 2 | 2 | 1 | 1 | 0 | 0 | 0 |
| 0 | 6 | 2 | 8 | 4 | 0 | 6 | 2 | 8 | 4 | 0 | 6 | 2 | 8 | 4 | 0 |

[core 0]

|                        |                                        |                                    |
|------------------------|----------------------------------------|------------------------------------|
| mc4_ctl (00000410):    | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ( ffffffff)                        |
| mc4_status (00000411): | x xx xxx                               | x x xx xx xx xx (b70000110003081b) |
| mc4_addr (00000412):   | xxxxxx                                 | xxxxxx xx (fdfc000cfcc)            |
| mc4_misc0 (00000413):  | xx xx x                                | x (c01a000001000000)               |
| mc4_misc1 (c0000408):  | x                                      | x (10000001000000)                 |
| mc4_misc2 (c0000409):  |                                        | ( 0)                               |
| mc4_mask (c0010048):   |                                        | ( 40000000)                        |

[core 1]

|                        |   |            |
|------------------------|---|------------|
| mc4_ctl (00000410):    |   | ( 0)       |
| mc4_status (00000411): |   | ( 0)       |
| mc4_addr (00000412):   |   | ( 0)       |
| mc4_misc0 (00000413):  | x | ( 1000000) |
| mc4_misc1 (c0000408):  | x | ( 1000000) |
| mc4_misc2 (c0000409):  |   | ( 0)       |
| mc4_mask (c0010048):   |   | ( 0)       |

[core 2]

|                        |   |            |
|------------------------|---|------------|
| mc4_ctl (00000410):    |   | ( 0)       |
| mc4_status (00000411): |   | ( 0)       |
| mc4_addr (00000412):   |   | ( 0)       |
| mc4_misc0 (00000413):  | x | ( 1000000) |
| mc4_misc1 (c0000408):  | x | ( 1000000) |
| mc4_misc2 (c0000409):  |   | ( 0)       |
| mc4_mask (c0010048):   |   | ( 0)       |

[core 3]

|                        |   |            |
|------------------------|---|------------|
| mc4_ctl (00000410):    |   | ( 0)       |
| mc4_status (00000411): |   | ( 0)       |
| mc4_addr (00000412):   |   | ( 0)       |
| mc4_misc0 (00000413):  | x | ( 1000000) |
| mc4_misc1 (c0000408):  | x | ( 1000000) |
| mc4_misc2 (c0000409):  |   | ( 0)       |
| mc4_mask (c0010048):   |   | ( 0)       |

- Found 3-bit northbridge configuration where non-core-0 generates PCI abort, delivered as MCE on core-0
- Issue: platform still resets

generating cross-core MCEs



Let's build a hammer...

Let's add a handle...

Let's ... be a bit more careful

- Interrupting core-0 from core-1  
is not useful if the platform immediately resets
- Operating system is responsible for MCE handling
- Hijack CPU interrupt table  
to install first-pass MCE handler

staying alive





```
→ push %rax  
push %rdi  
push %rsi  
push %rdx  
push %rcx  
push %r8  
push %r9  
push %r10  
push %r11  
mov $0x3,%rdi  
mov $0xfe,%rsi  
mov 0x48(%rsp),%rdx  
mov 0x50(%rsp),%r10  
callq 1180 <os_handler>  
pop %r11  
pop %r10  
pop %r9  
pop %r8  
pop %rcx  
pop %rdx  
pop %rsi  
pop %rdi  
pop %rax  
iretq
```



- Modified handler indiscriminately clears any logged MCE from the MCA banks before handing control to OS handler
  - OS won't reset the platform if it can't see what caused the MCE
  - Dangerous, but good enough

staying alive



(demo)

staying alive

|             | CPU0     | CPU1     | CPU2     | CPU3     |                                     |              |
|-------------|----------|----------|----------|----------|-------------------------------------|--------------|
| 0:          | 48       | 0        | 0        | 0        | IO-APIC                             | 2-edge       |
| 8:          | 1        | 0        | 0        | 0        | IO-APIC                             | 8-edge       |
| 9:          | 0        | 0        | 0        | 0        | IO-APIC                             | 9-fasteoi    |
| 16:         | 0        | 422      | 0        | 0        | IO-APIC                             | 16-fasteoi   |
| 18:         | 0        | 0        | 0        | 33318    | IO-APIC                             | 18-fasteoi   |
| 25:         | 0        | 0        | 0        | 0        | PCI-MSI                             | 34816-edge   |
| 27:         | 0        | 0        | 0        | 0        | PCI-MSI                             | 36864-edge   |
| 29:         | 0        | 0        | 0        | 0        | PCI-MSI                             | 38912-edge   |
| 31:         | 0        | 0        | 0        | 0        | PCI-MSI                             | 40960-edge   |
| 32:         | 0        | 0        | 0        | 0        | PCI-MSI                             | 43008-edge   |
| 33:         | 0        | 327      | 10       | 0        | PCI-MSI                             | 278528-edge  |
| 34:         | 0        | 0        | 32       | 0        | PCI-MSI                             | 262144-edge  |
| 35:         | 0        | 0        | 0        | 0        | PCI-MSI                             | 262145-edge  |
| 36:         | 0        | 0        | 0        | 0        | PCI-MSI                             | 262146-edge  |
| 37:         | 0        | 0        | 0        | 0        | PCI-MSI                             | 262147-edge  |
| 38:         | 0        | 0        | 0        | 0        | PCI-MSI                             | 262148-edge  |
| 40:         | 0        | 0        | 0        | 993      | PCI-MSI                             | 2097152-edge |
| 42:         | 0        | 0        | 0        | 0        | PCI-MSI                             | 131073-edge  |
| 44:         | 54       | 0        | 0        | 0        | PCI-MSI                             | 18432-edge   |
| 46:         | 0        | 0        | 6        | 0        | PCI-MSI                             | 16384-edge   |
| NMI:        | 0        | 0        | 0        | 0        | Non-maskable interrupts             |              |
| LOC:        | 15297    | 14171    | 15864    | 18674    | Local timer interrupts              |              |
| SPU:        | 0        | 0        | 0        | 0        | Spurious interrupts                 |              |
| PMI:        | 0        | 0        | 0        | 0        | Performance monitoring interrupts   |              |
| IWI:        | 3944     | 3774     | 3989     | 3742     | IRQ work interrupts                 |              |
| RTR:        | 0        | 0        | 0        | 0        | APIC ICR read retries               |              |
| RES:        | 1177     | 1419     | 1500     | 1178     | Rescheduling interrupts             |              |
| CAL:        | 26427    | 19112    | 21529    | 15558    | Function call interrupts            |              |
| TLB:        | 208      | 146      | 152      | 179      | TLB shootdowns                      |              |
| TRM:        | 0        | 0        | 0        | 0        | Thermal event interrupts            |              |
| THR:        | 0        | 0        | 0        | 0        | Threshold APIC interrupts           |              |
| DFR:        | 0        | 0        | 0        | 0        | Deferred Error APIC interrupts      |              |
| <b>MCE:</b> | <b>0</b> | <b>0</b> | <b>0</b> | <b>0</b> | <b>Machine check exceptions</b>     |              |
| MCP:        | 1        | 1        | 1        | 1        | Machine check polls                 |              |
| ERR:        | 0        |          |          |          |                                     |              |
| MIS:        | 0        |          |          |          |                                     |              |
| PIN:        | 0        | 0        | 0        | 0        | Posted-interrupt notification event |              |
| NPI:        | 0        | 0        | 0        | 0        | Nested posted-interrupt event       |              |
| PIW:        | 0        | 0        | 0        | 0        | Posted-interrupt wakeup event       |              |

deltaop@ubuntu-usb-3:~/\_research\$ []

- A state disruptor tool
- On-demand generation of *hardware* MCEs entirely from *software*
- Moving forward
  - We'll use the NB approach for MCE generation
  - Configuration specifics will vary by platform
  - But barring this, *many* unexplored ways to generate MCEs

a state disruptor tool



selecting a target

- What to target?



Let's build a hammer...

Let's add a handle...

Let's be a bit more careful...

Let's find a nail...

- Current MCE approach uses ring-0 for northbridge reconfiguration
- Select targets more privileged than ring-0
  - (We'll revisit this requirement)
  - Game not over at ring-0 – gets much, much deeper
- Possible options:
  - Hypervisors, secure guests, enclaves, secure loader, etc.
- System Management Mode is an appealing target

selecting a target

- 35 years old
- Invisible to operating system, hypervisor, etc.
- Can preempt operating system, hypervisor, etc.
- Ring -2
- Critical to platform security, server RAS, client miscellanea
- Firmware R/W access in many configurations

system management mode





































(demo)

system management mode

deltaop@ubuntu-usb-3:~/\_research\$

- Compromising ring -2
  - SMM code running in SMRAM
  - Corrupt or hijack normal control flow to execute malicious payload
  - Unlock SMRAM

system management mode

- CPU modes must share resources with differently privileged modes
  - CPU must reset processor context between modes
  - Not feasible to reset *entire* processor context
  - Architects carefully select which state to change
- Done correctly, event from less privileged mode should not impact more privileged mode

state sanitization

- SMM transition on AMD processors
- Sanitize relevant CPU registers
- Suppress interrupts
  - NMI masked
  - INIT ignored
  - SMI masked
  - Maskable interrupts via IF in eflags
  - Debug interrupts/exception via DR7
  - Traps via TF in eflags



A large arrow points from the 'Sanitize relevant CPU registers' bullet point to the right side of the slide, where a table of register sanitization values is located.

|        |                         |
|--------|-------------------------|
| CS     | 0000                    |
| DS     | 0000                    |
| ES     | 0000                    |
| FS     | 0000                    |
| GS     | 0000                    |
| SS     | 0000                    |
| GPRs   | Unmodified              |
| EFLAGS | 0000_0002               |
| RIP    | 0000_0000_0000_8000     |
| CR0    | PE, EM, TS, PG cleared  |
| CR4    | 0000_0000_0000_0000     |
| GDTR   | Unmodified              |
| LDTR   | Unmodified              |
| IDTR   | Unmodified              |
| TR     | Unmodified              |
| DR6    | Unmodified              |
| DR7    | 0000_0000_0000_0400     |
| EFER   | All cleared except SVME |

state sanitization

- SMM transition on AMD processors
- Sanitize relevant CPU registers
- Suppress interrupts
  - NMI masked
  - INIT ignored
  - SMI masked
  - Maskable interrupts via IF in eflags
  - Debug interrupts/exception via DR7
  - Traps via TF in eflags



A large arrow points from the 'Sanitize relevant CPU registers' bullet point towards the table.

|        |                         |
|--------|-------------------------|
| CS     | 0000                    |
| DS     | 0000                    |
| ES     | 0000                    |
| FS     | 0000                    |
| GS     | 0000                    |
| SS     | 0000                    |
| GPRs   | Unmodified              |
| EFLAGS | 0000_0002               |
| RIP    | 0000_0000_0000_8000     |
| CR0    | PE, EM, TS, PG cleared  |
| CR4    | 0000_0000_0000_0000     |
| GDTR   | Unmodified              |
| LDTR   | Unmodified              |
| IDTR   | Unmodified              |
| TR     | Unmodified              |
| DR6    | Unmodified              |
| DR7    | 0000_0000_0000_0400     |
| EFER   | All cleared except SVME |

state sanitization

- IDTR points to the Interrupt Descriptor Table (IDT)
- IDTR unmodified on entry to SMM
- Any interrupt or exception that *does* occur in SMM will be delivered on an untrusted handler
- Basically: “ try { main() } except { pop\_shell() } ”
- Many ways to approach this
  - If *anything* goes wrong, it leads to privilege escalation
- One option: induce machine check on the untrusted IDT

state sanitization

- Challenge: CR4 is *cleared* by microcode
  - MCE handling disabled (CPU resets on MCE)

state sanitization

|        |                         |
|--------|-------------------------|
| CS     | 0000                    |
| DS     | 0000                    |
| ES     | 0000                    |
| FS     | 0000                    |
| GS     | 0000                    |
| SS     | 0000                    |
| GPRs   | Unmodified              |
| EFLAGS | 0000_0002               |
| RIP    | 0000_0000_0000_8000     |
| CR0    | PE, EM, TS, PG cleared  |
| CR4    | 0000_0000_0000_0000     |
| GDTR   | Unmodified              |
| LDTR   | Unmodified              |
| IDTR   | Unmodified              |
| TR     | Unmodified              |
| DR6    | Unmodified              |
| DR7    | 0000_0000_0000_0400     |
| EFER   | All cleared except SVME |

**BITS 16**

ASM\_PFX(gcSmiHandlerTemplate):  
\_SmiEntryPoint:  
mov bx, \_GdtDesc-\_SmiEntryPoint+0x8000  
mov ax,[cs:DSC\_OFFSET+DSC\_GDTSIZ]  
dec ax  
mov [cs:bx], ax  
mov eax, [cs:DSC\_OFFSET+DSC\_GDTPTR]  
mov [cs:bx+2], eax  
o32 lgdt [cs:bx]  
mov ax, PROTECT\_MODE\_CS  
mov [cs:bx-0x2],ax  
mov edi, strict dword 0  
ASM\_PFX(gPatchSmbase):  
lea eax, [edi+(@ProtectedMode-\_SmiEntryPoint)+0x8000]  
mov [cs:bx-0x6],eax  
mov ebx, cr0  
and ebx, 0x9ffafff3  
or ebx, 0x23  
mov cr0, ebx  
jmp dword 0x0:0x0  
\_GdtDesc:  
DW 0  
DD 0

**BITS 32**

@ProtectedMode:  
mov ax, PROTECT\_MODE\_DS  
o16 mov ds, ax  
o16 mov es, ax  
o16 mov fs, ax  
o16 mov gs, ax  
o16 mov ss, ax  
mov esp, strict dword 0  
ASM\_PFX[gPatchSmiStack]:  
jmp ProtFlatMode

**BITS 64**

ProtFlatMode:  
mov eax, strict dword 0  
ASM\_PFX[gPatchSmiCr3]:  
mov cr3, rax  
mov eax, 0x668  
mov cl, strict byte 0  
ASM\_PFX[gPatch5LevelPagingNeeded]:  
cmp cl, 0  
je SkipEnable5LevelPaging  
bts eax, 12  
SkipEnable5LevelPaging:  
mov cr4, rax  
sub esp, 8  
sgdt [rsp]  
mov eax, [rsp + 2]  
add esp, 8  
mov dl, 0x89  
mov [rax+TSS\_SEGMENT+5], dl  
mov eax, TSS\_SEGMENT  
ltr ax  
mov al, strict byte 1  
ASM\_PFX[gPatchXdSupported]:  
cmp al, 0  
jz @SkipXd  
mov al, strict byte 1  
ASM\_PFX[gPatchMsrla32MiscEnable...]:  
cmp al, 1  
jz Msrla32MiscEnableSupported

sub esp, 4  
xor rdx, rdx  
push rdx  
jmp EnableNxe  
Msrla32MiscEnableSupported:  
mov ecx, MSR\_IA32\_MISC\_ENABLE  
rdmsr  
sub esp, 4  
push rdx  
test edx, BIT2  
jz EnableNxe  
and dx, 0xFFFB  
wrmsr  
EnableNxe:  
mov ecx, MSR\_EFER  
rdmsr  
or ax, MSR\_EFER\_XD  
wrmsr  
jmp @XdDone  
@SkipXd:  
sub esp, 8  
@XdDone:  
push LONG\_MODE\_CS  
call Base  
Base:  
add dword [rsp], @LongMode-Base  
mov ecx, MSR\_EFER  
rdmsr  
or ah, 1  
wrmsr  
mov rbx, cr0  
or ebx, 0x80010023  
mov cr0, rbx  
retf

@LongMode:  
mov rax, strict qword 0  
SmiHandlerIdtrAbsAddr:  
lidt [rax]  
lea ebx, [rdi+DSC\_OFFSET]  
mov ax, [rbx+DSC\_DS]  
mov ds, eax  
mov ax, [rbx+DSC\_OTHERSEG]  
mov es, eax  
mov fs, eax  
mov gs, eax  
mov ax, [rbx+DSC\_SS]  
mov ss, eax  
mov rbx, [rsp+0x8]  
...  
mov rcx, rbx  
mov rax, strict qword 0  
SmiRendezvousAbsAddr:  
call rax  
...  
rsm

**BITS 16**

ASM\_PFX[gcSmiHandlerTemplate]:  
\_SmiEntryPoint:  
mov bx, \_GdtDesc-\_SmiEntryPoint+0x8000  
mov ax,[cs:DSC\_OFFSET+DSC\_GDTSIZ]  
dec ax  
mov [cs:bx], ax  
mov eax, [cs:DSC\_OFFSET+DSC\_GDTPTR]  
mov [cs:bx+2], eax  
o32 lgdt [cs:bx]  
mov ax, PROTECT\_MODE\_CS  
mov [cs:bx-0x2],ax  
mov edi, strict dword 0  
ASM\_PFX[gPatchSmbase]:  
lea eax, [edi+[@ProtectedMode-\_SmiEntryPoint]+0x8000]  
mov [cs:bx-0x6],eax  
mov ebx, cr0  
and ebx, 0x9ffafff3  
or ebx, 0x23  
mov cr0, ebx  
jmp dword 0x0:0x0  
\_GdtDesc:  
DW 0  
DD 0

**BITS 32**

@ProtectedMode:  
mov ax, PROTECT\_MODE\_DS  
o16 mov ds, ax  
o16 mov es, ax  
o16 mov fs, ax  
o16 mov gs, ax  
o16 mov ss, ax  
mov esp, strict dword 0  
ASM\_PFX[gPatchSmiStack]:  
jmp ProtFlatMode

**BITS 64**

ProtFlatMode:  
mov eax, strict dword 0  
ASM\_PFX[gPatchSmiCr3]:  
mov cr3, rax  
mov eax, 0x668  
mov cl, strict byte 0  
ASM\_PFX[gPatch5LevelPagingNeeded]:  
cmp cl, 0  
je SkipEnable5LevelPaging  
bts eax, 12  
SkipEnable5LevelPaging:  
mov cr4, rax  
sub esp, 8  
sgdt [rsp]  
mov eax, [rsp + 2]  
add esp, 8  
mov dl, 0x89  
mov [rax+TSS\_SEGMENT+5], dl  
mov eax, TSS\_SEGMENT  
ltr ax  
mov al, strict byte 1  
ASM\_PFX[gPatchXdSupported]:  
cmp al, 0  
jz @SkipXd  
mov al, strict byte 1  
ASM\_PFX[gPatchMsrla32MiscEnable...]:  
cmp al, 1  
jz Msrla32MiscEnableSupported  
sub esp, 4  
xor rdx, rdx  
push rdx  
jmp EnableNxe  
Msrla32MiscEnableSupported:  
mov ecx, MSR\_IA32\_MISC\_ENABLE  
rdmsr  
sub esp, 4  
push rdx  
test edx, BIT2  
jz EnableNxe  
and dx, 0xFFFB  
wrmsr  
EnableNxe:  
mov ecx, MSR\_EFER  
rdmsr  
or ax, MSR\_EFER\_XD  
wrmsr  
jmp @XdDone  
@SkipXd:  
sub esp, 8  
@XdDone:  
push LONG\_MODE\_CS  
call Base  
Base:  
add dword [rsp], @LongMode-Base  
mov ecx, MSR\_EFER  
rdmsr  
or ah, 1  
wrmsr  
mov rbx, cr0  
or ebx, 0x80010023  
mov cr0, rbx  
retf

@LongMode:  
mov rax, strict qword 0  
SmiHandlerIdtrAbsAddr:  
lidt [rax]  
lea ebx, [rdi+DSC\_OFFSET]  
mov ax, [rbx+DSC\_DS]  
mov ds, eax  
mov ax, [rbx+DSC\_OTHERSEG]  
mov es, eax  
mov fs, eax  
mov gs, eax  
mov ax, [rbx+DSC\_SS]  
mov ss, eax  
mov rbx, [rsp+0x8]  
...  
mov rcx, rbx  
mov rax, strict qword 0  
SmiRendezvousAbsAddr:  
call rax  
...  
rsm

```
BITS 16
ASM_PFX[gcSmiHandlerTemplate]:
_SmiEntryPoint:
mov bx, _GdtDesc-_SmiEntryPoint+0x8000
mov ax,[cs:DSC_OFFSET+DSC_GDTDSIZ]
dec ax
mov [cs:bx], ax
mov eax, [cs:DSC_OFFSET+DSC_GDTPTR]
mov [cs:bx+2], eax
o32 lgdt [cs:bx]
mov ax, PROTECT_MODE_CS
mov [cs:bx-0x2],ax
mov edi, strict dword 0
ASM_PFX[gPatchSmbase]:
lea eax, [edi+[@ProtectedMode-_SmiEntryPoint]+0x8000]
mov [cs:bx-0x6],eax
mov ebx, cr0
and ebx, 0x9ffafff3
or ebx, 0x23
mov cr0, ebx
jmp dword 0x0:0x0
_GdtDesc:
DW 0
DD 0
```

```
BITS 32
@ProtectedMode:
mov ax, PROTECT_MODE_DS
o16 mov ds, ax
o16 mov es, ax
o16 mov fs, ax
o16 mov gs, ax
o16 mov ss, ax
mov esp, strict dword 0
ASM_PFX[gPatchSmiStack]:
jmp ProtFlatMode
```

```
BITS 64
ProtFlatMode:
mov eax, strict dword 0
ASM_PFX[gPatchSmiCr3]:
mov cr3, rax
mov eax, 0x668
mov cl, strict byte 0
ASM_PFX[gPatch5LevelPagingNeeded]:
cmp cl, 0
je SkipEnable5LevelPaging
bts eax, 12
SkipEnable5LevelPaging:
mov cr4, rax
sub esp, 8
sgdt [rsp]
mov eax, [rsp + 2]
add esp, 8
mov dl, 0x89
mov [rax+TSS_SEGMENT+5], dl
mov eax, TSS_SEGMENT
ltr ax
mov al, strict byte 1
ASM_PFX[gPatchXdSupported]:
cmp al, 0
jz @SkipXd
mov al, strict byte 1
ASM_PFX[gPatchMsrla32MiscEnable...]:
cmp al, 1
jz Msrla32MiscEnableSupported
```

```
sub esp, 4
xor rdx, rdx
push rdx
jmp EnableNxe
Msrla32MiscEnableSupported:
mov ecx, MSR_IA32_MISC_ENABLE
rdmsr
sub esp, 4
push rdx
test edx, BIT2
jz EnableNxe
and dx, 0xFFFB
wrmsr
EnableNxe:
mov ecx, MSR_EFER
rdmsr
or ax, MSR_EFER_XD
wrmsr
jmp @XdDone
@SkipXd:
sub esp, 8
@XdDone:
push LONG_MODE_CS
call Base
Base:
add dword [rsp], @LongMode-Base
mov ecx, MSR_EFER
rdmsr
or ah, 1
wrmsr
mov rbx, cr0
or ebx, 0x80010023
mov cr0, rbx
retf
```

```
@LongMode:
mov rax, strict qword 0
SmiHandlerIdtrAbsAddr:
lidt [rax]
lea ebx, [rdi+DSC_OFFSET]
mov ax, [rbx+DSC_DS]
mov ds, eax
mov ax, [rbx+DSC_OTHERSEG]
mov es, eax
mov fs, eax
mov gs, eax
mov ax, [rbx+DSC_SS]
mov ss, eax
mov rbx, [rsp+0x8]
...
mov rcx, rbx
mov rax, strict qword 0
SmiRendezvousAbsAddr:
call rax
...
rsm
```

**BITS 16**

ASM\_PFX[gcSmiHandlerTemplate]:  
\_SmiEntryPoint:  
mov bx, \_GdtDesc-\_SmiEntryPoint+0x8000  
mov ax,[cs:DSC\_OFFSET+DSC\_GDTDSIZ]  
dec ax  
mov [cs:bx], ax  
mov eax, [cs:DSC\_OFFSET+DSC\_GDTPTR]  
mov [cs:bx+2], eax  
o32 lgdt [cs:bx]  
mov ax, PROTECT\_MODE\_CS  
mov [cs:bx-0x2],ax  
mov edi, strict dword 0  
ASM\_PFX[gPatchSmbase]:  
lea eax, [edi+[@ProtectedMode-\_SmiEntryPoint]+0x8000]  
mov [cs:bx-0x6],eax  
mov ebx, cr0  
and ebx, 0x9ffafff3  
or ebx, 0x23  
mov cr0, ebx  
jmp dword 0x0:0x0  
\_GdtDesc:  
DW 0  
DD 0

**BITS 32**

@ProtectedMode:  
mov ax, PROTECT\_MODE\_DS  
o16 mov ds, ax  
o16 mov es, ax  
o16 mov fs, ax  
o16 mov gs, ax  
o16 mov ss, ax  
mov esp, strict dword 0  
ASM\_PFX[gPatchSmiStack]:  
jmp ProtFlatMode

**BITS 64**

ProtFlatMode:  
mov eax, strict dword 0  
ASM\_PFX[gPatchSmiCr3]:  
mov cr3, rax  
mov eax, 0x668  
mov cl, strict byte 0  
ASM\_PFX[gPatch5LevelPagingNeeded]:  
cmp cl, 0  
je SkipEnable5LevelPaging  
bts eax, 12  
SkipEnable5LevelPaging:  
mov cr4, rax  
sub esp, 8  
sgdt [rsp]  
mov eax, [rsp + 2]  
add esp, 8  
mov dl, 0x89  
mov [rax+TSS\_SEGMENT+5], dl  
mov eax, TSS\_SEGMENT  
ltr ax  
mov al, strict byte 1  
ASM\_PFX[gPatchXdSupported]:  
cmp al, 0  
jz @SkipXd  
mov al, strict byte 1  
ASM\_PFX[gPatchMsrla32MiscEnable...]:  
cmp al, 1  
jz Msrla32MiscEnableSupported

sub esp, 4  
xor rdx, rdx  
push rdx  
jmp EnableNxe  
Msrla32MiscEnableSupported:  
mov ecx, MSR\_IA32\_MISC\_ENABLE  
rdmsr  
sub esp, 4  
push rdx  
test edx, BIT2  
jz EnableNxe  
and dx, 0xFFFB  
wrmsr  
EnableNxe:  
mov ecx, MSR\_EFER  
rdmsr  
or ax, MSR\_EFER\_XD  
wrmsr  
jmp @XdDone  
@SkipXd:  
sub esp, 8  
@XdDone:  
push LONG\_MODE\_CS  
call Base  
Base:  
add dword [rsp], @LongMode-Base  
mov ecx, MSR\_EFER  
rdmsr  
or ah, 1  
wrmsr  
mov rbx, cr0  
or ebx, 0x80010023  
mov cr0, rbx  
retf

@LongMode:  
mov rax, strict qword 0  
SmiHandlerIdtrAbsAddr:  
lidt [rax]  
lea ebx, [rdi+DSC\_OFFSET]  
mov ax, [rbx+DSC\_DS]  
mov ds, eax  
mov ax, [rbx+DSC\_OTHERSEG]  
mov es, eax  
mov fs, eax  
mov gs, eax  
mov ax, [rbx+DSC\_SS]  
mov ss, eax  
mov rbx, [rsp+0x8]  
...  
mov rcx, rbx  
mov rax, strict qword 0  
SmiRendezvousAbsAddr:  
call rax  
...  
rsm

**BITS 16**

ASM\_PFX[gcSmiHandlerTemplate]:  
\_SmiEntryPoint:  
mov bx, \_GdtDesc-\_SmiEntryPoint+0x8000  
mov ax,[cs:DSC\_OFFSET+DSC\_GDTDSIZ]  
dec ax  
mov [cs:bx], ax  
mov eax, [cs:DSC\_OFFSET+DSC\_GDTPTR]  
mov [cs:bx+2], eax  
o32 lgdt [cs:bx]  
mov ax, PROTECT\_MODE\_CS  
mov [cs:bx-0x2],ax  
mov edi, strict dword 0  
ASM\_PFX[gPatchSmbase]:  
lea eax, [edi+[@ProtectedMode-\_SmiEntryPoint]+0x8000]  
mov [cs:bx-0x6],eax  
mov ebx, cr0  
and ebx, 0x9ffafff3  
or ebx, 0x23  
mov cr0, ebx  
jmp dword 0x0:0x0  
\_GdtDesc:  
DW 0  
DD 0

**BITS 32**

@ProtectedMode:  
mov ax, PROTECT\_MODE\_DS  
o16 mov ds, ax  
o16 mov es, ax  
o16 mov fs, ax  
o16 mov gs, ax  
o16 mov ss, ax  
mov esp, strict dword 0  
ASM\_PFX[gPatchSmiStack]:  
jmp ProtFlatMode

**BITS 64**

ProtFlatMode:  
mov eax, strict dword 0  
ASM\_PFX[gPatchSmiCr3]:  
mov cr3, rax  
mov eax, 0x668  
mov cl, strict byte 0  
ASM\_PFX[gPatch5LevelPagingNeeded]:  
cmp cl, 0  
je SkipEnable5LevelPaging  
bts eax, 12  
SkipEnable5LevelPaging:  
mov cr4, rax  
sub esp, 8  
sgdt [rsp]  
mov eax, [rsp + 2]  
add esp, 8  
mov dl, 0x89  
mov [rax+TSS\_SEGMENT+5], dl  
mov eax, TSS\_SEGMENT  
ltr ax  
mov al, strict byte 1  
ASM\_PFX[gPatchXdSupported]:  
cmp al, 0  
jz @SkipXd  
mov al, strict byte 1  
ASM\_PFX[gPatchMsrla32MiscEnable...]:  
cmp al, 1  
jz Msrla32MiscEnableSupported

sub esp, 4  
xor rdx, rdx  
push rdx  
jmp EnableNxe  
Msrla32MiscEnableSupported:  
mov ecx, MSR\_IA32\_MISC\_ENABLE  
rdmsr  
sub esp, 4  
push rdx  
test edx, BIT2  
jz EnableNxe  
and dx, 0xFFFB  
wrmsr  
EnableNxe:  
mov ecx, MSR\_EFER  
rdmsr  
or ax, MSR\_EFER\_XD  
wrmsr  
jmp @XdDone  
@SkipXd:  
sub esp, 8  
@XdDone:  
push LONG\_MODE\_CS  
call Base  
Base:  
add dword [rsp], @LongMode-Base  
mov ecx, MSR\_EFER  
rdmsr  
or ah, 1  
wrmsr  
mov rbx, cr0  
or ebx, 0x80010023  
mov cr0, rbx  
retf

@LongMode:  
mov rax, strict qword 0  
SmiHandlerIdtrAbsAddr:  
lidt [rax]  
lea ebx, [rdi+DSC\_OFFSET]  
mov ax, [rbx+DSC\_DS]  
mov ds, eax  
mov ax, [rbx+DSC\_OTHERSEG]  
mov es, eax  
mov fs, eax  
mov gs, eax  
mov ax, [rbx+DSC\_SS]  
mov ss, eax  
mov rbx, [rsp+0x8]  
...  
mov rcx, rbx  
mov rax, strict qword 0  
SmiRendezvousAbsAddr:  
call rax  
...  
rsm

**BITS 16**

ASM\_PFX[gcSmiHandlerTemplate]:  
\_SmiEntryPoint:  
mov bx, \_GdtDesc-\_SmiEntryPoint+0x8000  
mov ax,[cs:DSC\_OFFSET+DSC\_GDTDSIZ]  
dec ax  
mov [cs:bx], ax  
mov eax, [cs:DSC\_OFFSET+DSC\_GDTPTR]  
mov [cs:bx+2], eax  
o32 lgdt [cs:bx]  
mov ax, PROTECT\_MODE\_CS  
mov [cs:bx-0x2],ax  
mov edi, strict dword 0  
ASM\_PFX[gPatchSmbase]:  
lea eax, [edi+[@ProtectedMode-\_SmiEntryPoint]+0x8000]  
mov [cs:bx-0x6],eax  
mov ebx, cr0  
and ebx, 0x9ffafff3  
or ebx, 0x23  
mov cr0, ebx  
jmp dword 0x0:0x0  
\_GdtDesc:  
DW 0  
DD 0

**BITS 32**

@ProtectedMode:  
mov ax, PROTECT\_MODE\_DS  
o16 mov ds, ax  
o16 mov es, ax  
o16 mov fs, ax  
o16 mov gs, ax  
o16 mov ss, ax  
mov esp, strict dword 0  
ASM\_PFX[gPatchSmiStack]:  
jmp ProtFlatMode

**BITS 64**

ProtFlatMode:  
mov eax, strict dword 0  
ASM\_PFX[gPatchSmiCr3]:  
mov cr3, rax  
mov eax, 0x668  
mov cl, strict byte 0  
ASM\_PFX[gPatch5LevelPagingNeeded]:  
cmp cl, 0  
je SkipEnable5LevelPaging  
bts eax, 12  
SkipEnable5LevelPaging:  
mov cr4, rax  
sub esp, 8  
sgdt [rsp]  
mov eax, [rsp + 2]  
add esp, 8  
mov dl, 0x89  
mov [rax+TSS\_SEGMENT+5], dl  
mov eax, TSS\_SEGMENT  
ltr ax  
mov al, strict byte 1  
ASM\_PFX[gPatchXdSupported]:  
cmp al, 0  
jz @SkipXd  
mov al, strict byte 1  
ASM\_PFX[gPatchMsrla32MiscEnable...]:  
cmp al, 1  
jz Msrla32MiscEnableSupported

sub esp, 4  
xor rdx, rdx  
push rdx  
jmp EnableNxe  
Msrla32MiscEnableSupported:  
mov ecx, MSR\_IA32\_MISC\_ENABLE  
rdmsr  
sub esp, 4  
push rdx  
test edx, BIT2  
jz EnableNxe  
and dx, 0xFFFB  
wrmsr  
EnableNxe:  
mov ecx, MSR\_EFER  
rdmsr  
or ax, MSR\_EFER\_XD  
wrmsr  
jmp @XdDone  
@SkipXd:  
sub esp, 8  
@XdDone:  
push LONG\_MODE\_CS  
call Base  
Base:  
add dword [rsp], @LongMode-Base  
mov ecx, MSR\_EFER  
rdmsr  
or ah, 1  
wrmsr  
mov rbx, cr0  
or ebx, 0x80010023  
mov cr0, rbx  
retf

@LongMode:  
mov rax, strict qword 0  
SmiHandlerIdtrAbsAddr:  
lidt [rax]  
lea ebx, [rdi+DSC\_OFFSET]  
mov ax, [rbx+DSC\_DS]  
mov ds, eax  
mov ax, [rbx+DSC\_OTHERSEG]  
mov es, eax  
mov fs, eax  
mov gs, eax  
mov ax, [rbx+DSC\_SS]  
mov ss, eax  
mov rbx, [rsp+0x8]  
...  
mov rcx, rbx  
mov rax, strict qword 0  
SmiRendezvousAbsAddr:  
call rax  
...  
rsm



**BITS 16**

ASM\_PFX[gcSmiHandlerTemplate]:  
\_SmiEntryPoint:  
mov bx, \_GdtDesc-\_SmiEntryPoint+0x8000  
mov ax,[cs:DSC\_OFFSET+DSC\_GDTDSIZ]  
dec ax  
mov [cs:bx], ax  
mov eax, [cs:DSC\_OFFSET+DSC\_GDTPTR]  
mov [cs:bx+2], eax  
o32 lgdt [cs:bx]  
mov ax, PROTECT\_MODE\_CS  
mov [cs:bx-0x2],ax  
mov edi, strict dword 0  
ASM\_PFX[gPatchSmbase]:  
lea eax, [edi+[@ProtectedMode-\_SmiEntryPoint]+0x8000]  
mov [cs:bx-0x6],eax  
mov ebx, cr0  
and ebx, 0x9ffafff3  
or ebx, 0x23  
mov cr0, ebx  
jmp dword 0x0:0x0  
\_GdtDesc:  
DW 0  
DD 0

**BITS 32**

@ProtectedMode:  
mov ax, PROTECT\_MODE\_DS  
o16 mov ds, ax  
o16 mov es, ax  
o16 mov fs, ax  
o16 mov gs, ax  
o16 mov ss, ax  
mov esp, strict dword 0  
ASM\_PFX[gPatchSmiStack]:  
jmp ProtFlatMode

**BITS 64**

ProtFlatMode:  
mov eax, strict dword 0  
ASM\_PFX[gPatchSmiCr3]:  
mov cr3, rax  
mov eax, 0x668  
mov cl, strict byte 0  
ASM\_PFX[gPatch5LevelPagingNeeded]:  
cmp cl, 0  
je SkipEnable5LevelPaging  
bts eax, 12  
SkipEnable5LevelPaging:  
mov cr4, rax  
sub esp, 8  
sgdt [rsp]  
mov eax, [rsp + 2]  
add esp, 8  
mov dl, 0x89  
mov [rax+TSS\_SEGMENT+5], dl  
mov eax, TSS\_SEGMENT  
ltr ax  
mov al, strict byte 1  
ASM\_PFX[gPatchXdSupported]:  
cmp al, 0  
jz @SkipXd  
mov al, strict byte 1  
ASM\_PFX[gPatchMsrla32MiscEnable...]:  
cmp al, 1  
jz Msrla32MiscEnableSupported

sub esp, 4  
xor rdx, rdx  
push rdx  
jmp EnableNxe  
Msrla32MiscEnableSupported:  
mov ecx, MSR\_IA32\_MISC\_ENABLE  
rdmsr  
sub esp, 4  
push rdx  
test edx, BIT2  
jz EnableNxe  
and dx, 0xFFFB  
wrmsr  
EnableNxe:  
mov ecx, MSR\_EFER  
rdmsr  
or ax, MSR\_EFER\_XD  
wrmsr  
jmp @XdDone  
@SkipXd:  
sub esp, 8  
@XdDone:  
push LONG\_MODE\_CS  
call Base  
Base:  
add dword [rsp], @LongMode-Base  
mov ecx, MSR\_EFER  
rdmsr  
or ah, 1  
wrmsr  
mov rbx, cr0  
or ebx, 0x80010023  
mov cr0, rbx  
retf

@LongMode:  
mov rax, strict qword 0  
SmiHandlerIdtrAbsAddr:  
lidt [rax]  
lea ebx, [rdi+DSC\_OFFSET]  
mov ax, [rbx+DSC\_DS]  
mov ds, eax  
mov ax, [rbx+DSC\_OTHERSEG]  
mov es, eax  
mov fs, eax  
mov gs, eax  
mov ax, [rbx+DSC\_SS]  
mov ss, eax  
mov rbx, [rsp+0x8]  
...  
mov rcx, rbx  
mov rax, strict qword 0  
SmiRendezvousAbsAddr:  
call rax  
...  
rsm





- With IDT left unsanitized, exceptions and interrupts are delivered on attacker’s interrupt handler
- Attack window for interrupts/exceptions:
  - After transition to SMM, before “`lidt`”
- Attack window for MCEs
  - Between “`mov CR4`” and “`lidt`”

the attack windows

- MCE attack:
  - Create MCE from attacking thread, target victim thread
  - Receive MCE on victim thread in SMM attack window
  - Victim thread must be in SMM
  - While attacking thread is outside SMM

the attack windows

- SMM design has all threads enter/exit SMM simultaneously
  - Thread triggers SMI through some hardware event
  - SMI signal sent to all threads on platform
  - Each thread finishes its current instruction, then enters SMM
  - In SMM, each thread waits for all others to enter rendezvous point
  - Thread “quiescing” ensures all threads executing within SMM at same time
- Prevents non-SMM thread from attacking SMM thread
- Common pattern in privileged execution modes

thread quiescing



- Challenge:
  - Victim thread must be in SMM
  - While attacking thread is outside SMM
  - Need one thread in SMM,  
and one thread outside SMM,  
*at the same time*

the attack windows



```
testl %eax, %eax  
je 0x100002421  
movq %r13, -64(%rbp)  
movq %r14, -56(%rbp)  
movq %r12, -48(%rbp)  
movl 56(%r12), %r13d  
testl %r13d, %r13d  
jle 0x1000023c9  
movq -48(%rbp), %rax
```

```
mov %rsp,%rbp  
mov %edi,-0x4(%rbp)  
mov %esi,%eax  
mov %ax,-0x8(%rbp)  
mov -0x4(%rbp),%eax  
movzwl -0x8(%rbp),%edx  
out %eax,(%dx)  
nop  
pop %rbp
```

```
movq %rcx, %rax  
addq $8, %rsp  
popq %rbx  
popq %r12  
popq %r13  
popq %r14  
popq %r15  
popq %rbp  
retq
```

```
addq $16, %rax  
cmpq $1880, %rax  
jne 0x100032976  
leaq 2000785(%rip), %rax  
jmp 0x100032998  
movq -8(%rax,%rsi), %rax  
movq %rax, -64(%rbp)  
leaq 2344893(%rip), %rax  
movq (%rax,%r12,8), %r15
```







CPU

```
testl %eax, %eax  
je 0x100002421  
movq %r13, -64(%rbp)  
movq %r14, -56(%rbp)  
movq %r12, -48(%rbp)  
movl 56(%r12), %r13d  
testl %r13d, %r13d  
jle 0x1000023c9  
movq -48(%rbp), %rax
```

```
mov %rsp,%rbp  
mov %edi,-0x4(%rbp)  
mov %esi,%eax  
mov %ax,-0x8(%rbp)  
mov -0x4(%rbp),%eax  
movzwl -0x8(%rbp),%edx  
out %eax,(%dx)  
nop  
pop %rbp
```

```
movq %rcx, %rax  
addq $8, %rsp  
popq %rbx  
popq %r12  
popq %r13  
popq %r14  
popq %r15  
popq %rbp  
retq
```

```
addq $16, %rax  
cmpq $1880, %rax  
jne 0x100032976  
leaq 2000785(%rip), %rax  
jmp 0x100032998  
movq -8(%rax,%rsi), %rax  
movq %rax, -64(%rbp)  
leaq 2344893(%rip), %rax  
movq (%rax,%r12,8), %r15
```



RAM

SMRAM

MMIO

RAM



CPU

```
testl %eax, %eax  
je 0x100002421  
movq %r13, -64(%rbp)  
movq %r14, -56(%rbp)  
movq %r12, -48(%rbp)  
movl 56(%r12), %r13d  
testl %r13d, %r13d  
jle 0x1000023c9  
movq -48(%rbp), %rax
```

```
mov %rsp,%rbp  
mov %edi,-0x4(%rbp)  
mov %esi,%eax  
mov %ax,-0x8(%rbp)  
mov -0x4(%rbp),%eax  
movzwl -0x8(%rbp),%edx  
out %eax,(%dx)  
nop  
pop %rbp
```

```
movq %rcx, %rax  
addq $8, %rsp  
popq %rbx  
popq %r12  
popq %r13  
popq %r14  
popq %r15  
popq %rbp  
retq
```

```
addq $16, %rax  
cmpq $1880, %rax  
jne 0x100032976  
leaq 2000785(%rip), %rax  
jmp 0x100032998  
movq -8(%rax,%rsi), %rax  
movq %rax, -64(%rbp)  
leaq 2344893(%rip), %rax  
movq (%rax,%r12,8), %r15
```



RAM

SMRAM

MMIO

RAM





















- Challenge:
  - Victim thread must be in SMM
  - While attacking thread is outside SMM
  - Need one thread in SMM,  
and one thread outside SMM,  
*at the same time*
- Observation:
  - Threads do not *technically* enter SMM at the same time
  - Each thread gets to finish its current instruction
  - Attacking thread has *one instruction* with which to complete the attack

thread quiescing

• What if...

Thread 1 [attacker]

begin ??? instruction

receive SMI

??? triggers MCE

??? instruction ends

enter SMM

begin executing SMI handler

Thread 0 [victim]

out b2 (trigger SMI)

receive SMI

out b2 ends

enter SMM (idt unchanged, cr4.mce cleared)

begin executing SMI handler

...

set cr4.mce

...

...

receive MCE

...

...

reload IDT



- Does a ??? instruction exist?
  - Must generate MCE after 10,000+ cycles
  - Must be precise enough for 100 cycle attack window



Let's build a hammer...

Let's add a handle...

Let's be a bit more careful...

Let's find a nail...

Let's light a **fuse**...

- Some instruction cycle timings (target: 10,000)
  - incq %rax ... 1 cycle
  - divq %rdx ... 14
  - fsin ... 50
  - fyl2xp1 ... 135
- Nowhere near what is needed.

building a fuse

- Bigger challenge
  - Need instruction that generates master abort
  - Master abort done through MMIO on PCI space
  - Architecture requires “`movl %[mem], %eax`” instruction for MMIO
  - ~6 cycles (hitting cache)
  - ~250 cycles (hitting RAM)
  - ~700 cycles (hitting PCI MMIO)

building a fuse



# building a fuse

- MMIO reads are an order of magnitude away from the latency needed for the fuse instruction.
- The attack won't work.



(demo)

building a fuse



- Not all MMIO reads are created equal
  - Normal devices on PCIe bus: ~700 cycles
  - Slowest devices on PCIe bus: ~4000 cycles
- Can we increase this?
  - Add competing MMIO traffic: +2000 cycles
  - Low power states and underclocking: +1400 cycles
  - Complex physical PCI topology: +1000 cycles
- Still not enough, and attack is increasingly impractical

building a fuse



- “MMIO Configuration Coding Requirements”

“

Instructions used to read MMIO configuration space  
are required to take the following form:

```
mov eax/ax/al, any_address_mode;
```

No other source/target registers may be used other than eax/ax/al.

”

# building a fuse



|    |    |    |    |    |    |    |    |    |    |    |    |    |    |    |    |
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
| a1 | 82 | 9f | 1c | 13 | 92 | 5e | e7 | 98 | 56 | 9f | af | b3 | 67 | 8b | f1 |
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|

f8013bf0

f8013c00

f8013c04

f8013c08

movl [0xf8013c00], %eax

eax

access time: ~4000 cycles

building a fuse



|    |    |    |    |    |    |    |    |    |    |    |    |    |    |    |    |
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
| a1 | 82 | 9f | 1c | 13 | 92 | 5e | e7 | 98 | 56 | 9f | af | b3 | 67 | 8b | f1 |
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|

f8013bf0

f8013c00

f8013c04

f8013c08

access time: ~8000 cycles

building a fuse



# building a fuse

- “MMIO Configuration Coding Requirements”

“

In addition, all such accesses are required not to cross  
any naturally aligned DW boundary.

Access to MMIO configuration space registers  
that do not meet these requirements result in undefined behavior.

”

building a fuse

|    |    |    |    |    |    |    |    |    |    |    |    |    |    |    |    |
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
| a1 | 82 | 9f | 1c | 13 | 92 | 5e | e7 | 98 | 56 | 9f | af | b3 | 67 | 8b | f1 |
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|

f8013bf0

f8013c00

f8013c04

f8013c08

movq [0xf8013c00], %rax

rax



|    |    |    |    |    |    |    |    |    |    |    |    |    |    |    |    |
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
| a1 | 82 | 9f | 1c | 13 | 92 | 5e | e7 | 98 | 56 | 9f | af | b3 | 67 | 8b | f1 |
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|

f8013bf0

f8013c00

f8013c04

f8013c08

access time: ~12,000 cycles

building a fuse

- Find high access time on existing PCIe device...  
... followed by non-existing device
  - Need non-existing device to generate master-abort and trigger MCE

movq [0xf8013ff9], %rax



building a fuse

- Find high access time on existing PCIe device...  
... followed by non-existing device
  - Need non-existing device to generate master-abort and trigger MCE

movq [0xf8013ff9], %rax



building a fuse

- Find high access time on existing PCIe device...  
... followed by non-existing device
  - Need non-existing device to generate master-abort and trigger MCE

movq [0xf8013ff9], %rax



building a fuse

- Find high access time on existing PCIe device...  
... followed by non-existing device
  - Need non-existing device to generate master-abort and trigger MCE

movq [0xf8013ff9], %rax



building a fuse

- The fuse instruction

- An unaligned 8-byte PCIe access
- Straddles slow PCIe device and non-existing device
- CPU performs 3 separate 4-byte MMIO accesses
- Final 4-byte MMIO access hits non-existing PCIe device
- No device claims PCI request, results in PCI master-abort
- PCI master-abort received by northbridge
- Northbridge sends error to CPU
- CPU generates machine check exception
- 10,000+ cycles after fuse instruction began

lighting the fuse

- Little control over how long the fuse instruction takes
- But MCE must arrive in precise window

lighting the fuse





100 cycles X

10,000 cycles

Thread 1 [attacker]

Thread 0 [victim]

begin ??? instruction

• What we wanted...

Thread 1 (attacker)

begin fuse instruction

~10,000 cycles

generate MCE

Thread 0 (victim)

out b2 (trigger SMI)

receive SMI

out b2 ends

enter SMM (idt unchanged, cr4.mce cleared)

begin executing SMI handler

...

set cr4.mce

...

receive MCE

...

reload IDT

Attack window



- This won't work
  - If the MCE is received after SMM reloads the IDT, exception will be handled on SMM's interrupt handler, not attacker's
- Solution:
  - Slide the SMI trigger to calibrate the MCE to fall within the attack window

lighting the fuse







- A targeted disruptor tool
  - Previous MCE tool could generate MCE from attacking thread, and deliver to victim thread, but without any control of timing
  - Modify tool to light MCE fuse, and sliding delay on the victim thread
  - Can deliver cross-core MCEs to victim threads during privilege transitions or secure modes, at *precise* target times

lighting the fuse



Let's build a hammer...

Let's add a handle...

Let's be a bit more careful...

Let's find a nail...

Let's light a fuse...

We have all the pieces.



Let's build a hammer...

Let's add a handle...

Let's be a bit more careful...

Let's find a nail...

Let's light a fuse...

We have all the pieces.

We need a name...



Let's build a hammer...

Let's add a handle...

Let's be a bit more careful...

Let's find a nail...

Let's light a fuse...

We have all the pieces.

We need a name...

mchammer



the exploit.



































































































(demo)

the exploit.

deltaop@ubuntu-usb-3:~/\_research\$

- Arbitrary code execution with SMM privileges
  - “ring -2”
  - Invisible to operating system, hypervisor, etc.
  - Can preempt OS, hypervisor, etc.
  - Critical to platform security, server RAS, client miscellanea
  - Firmware R/W access in many configurations

impact

The background of the slide features a dark gray circuit board pattern with various traces, capacitors, and resistors, creating a technical and futuristic aesthetic.

# mitigation

- Malicious IDT allowed in SMM, on all AMD CPUs
- MCE, developed on pre-Zen

- Firmware mitigation of SMM MCE path
  - EDK2 SMM code is correct,  
but assumes IDT made safe by microcode
  - On platforms leaving IDT in untrusted state,  
EDK2 should be changed to mitigate MCE threat
  - Submitted patch to remove MCE vector

mitigation

The background of the slide features a dark gray circuit board pattern with various tracks, capacitors, and resistors.

mitigation

- IDT issue remains
  - `try { main() } except { pop_shell() }`

A dark gray background featuring a faint, glowing circuit board pattern with various nodes and connections.

future research

- Machine checks are *powerful*,  
but have never been explored for exploitation



- Other sources of MCEs

future research

"MPDMA TVF SDP Master Memory 1 ECC or parity error" "MPDMA TVF SDP Master Memory 2 ECC or parity error" "MPDMA TVF SDP Master Memory 3 ECC or parity error"  
"MPDMA TVF SDP Master Memory 4 ECC or parity error" "MPDMA TVF SDP Master Memory 5 ECC or parity error" "MPDMA TVF SDP Master Memory 6 ECC or parity error"  
"SDP Watchdog Timer expired" "MPDMA PTE Command FIFO ECC or parity error" "MPDMA PTE Hub Data FIFO ECC or parity error"  
"MPDMA PTE Internal Data FIFO ECC or parity error" "MPDMA PTE Command Memory DMA ECC or parity error" "MPDMA PTE Command Internal ECC or parity error"  
"MPDMA TVF SDP Master Memory 7 ECC or parity error" "ECC or Parity error" "PCIe error" "External SDP ErrEvent error" "SDP Egress Poison error" "Internal Poison error"  
"Internal system fatal error event" "CCIX PER Message logging" "CCIX Read Response with Status: Non-Data Error"  
"CCIX Write Response with Status: Non-Data Error" "CCIX Read Response with Status: Data Error" "CCIX Non-okay write response with data error"  
"SDP Data Parity Error logging" "Data Loss Error" "Training Error" "Flow Control Acknowledge Error" "Rx Fifo Underflow Error" "Rx Fifo Overflow Error"  
"CRC Error" "BER Exceeded Error" "Tx Vcid Data Error" "Replay Buffer Parity Error" "Data Parity Error" "Replay Fifo Overflow Error"  
"Replay Fifo Underflow Error" "Elastic Fifo Overflow Error" "Deskew Error"  
"Flow Control CRC Error" "Data Startup Limit Error" "FC Init Timeout Error"  
"Recovery Timeout Error" "Ready Serial Timeout Error" "Ready Serial Attempt Error"  
"Recovery Attempt Error" "Recovery Relock Attempt Error"  
"Replay Attempt Error" "Sync Header Error" "Tx Replay Timeout Error"  
"Rx Replay Timeout Error" "LinkSub Tx Timeout Error"  
"LinkSub Rx Timeout Error" "Rx CMD Packet Error"  
"RAM ECC Error" "ARC instruction buffer parity error"  
"ARC data buffer parity error"  
"PHY APB error" "Timeout error from GMI" "SRAM ECC error"  
"NTB Error Event" "SDP Parity error" "Parity error for port 0"  
"Parity error for port 1" "Parity error for port 2" "Parity error for port 3"  
"Parity error for port 4" "Parity error for port 5" "Parity error for port 6"  
"Parity error for port 7" "Parity error or ECC error for S0 RAM0"  
"Parity error or ECC error for S0 RAM1"  
"Parity error or ECC error for S0 RAM2" "Parity error for PHY RAM0"  
"Parity error for PHY RAM1"  
"AXI Slave Response error" "Mst CMD Error" "Mst Rx FIFO Error"  
"Mst Deskew Error" "Mst Detect Timeout Error" "Mst FlowControl Error"  
"Mst DataValid FIFO Error" "Mac LinkState Error" "Deskew Error"  
"Init Timeout Error" "Init Attempt Error" "Recovery Timeout Error"  
"Recovery Attempt Error" "Eye Training Timeout Error"  
"Data Startup Limit Error" "LSO Exit Error"  
"PLL powerState Update Timeout Error" "Rx FIFO Error"  
"Lcu Error" "Conv CECC Error" "Conv UECC Error"  
"Reserved" "Rx DataLoss Error" "Replay CECC Error"  
"Replay UECC Error" "CRC Error" "BER Exceeded Error"  
"FC Init Timeout Error" "FC Init Attempt Error" "Replay Timeout Error"  
"Replay Attempt Error" "Replay Underflow Error" "Replay Overflow Error"  
"Packet Type Error" "Rx FIFO Error" "Deskew Error"  
"Rx Detect Timeout Error" "Data Parity Error" "Data Loss Error"  
"Lcu Error" "HB1 Handshake Timeout Error" "HB2 Handshake Timeout Error"  
"Clk Sleep Rsp Timeout Error" "Clk Wake Rsp Timeout Error" "Reset Attack Error"  
"Remote Link Fatal Error" "Data Loss Error" "Training Error"  
"Replay Parity Error" "Rx Fifo Underflow Error" "Rx Fifo Overflow Error"  
"CRC Error" "BER Exceeded Error" "Tx Fifo Underflow Error"  
"Replay Buffer Parity Error" "Tx Overflow Error" "Replay Fifo Overflow Error"  
"Replay Fifo Underflow Error" "Elastic Fifo Overflow Error" "Deskew Error"  
"Offline Error" "Data Startup Limit Error" "FC Init Timeout Error"  
"Recovery Timeout Error" "Ready Serial Timeout Error" "Ready Serial Attempt Error"  
"Recovery Attempt Error" "Recovery Relock Attempt Error" "Deskew Error"  
"Rx Buffer Error" "Rx LFDS Fifo Overflow Error" "Rx LFDS Fifo Underflow Error"  
"LinkSub Tx Timeout Error" "LinkSub Rx Timeout Error" "Rx CMD Packet Error"  
"LFDS Training Timeout Error" "LFDS FC Init Timeout Error" "Data Loss Error"

- Other sources of MCEs

future research

ECC injection

processor errata

row-hammer bit flips in DRAM

- Other sources of MCEs

future research

"MPDMA TVF SDP Master Memory 1 ECC or parity error" "MPDMA TVF SDP Master Memory 2 ECC or parity error" "MPDMA TVF SDP Master Memory 3 ECC or parity error"  
"MPDMA TVF SDP Master Memory 4 ECC or parity error" "MPDMA TVF SDP Master Memory 5 ECC or parity error" "MPDMA TVF SDP Master Memory 6 ECC or parity error"  
"SDP Watchdog Timer expired" "MPDMA PTE Command FIFO ECC or parity error" "MPDMA PTE Hub Data FIFO ECC or parity error"  
"MPDMA PTE Internal Data FIFO ECC or parity error" "MPDMA PTE Command Memory DMA ECC or parity error" "MPDMA PTE Command Internal ECC or parity error"  
"MPDMA TVF SDP Master Memory 7 ECC or parity error" "ECC or Parity error" "PCIe error" "External SDP ErrEvent error" "SDP Egress Poison error" "Internal Poison error"  
"Internal system fatal error event" "CCIX PER Message logging" "CCIX Read Response with Status: Non-Data Error"  
"CCIX Write Response with Status: Non-Data Error" "CCIX Read Response with Status: Data Error" "CCIX Non-okay write response with data error"  
"SDP Data Parity Error logging" "Data Loss Error" "Training Error" "Flow Control Acknowledge Error" "Rx Fifo Underflow Error" "Rx Fifo Overflow Error"  
"CRC Error" "BER Exceeded Error" "Tx Vcid Data Error" "Replay Buffer Parity Error" "Data Parity Error" "Replay Fifo Overflow Error"  
"Replay Fifo Underflow Error" "Elastic Fifo Overflow Error" "Deskew Error"  
"Flow Control CRC Error" "Data Startup Limit Error" "FC Init Timeout Error"  
"Recovery Timeout Error" "Ready Serial Timeout Error" "Ready Serial Attempt Error"  
"Recovery Attempt Error" "Recovery Relock Attempt Error"  
"Replay Attempt Error" "Sync Header Error" "Tx Replay Timeout Error"  
"Rx Replay Timeout Error" "LinkSub Tx Timeout Error"  
"LinkSub Rx Timeout Error" "Rx CMD Packet Error"  
"RAM ECC Error" "ARC instruction buffer parity error"  
"ARC data buffer parity error"  
"PHY APB error" "Timeout error from GMI" "SRAM ECC error"  
"NTB Error Event" "SDP Parity error" "Parity error for port 0"  
"Parity error for port 1" "Parity error for port 2" "Parity error for port 3"  
"Parity error for port 4" "Parity error for port 5" "Parity error for port 6"  
"Parity error for port 7" "Parity error or ECC error for S0 RAM0"  
"Parity error or ECC error for S0 RAM1"  
"Parity error or ECC error for S0 RAM2" "Parity error for PHY RAM0"  
"Parity error for PHY RAM1"  
"AXI Slave Response error" "Mst CMD Error" "Mst Rx FIFO Error"  
"Mst Deskew Error" "Mst Detect Timeout Error" "Mst FlowControl Error"  
"Mst DataValid FIFO Error" "Mac LinkState Error" "Deskew Error"  
"Init Timeout Error" "Init Attempt Error" "Recovery Timeout Error"  
"Recovery Attempt Error" "Eye Training Timeout Error"  
"Data Startup Limit Error" "LSO Exit Error"  
"PLL powerState Update Timeout Error" "Rx FIFO Error"  
"Lcu Error" "Conv CECC Error" "Conv UECC Error"  
"Reserved" "Rx DataLoss Error" "Replay CECC Error"  
"Replay UECC Error" "CRC Error" "BER Exceeded Error"  
"FC Init Timeout Error" "FC Init Attempt Error" "Replay Timeout Error"  
"Replay Attempt Error" "Replay Underflow Error" "Replay Overflow Error"  
"Packet Type Error" "Rx FIFO Error" "Deskew Error"  
"Rx Detect Timeout Error" "Data Parity Error" "Data Loss Error"  
"Lou Error" "HB1 Handshake Timeout Error" "HB2 Handshake Timeout Error"  
"Clk Sleep Rsp Timeout Error" "Clk Wake Rsp Timeout Error" "Reset Attack Error"  
"Remote Link Fatal Error" "Data Loss Error" "Training Error"  
"Replay Parity Error" "Rx Fifo Underflow Error" "Rx Fifo Overflow Error"  
"CRC Error" "BER Exceeded Error" "Tx Fifo Underflow Error"  
"Replay Buffer Parity Error" "Tx Overflow Error" "Replay Fifo Overflow Error"  
"Replay Fifo Underflow Error" "Elastic Fifo Overflow Error" "Deskew Error"  
"Offline Error" "Data Startup Limit Error" "FC Init Timeout Error"  
"Recovery Timeout Error" "Ready Serial Timeout Error" "Ready Serial Attempt Error"  
"Recovery Attempt Error" "Recovery Relock Attempt Error" "Deskew Error"  
"Rx Buffer Error" "Rx LFDS Fifo Overflow Error" "Rx LFDS Fifo Underflow Error"  
"LinkSub Tx Timeout Error" "LinkSub Rx Timeout Error" "Rx CMD Packet Error"  
"LFDS Training Timeout Error" "LFDS FC Init Timeout Error" "Data Loss Error"

ECC injection

processor errata

row-hammer bit flips in DRAM

memory scrubber

other MMIO

no MMIO

- Other sources of MCEs
- Asynchronous MCEs

future research

"MPDMA TVF SDP Master Memory 1 ECC or parity error" "MPDMA TVF SDP Master Memory 2 ECC or parity error" "MPDMA TVF SDP Master Memory 3 ECC or parity error"  
"MPDMA TVF SDP Master Memory 4 ECC or parity error" "MPDMA TVF SDP Master Memory 5 ECC or parity error" "MPDMA TVF SDP Master Memory 6 ECC or parity error"  
"SDP Watchdog Timer expired" "MPDMA PTE Command FIFO ECC or parity error" "MPDMA PTE Hub Data FIFO ECC or parity error"  
"MPDMA PTE Internal Data FIFO ECC or parity error" "MPDMA PTE Command Memory DMA ECC or parity error" "MPDMA PTE Command Internal ECC or parity error"  
"MPDMA TVF SDP Master Memory 7 ECC or parity error" "ECC or Parity error" "PCIe error" "External SDP ErrEvent error" "SDP Egress Poison error" "Internal Poison error"  
"Internal system fatal error event" "CCIX PER Message logging" "CCIX Read Response with Status: Non-Data Error"  
"CCIX Write Response with Status: Non-Data Error" "CCIX Read Response with Status: Data Error" "CCIX Non-okay write response with data error"  
"SDP Data Parity Error logging" "Data Loss Error" "Training Error" "Flow Control Acknowledge Error" "Rx Fifo Underflow Error" "Rx Fifo Overflow Error"  
"CRC Error" "BER Exceeded Error" "Tx Vcid Data Error" "Replay Buffer Parity Error" "Data Parity Error" "Replay Fifo Overflow Error"  
"Replay Fifo Underflow Error" "Elastic Fifo Overflow Error" "Deskew Error"  
"Flow Control CRC Error" "Data Startup Limit Error" "FC Init Timeout Error"  
"Recovery Timeout Error" "Ready Serial Timeout Error" "Ready Serial Attempt Error"  
"Recovery Attempt Error" "Recovery Relock Attempt Error"  
"Replay Attempt Error" "Sync Header Error" "Tx Replay Timeout Error"  
"Rx Replay Timeout Error" "LinkSub Tx Timeout Error"  
"LinkSub Rx Timeout Error" "Rx CMD Packet Error"  
"RAM ECC Error" "ARC instruction buffer parity error"  
"ARC data buffer parity error"  
"PHY APB error" "Timeout error from GMI" "SRAM ECC error"  
"NTB Error Event" "SDP Parity error" "Parity error for port 0"  
"Parity error for port 1" "Parity error for port 2" "Parity error for port 3"  
"Parity error for port 4" "Parity error for port 5" "Parity error for port 6"  
"Parity error for port 7" "Parity error or ECC error for S0 RAM0"  
"Parity error or ECC error for S0 RAM1"  
"Parity error or ECC error for S0 RAM2" "Parity error for PHY RAM0"  
"Parity error for PHY RAM1"  
"AXI Slave Response error" "Mst CMD Error" "Mst Rx FIFO Error"  
"Mst Deskew Error" "Mst Detect Timeout Error" "Mst FlowControl Error"  
"Mst DataValid FIFO Error" "Mac LinkState Error" "Deskew Error"  
"Init Timeout Error" "Init Attempt Error" "Recovery Timeout Error"  
"Recovery Attempt Error" "Eye Training Timeout Error"  
"Data Startup Limit Error" "LSO Exit Error"  
"PLL powerState Update Timeout Error" "Rx FIFO Error"  
"Lcu Error" "Conv CECC Error" "Conv UECC Error"  
"Reserved" "Rx DataLoss Error" "Replay CECC Error"  
"Replay UECC Error" "CRC Error" "BER Exceeded Error"  
"FC Init Timeout Error" "FC Init Attempt Error" "Replay Timeout Error"  
"Replay Attempt Error" "Replay Underflow Error" "Replay Overflow Error"  
"Packet Type Error" "Rx FIFO Error" "Deskew Error"  
"Rx Detect Timeout Error" "Data Parity Error" "Data Loss Error"  
"Lou Error" "HB1 Handshake Timeout Error" "HB2 Handshake Timeout Error"  
"Clk Sleep Rsp Timeout Error" "Clk Wake Rsp Timeout Error" "Reset Attack Error"  
"Remote Link Fatal Error" "Data Loss Error" "Training Error"  
"Replay Parity Error" "Rx Fifo Underflow Error" "Rx Fifo Overflow Error"  
"CRC Error" "BER Exceeded Error" "Tx Fifo Underflow Error"  
"Replay Buffer Parity Error" "Tx Overflow Error" "Replay Fifo Overflow Error"  
"Replay Fifo Underflow Error" "Elastic Fifo Overflow Error" "Deskew Error"  
"Offline Error" "Data Startup Limit Error" "FC Init Timeout Error"  
"Recovery Timeout Error" "Ready Serial Timeout Error" "Ready Serial Attempt Error"  
"Recovery Attempt Error" "Recovery Relock Attempt Error" "Deskew Error"  
"Rx Buffer Error" "Rx LFDS Fifo Overflow Error" "Rx LFDS Fifo Underflow Error"  
"LinkSub Tx Timeout Error" "LinkSub Rx Timeout Error" "Rx CMD Packet Error"  
"LFDS Training Timeout Error" "LFDS FC Init Timeout Error" "Data Loss Error"

ECC injection

processor errata

row-hammer bit flips in DRAM

memory scrubber

other MMIO

no MMIO

ring-3 errata

faulty devices

here-be-dragons

- Other sources of MCEs
- Asynchronous MCEs
- Userland MCEs

future research

"MPDMA TVF SDP Master Memory 1 ECC or parity error" "MPDMA TVF SDP Master Memory 2 ECC or parity error" "MPDMA TVF SDP Master Memory 3 ECC or parity error"  
"MPDMA TVF SDP Master Memory 4 ECC or parity error" "MPDMA TVF SDP Master Memory 5 ECC or parity error" "MPDMA TVF SDP Master Memory 6 ECC or parity error"  
"SDP Watchdog Timer expired" "MPDMA PTE Command FIFO ECC or parity error" "MPDMA PTE Hub Data FIFO ECC or parity error"  
"MPDMA PTE Internal Data FIFO ECC or parity error" "MPDMA PTE Command Memory DMA ECC or parity error" "MPDMA PTE Command Internal ECC or parity error"  
"MPDMA TVF SDP Master Memory 7 ECC or parity error" "ECC or Parity error" "PCIe error" "External SDP ErrEvent error" "SDP Egress Poison error" "Internal Poison error"  
"Internal system fatal error event" "CCIX PER Message logging" "CCIX Read Response with Status: Non-Data Error"  
"CCIX Write Response with Status: Non-Data Error" "CCIX Read Response with Status: Data Error" "CCIX Non-okay write response with data error"  
"SDP Data Parity Error logging" "Data Loss Error" "Training Error" "Flow Control Acknowledge Error" "Rx Fifo Underflow Error" "Rx Fifo Overflow Error"  
"CRC Error" "BER Exceeded Error" "Tx Vcid Data Error" "Replay Buffer Parity Error" "Data Parity Error" "Replay Fifo Overflow Error"  
"Replay Fifo Underflow Error" "Elastic Fifo Overflow Error" "Deskew Error"  
"Flow Control CRC Error" "Data Startup Limit Error" "FC Init Timeout Error"  
"Recovery Timeout Error" "Ready Serial Timeout Error" "Ready Serial Attempt Error"  
"Recovery Attempt Error" "Recovery Relock Attempt Error"  
"Replay Attempt Error" "Sync Header Error" "Tx Replay Timeout Error"  
"Rx Replay Timeout Error" "LinkSub Tx Timeout Error"  
"LinkSub Rx Timeout Error" "Rx CMD Packet Error"  
"RAM ECC Error" "ARC instruction buffer parity error"  
"ARC data buffer parity error"  
"PHY APB error" "Timeout error from GMI" "SRAM ECC error"  
"NTB Error Event" "SDP Parity error" "Parity error for port 0"  
"Parity error for port 1" "Parity error for port 2" "Parity error for port 3"  
"Parity error for port 4" "Parity error for port 5" "Parity error for port 6"  
"Parity error for port 7" "Parity error or ECC error for S0 RAM0"  
"Parity error or ECC error for S0 RAM1"  
"Parity error or ECC error for S0 RAM2" "Parity error for PHY RAM0"  
"Parity error for PHY RAM1"  
"AXI Slave Response error" "Mst CMD Error" "Mst Rx FIFO Error"  
"Mst Deskew Error" "Mst Detect Timeout Error" "Mst FlowControl Error"  
"Mst DataValid FIFO Error" "Mac LinkState Error" "Deskew Error"  
"Init Timeout Error" "Init Attempt Error" "Recovery Timeout Error"  
"Recovery Attempt Error" "Eye Training Timeout Error"  
"Data Startup Limit Error" "LSO Exit Error"  
"PLL powerState Update Timeout Error" "Rx FIFO Error"  
"Lcu Error" "Conv CECC Error" "Conv UECC Error"  
"Reserved" "Rx DataLoss Error" "Replay CECC Error"  
"Replay UECC Error" "CRC Error" "BER Exceeded Error"  
"FC Init Timeout Error" "FC Init Attempt Error" "Replay Timeout Error"  
"Replay Attempt Error" "Replay Underflow Error" "Replay Overflow Error"  
"Packet Type Error" "Rx FIFO Error" "Deskew Error"  
"Rx Detect Timeout Error" "Data Parity Error" "Data Loss Error"  
"Lou Error" "HB1 Handshake Timeout Error" "HB2 Handshake Timeout Error"  
"Clk Sleep Rsp Timeout Error" "Clk Wake Rsp Timeout Error" "Reset Attack Error"  
"Remote Link Fatal Error" "Data Loss Error" "Training Error"  
"Replay Parity Error" "Rx Fifo Underflow Error" "Rx Fifo Overflow Error"  
"CRC Error" "BER Exceeded Error" "Tx Fifo Underflow Error"  
"Replay Buffer Parity Error" "Tx Overflow Error" "Replay Fifo Overflow Error"  
"Replay Fifo Underflow Error" "Elastic Fifo Overflow Error" "Deskew Error"  
"Offline Error" "Data Startup Limit Error" "FC Init Timeout Error"  
"Recovery Timeout Error" "Ready Serial Timeout Error" "Ready Serial Attempt Error"  
"Recovery Attempt Error" "Recovery Relock Attempt Error" "Deskew Error"  
"Rx Buffer Error" "Rx LFDS Fifo Overflow Error" "Rx LFDS Fifo Underflow Error"  
"LinkSub Tx Timeout Error" "LinkSub Rx Timeout Error" "Rx CMD Packet Error"  
"LFDS Training Timeout Error" "LFDS FC Init Timeout Error" "Data Loss Error"

ECC injection

processor errata

row-hammer bit flips in DRAM

memory scrubber

other MMIO

no MMIO

ring-3 errata

faulty devices

here-be-dragons

hypervisors

secure guests

enclaves

secure loader

- Other sources of MCEs
- Asynchronous MCEs
- Userland MCEs
- Other exploit targets

future research

"MPDMA TVF SDP Master Memory 1 ECC or parity error" "MPDMA TVF SDP Master Memory 2 ECC or parity error" "MPDMA TVF SDP Master Memory 3 ECC or parity error"  
"MPDMA TVF SDP Master Memory 4 ECC or parity error" "MPDMA TVF SDP Master Memory 5 ECC or parity error" "MPDMA TVF SDP Master Memory 6 ECC or parity error"  
"SDP Watchdog Timer expired" "MPDMA PTE Command FIFO ECC or parity error" "MPDMA PTE Hub Data FIFO ECC or parity error"  
"MPDMA PTE Internal Data FIFO ECC or parity error" "MPDMA PTE Command Memory DMA ECC or parity error" "MPDMA PTE Command Internal ECC or parity error"  
"MPDMA TVF SDP Master Memory 7 ECC or parity error" "ECC or Parity error" "PCIe error" "External SDP ErrEvent error" "SDP Egress Poison error" "Internal Poison error"  
"Internal system fatal error event" "CCIX PER Message logging" "CCIX Read Response with Status: Non-Data Error"  
"CCIX Write Response with Status: Non-Data Error" "CCIX Read Response with Status: Data Error" "CCIX Non-okay write response with data error"  
"SDP Data Parity Error logging" "Data Loss Error" "Training Error" "Flow Control Acknowledge Error" "Rx Fifo Underflow Error" "Rx Fifo Overflow Error"  
"CRC Error" "BER Exceeded Error" "Tx Vcid Data Error" "Replay Buffer Parity Error" "Data Parity Error" "Replay Fifo Overflow Error"  
"Replay Fifo Underflow Error" "Elastic Fifo Overflow Error" "Deskew Error"  
"Flow Control CRC Error" "Data Startup Limit Error" "FC Init Timeout Error"  
"Recovery Timeout Error" "Ready Serial Timeout Error" "Ready Serial Attempt Error"  
"Recovery Attempt Error" "Recovery Relock Attempt Error"  
"Replay Attempt Error" "Sync Header Error" "Tx Replay Timeout Error"  
"Rx Replay Timeout Error" "LinkSub Tx Timeout Error"  
"LinkSub Rx Timeout Error" "Rx CMD Packet Error"  
"RAM ECC Error" "ARC instruction buffer parity error"  
"ARC data buffer parity error"  
"PHY APB error" "Timeout error from GMI" "SRAM ECC error"  
"NTB Error Event" "SDP Parity error" "Parity error for port 0"  
"Parity error for port 1" "Parity error for port 2" "Parity error for port 3"  
"Parity error for port 4" "Parity error for port 5" "Parity error for port 6"  
"Parity error for port 7" "Parity error or ECC error for S0 RAM0"  
"Parity error or ECC error for S0 RAM1"  
"Parity error or ECC error for S0 RAM2" "Parity error for PHY RAM0"  
"Parity error for PHY RAM1"  
"AXI Slave Response error" "Mst CMD Error" "Mst Rx FIFO Error"  
"Mst Deskew Error" "Mst Detect Timeout Error" "Mst FlowControl Error"  
"Mst DataValid FIFO Error" "Mac LinkState Error" "Deskew Error"  
"Init Timeout Error" "Init Attempt Error" "Recovery Timeout Error"  
"Recovery Attempt Error" "Eye Training Timeout Error"  
"Data Startup Limit Error" "LSO Exit Error"  
"PLL powerState Update Timeout Error" "Rx FIFO Error"  
"Lcu Error" "Conv CECC Error" "Conv UECC Error"  
"Reserved" "Rx DataLoss Error" "Replay CECC Error"  
"Replay UECC Error" "CRC Error" "BER Exceeded Error"  
"FC Init Timeout Error" "FC Init Attempt Error" "Replay Timeout Error"  
"Replay Attempt Error" "Replay Underflow Error" "Replay Overflow Error"  
"Packet Type Error" "Rx FIFO Error" "Deskew Error"  
"Rx Detect Timeout Error" "Data Parity Error" "Data Loss Error"  
"Lou Error" "HB1 Handshake Timeout Error" "HB2 Handshake Timeout Error"  
"Clk Sleep Rsp Timeout Error" "Clk Wake Rsp Timeout Error" "Reset Attack Error"  
"Remote Link Fatal Error" "Data Loss Error" "Training Error"  
"Replay Parity Error" "Rx Fifo Underflow Error" "Rx Fifo Overflow Error"  
"CRC Error" "BER Exceeded Error" "Tx Fifo Underflow Error"  
"Replay Buffer Parity Error" "Tx Overflow Error" "Replay Fifo Overflow Error"  
"Replay Fifo Underflow Error" "Elastic Fifo Overflow Error" "Deskew Error"  
"Offline Error" "Data Startup Limit Error" "FC Init Timeout Error"  
"Recovery Timeout Error" "Ready Serial Timeout Error" "Ready Serial Attempt Error"  
"Recovery Attempt Error" "Recovery Relock Attempt Error" "Deskew Error"  
"Rx Buffer Error" "Rx LFDS Fifo Overflow Error" "Rx LFDS Fifo Underflow Error"  
"LinkSub Tx Timeout Error" "LinkSub Rx Timeout Error" "Rx CMD Packet Error"  
"LFDS Training Timeout Error" "LFDS FC Init Timeout Error" "Data Loss Error"

ECC injection

processor errata

row-hammer bit flips in DRAM

memory scrubber

other MMIO

no MMIO

ring-3 errata

faulty devices

here-be-dragons

hypervisors

secure guests

enclaves

secure loader

ARM

RISC-V

MIPS

PowerPC/Power

- Other sources of MCEs

- Asynchronous MCEs

- Userland MCEs

- Other exploit targets

- Other architectures

future research

"MPDMA TVF SDP Master Memory 1 ECC or parity error" "MPDMA TVF SDP Master Memory 2 ECC or parity error" "MPDMA TVF SDP Master Memory 3 ECC or parity error"  
"MPDMA TVF SDP Master Memory 4 ECC or parity error" "MPDMA TVF SDP Master Memory 5 ECC or parity error" "MPDMA TVF SDP Master Memory 6 ECC or parity error"  
"SDP Watchdog Timer expired" "MPDMA PTE Command FIFO ECC or parity error" "MPDMA PTE Hub Data FIFO ECC or parity error"  
"MPDMA PTE Internal Data FIFO ECC or parity error" "MPDMA PTE Command Memory DMA ECC or parity error" "MPDMA PTE Command Internal ECC or parity error"  
"MPDMA TVF SDP Master Memory 7 ECC or parity error" "ECC or Parity error" "PCIe error" "External SDP ErrEvent error" "SDP Egress Poison error" "Internal Poison error"  
"Internal system fatal error event" "CCIX PER Message logging" "CCIX Read Response with Status: Non-Data Error"  
"CCIX Write Response with Status: Non-Data Error" "CCIX Read Response with Status: Data Error" "CCIX Non-okay write response with data error"  
"SDP Data Parity Error logging" "Data Loss Error" "Training Error" "Flow Control Acknowledge Error" "Rx Fifo Underflow Error" "Rx Fifo Overflow Error"  
"CRC Error" "BER Exceeded Error" "Tx Vcid Data Error" "Replay Buffer Parity Error" "Data Parity Error" "Replay Fifo Overflow Error"  
"Replay Fifo Underflow Error" "Elastic Fifo Overflow Error" "Deskew Error"  
"Flow Control CRC Error" "Data Startup Limit Error" "FC Init Timeout Error"  
"Recovery Timeout Error" "Ready Serial Timeout Error" "Ready Serial Attempt Error"  
"Recovery Attempt Error" "Recovery Relock Attempt Error"  
"Replay Attempt Error" "Sync Header Error" "Tx Replay Timeout Error"  
"Rx Replay Timeout Error" "LinkSub Tx Timeout Error"  
"LinkSub Rx Timeout Error" "Rx CMD Packet Error"  
"RAM ECC Error" "ARC instruction buffer parity error"  
"ARC data buffer parity error"  
"PHY APB error" "Timeout error from GMI" "SRAM ECC error"  
"NTB Error Event" "SDP Parity error" "Parity error for port 0"  
"Parity error for port 1" "Parity error for port 2" "Parity error for port 3"  
"Parity error for port 4" "Parity error for port 5" "Parity error for port 6"  
"Parity error for port 7" "Parity error or ECC error for S0 RAM0"  
"Parity error or ECC error for S0 RAM1"  
"Parity error or ECC error for S0 RAM2" "Parity error for PHY RAM0"  
"Parity error for PHY RAM1"  
"AXI Slave Response error" "Mst CMD Error" "Mst Rx FIFO Error"  
"Mst Deskew Error" "Mst Detect Timeout Error" "Mst FlowControl Error"  
"Mst DataValid FIFO Error" "Mac LinkState Error" "Deskew Error"  
"Init Timeout Error" "Init Attempt Error" "Recovery Timeout Error"  
"Recovery Attempt Error" "Eye Training Timeout Error"  
"Data Startup Limit Error" "LSO Exit Error"  
"PLL powerState Update Timeout Error" "Rx FIFO Error"  
"Lcu Error" "Conv CECC Error" "Conv UECC Error"  
"Reserved" "Rx DataLoss Error" "Replay CECC Error"  
"Replay UECC Error" "CRC Error" "BER Exceeded Error"  
"FC Init Timeout Error" "FC Init Attempt Error" "Replay Timeout Error"  
"Replay Attempt Error" "Replay Underflow Error" "Replay Overflow Error"  
"Packet Type Error" "Rx FIFO Error" "Deskew Error"  
"Rx Detect Timeout Error" "Data Parity Error" "Data Loss Error"  
"Lou Error" "HB1 Handshake Timeout Error" "HB2 Handshake Timeout Error"  
"Clk Sleep Rsp Timeout Error" "Clk Wake Rsp Timeout Error" "Reset Attack Error"  
"Remote Link Fatal Error" "Data Loss Error" "Training Error"  
"Replay Parity Error" "Rx Fifo Underflow Error" "Rx Fifo Overflow Error"  
"CRC Error" "BER Exceeded Error" "Tx Fifo Underflow Error"  
"Replay Buffer Parity Error" "Tx Overflow Error" "Replay Fifo Overflow Error"  
"Replay Fifo Underflow Error" "Elastic Fifo Overflow Error" "Deskew Error"  
"Offline Error" "Data Startup Limit Error" "FC Init Timeout Error"  
"Recovery Timeout Error" "Ready Serial Timeout Error" "Ready Serial Attempt Error"  
"Recovery Attempt Error" "Recovery Relock Attempt Error" "Deskew Error"  
"Rx Buffer Error" "Rx LFDS Fifo Overflow Error" "Rx LFDS Fifo Underflow Error"  
"LinkSub Tx Timeout Error" "LinkSub Rx Timeout Error" "Rx CMD Packet Error"  
"LFDS Training Timeout Error" "LFDS FC Init Timeout Error" "Data Loss Error"

ECC injection

processor errata

row-hammer bit flips in DRAM

memory scrubber

other MMIO

no MMIO

ring-3 errata

faulty devices

here-be-dragons

hypervisors

secure guests

enclaves

secure loader

ARM

RISC-V

MIPS

PowerPC/Power

???

- Other sources of MCEs

- Asynchronous MCEs

- Userland MCEs

- Other exploit targets

- Other architectures

- Tip of the iceberg...

# future research

"MPDMA TVF SDP Master Memory 1 ECC or parity error" "MPDMA TVF SDP Master Memory 2 ECC or parity error" "MPDMA TVF SDP Master Memory 3 ECC or parity error"  
"MPDMA TVF SDP Master Memory 4 ECC or parity error" "MPDMA TVF SDP Master Memory 5 ECC or parity error" "MPDMA TVF SDP Master Memory 6 ECC or parity error"  
"SDP Watchdog Timer expired" "MPDMA PTE Command FIFO ECC or parity error" "MPDMA PTE Hub Data FIFO ECC or parity error"  
"MPDMA PTE Internal Data FIFO ECC or parity error" "MPDMA PTE Command Memory DMA ECC or parity error" "MPDMA PTE Command Internal ECC or parity error"  
"MPDMA TVF SDP Master Memory 7 ECC or parity error" "ECC or Parity error" "PCIe error" "External SDP ErrEvent error" "SDP Egress Poison error" "Internal Poison error"  
"Internal system fatal error event" "CCIX PER Message logging" "CCIX Read Response with Status: Non-Data Error"  
"CCIX Write Response with Status: Non-Data Error" "CCIX Read Response with Status: Data Error" "CCIX Non-okay write response with data error"  
"SDP Data Parity Error logging" "Data Loss Error" "Training Error" "Flow Control Acknowledge Error" "Rx Fifo Underflow Error" "Rx Fifo Overflow Error"  
"CRC Error" "BER Exceeded Error" "Tx Vcid Data Error" "Replay Buffer Parity Error" "Data Parity Error" "Replay Fifo Overflow Error"  
"Replay Fifo Underflow Error" "Elastic Fifo Overflow Error" "Deskew Error"  
"Flow Control CRC Error" "Data Startup Limit Error" "FC Init Timeout Error"  
"Recovery Timeout Error" "Ready Serial Timeout Error" "Ready Serial Attempt Error"  
"Recovery Attempt Error" "Recovery Relock Attempt Error"  
"Replay Attempt Error" "Sync Header Error" "Tx Replay Timeout Error"  
"Rx Replay Timeout Error" "LinkSub Tx Timeout Error"  
"LinkSub Rx Timeout Error" "Rx CMD Packet Error"  
"RAM ECC Error" "ARC instruction buffer parity error"  
"ARC data buffer parity error"  
"PHY APB error" "Timeout error from GMI" "SRAM ECC error"  
"NTB Error Event" "SDP Parity error" "Parity error for port 0"  
"Parity error for port 1" "Parity error for port 2" "Parity error for port 3"  
"Parity error for port 4" "Parity error for port 5" "Parity error for port 6"  
"Parity error for port 7" "Parity error or ECC error for S0 RAM0"  
"Parity error or ECC error for S0 RAM1"  
"Parity error or ECC error for S0 RAM2" "Parity error for PHY RAM0"  
"Parity error for PHY RAM1"  
"AXI Slave Response error" "Mst CMD Error" "Mst Rx FIFO Error"  
"Mst Deskew Error" "Mst Detect Timeout Error" "Mst FlowControl Error"  
"Mst DataValid FIFO Error" "Mac LinkState Error" "Deskew Error"  
"Init Timeout Error" "Init Attempt Error" "Recovery Timeout Error"  
"Recovery Attempt Error" "Eye Training Timeout Error"  
"Data Startup Limit Error" "LSO Exit Error"  
"PLL powerState Update Timeout Error" "Rx FIFO Error"  
"Lcu Error" "Conv CECC Error" "Conv UECC Error"  
"Reserved" "Rx DataLoss Error" "Replay CECC Error"  
"Replay UECC Error" "CRC Error" "BER Exceeded Error"  
"FC Init Timeout Error" "FC Init Attempt Error" "Replay Timeout Error"  
"Replay Attempt Error" "Replay Underflow Error" "Replay Overflow Error"  
"Packet Type Error" "Rx FIFO Error" "Deskew Error"  
"Rx Detect Timeout Error" "Data Parity Error" "Data Loss Error"  
"Lou Error" "HB1 Handshake Timeout Error" "HB2 Handshake Timeout Error"  
"Clk Sleep Rsp Timeout Error" "Clk Wake Rsp Timeout Error" "Reset Attack Error"  
"Remote Link Fatal Error" "Data Loss Error" "Training Error"  
"Replay Parity Error" "Rx Fifo Underflow Error" "Rx Fifo Overflow Error"  
"CRC Error" "BER Exceeded Error" "Tx Fifo Underflow Error"  
"Replay Buffer Parity Error" "Tx Overflow Error" "Replay Fifo Overflow Error"  
"Replay Fifo Underflow Error" "Elastic Fifo Overflow Error" "Deskew Error"  
"Offline Error" "Data Startup Limit Error" "FC Init Timeout Error"  
"Recovery Timeout Error" "Ready Serial Timeout Error" "Ready Serial Attempt Error"  
"Recovery Attempt Error" "Recovery Relock Attempt Error" "Deskew Error"  
"Rx Buffer Error" "Rx LFDS Fifo Overflow Error" "Rx LFDS Fifo Underflow Error"  
"LinkSub Tx Timeout Error" "LinkSub Rx Timeout Error" "Rx CMD Packet Error"  
"LFDS Training Timeout Error" "LFDS FC Init Timeout Error" "Data Loss Error"

- ~ INT31 team
- ~ dazzle cat (Stephanie)
- ~ Andi Kleen - "Machine check handling on Linux"
- ~ Wojtczuk - "A Stitch In Time Saves Nine: A Case of Multiple Operating System Vulnerability"
- ~ Schlüter et al. - "Heckler: Breaking Confidential VMs with Malicious Interrupts"
- ~ Google Cloud Security - "Intel Trust Domain Extensions (TDX) Security Review"
- ~ Peterson/Mulasmajic - "POP SS/MOV SS Vulnerability"
- ~ Steven Rostedt - "The x86 NMI iret problem"
- ~ Andy Lutomirski - CVE-2014-9090
- ~ Van Bulck et al. - SGX-Step
- ~ arch/x86/kernel/cpu/mce/severity.c
- ~ arch/x86/kernel/cpu/mce/amd.c
- ~ arch/x86/kernel/cpu/mce/intel.c
- ~ arch/powerpc/kernel/mce\_power.c
- ~ drivers/edac/amd64\_edac.c
- ~ drivers/edac/mce\_amd.c
- ~ ...

keep digging...



more to come  
@xoreaxeaxeax  
[github.com/xoreaxeaxeax/mchammer](https://github.com/xoreaxeaxeax/mchammer)

conclusion





Tell a joke about machine check exceptions.

Why don't machine check exceptions get invited to computer parties?  
Because they always bring the system down.