The Windows 10 TH2 INT 2E mystery

Since Windows 10 TH2, NTDLL's syscall routines have changed: syscalls can now be performed either with the SYSCALL instruction or with the older INT 2E one. We say "older" because, until this change, this method had not been supported on x64 architectures since Windows 8. So why such a change? Let's try to find out.

Currently, on Windows 10 RS3 with Virtualization Based Security enabled, the NtCreateFile routine follows this scheme:

2:012> uf NtCreateFile
DBGHELP: SharedUserData - virtual symbol module
ntdll!NtCreateFile:
00007ff8`f55b0910 4c8bd1          mov     r10,rcx
00007ff8`f55b0913 b855000000      mov     eax,55h
00007ff8`f55b0918 f604250803fe7f01 test    byte ptr [SharedUserData+0x308 (00000000`7ffe0308)],1
00007ff8`f55b0920 7503            jne     ntdll!NtCreateFile+0x15 (00007ff8`f55b0925)  Branch

ntdll!NtCreateFile+0x12:
00007ff8`f55b0922 0f05            syscall
00007ff8`f55b0924 c3              ret

ntdll!NtCreateFile+0x15:
00007ff8`f55b0925 cd2e            int     2Eh
00007ff8`f55b0927 c3              ret
0:000> dt _KUSER_SHARED_DATA 7ffe0000
ntdll!_KUSER_SHARED_DATA
   +0x000 TickCountLowDeprecated : 0
   +0x004 TickCountMultiplier : 0xfa00000
   +0x008 InterruptTime    : _KSYSTEM_TIME
   +0x014 SystemTime       : _KSYSTEM_TIME
   +0x020 TimeZoneBias     : _KSYSTEM_TIME
   +0x02c ImageNumberLow   : 0x8664
   +0x02e ImageNumberHigh  : 0x8664
   +0x030 NtSystemRoot     : [260]  "C:\WINDOWS"
   [...]
   +0x2f8 TestRetInstruction : 0xc3
   +0x300 QpcFrequency     : 0n2533196
   +0x308 SystemCall       : 1
   +0x30c SystemCallPad0   : 0
   +0x310 SystemCallPad    : [2] 0
   +0x320 TickCount        : _KSYSTEM_TIME
   +0x320 TickCountQuad    : 0x6d22e8
   [...]
   +0x3d0 TimeZoneBiasEffectiveEnd : _LARGE_INTEGER 0x01d3c3d4`99852800
   +0x3d8 XState           : _XSTATE_CONFIGURATION

SharedUserData.SystemCall is 1, so all the system calls in VTL0 are performed using the INT 2E instruction. Let's try to force a SYSCALL instead, by replacing the conditional jump at NtCreateFile+0x10 with NOPs:

> eb ntdll!NtCreateFile+0x10 90 90
> g

Everything is OK: the process runs without any problem. Regarding performance, we tried a simple benchmark based on the rdtsc instruction (rdtsc/NtQuerySystemInformation/rdtsc loops, with hardcoded INT 2E or SYSCALL instructions), and found that the large majority of the NtQuerySystemInformation calls performed with the SYSCALL instruction are about 1,000 clock ticks faster than the INT 2E ones (rdtsc measurements should not be impacted by Hyper-V virtualization, and should also include "non-VTL0" tick counts). The code can be found at the end of this blog post.

INT 2E handlers

In fact, in the VTL0 kernel there is an INT 2E handler installed, pointing to KiSystemService:

0: kd> uf VslIsSecureKernelRunning
nt!VslIsSecureKernelRunning:
fffff803`1293bb7c 8b0596e62400    mov     eax,dword ptr [nt!HvlpFlags (fffff803`12b8a218)]
fffff803`1293bb82 c1e80d          shr     eax,0Dh
fffff803`1293bb85 2401            and     al,1
fffff803`1293bb87 c3              ret
0: kd> dd nt!HvlpFlags L 1
fffff803`12b8a218  000061df
0: kd> !idt
[...]
2d: fffff80312957100 nt!KiDebugServiceTrap
2e: fffff80312957200 nt!KiSystemService
2f: fffff80312950380 nt!KiDpcInterrupt
[...]
0: kd> dt _KUSER_SHARED_DATA 0FFFFF78000000000 SystemCall
nt!_KUSER_SHARED_DATA
   +0x308 SystemCall : 1

However, on a Windows 10 1709 build running under Hyper-V, but WITHOUT Virtualization Based Security enabled, there's no handler:

kd> dd nt!HvlpFlags L1
fffff800`9626bf8c  000010ad
0: kd> !idt
[...]
2d: fffff80096000d00 nt!KiDebugServiceTrap
2f: fffff80095ff9c90 nt!KiDpcInterrupt
[...]

Let's check on the VTL1's kernel, securekernel.exe. First, the VTL1 dedicated syscalls provided by iumdll.dll don't embed this INT 2E possibility:

IumPostMailbox
.text:00000001800011B0
.text:00000001800011B0                 mov     r10, rcx
.text:00000001800011B3                 mov     eax, 800000Ah
.text:00000001800011B8                 syscall
.text:00000001800011BA                 retn

And if we check whether the INT 2E routine is actually supported by the securekernel, we cannot find any such capability:

SkiInterruptTablePhase1 dq 0            ; DATA XREF: SkeInitSystem+1D
.data:00000001400542B8 00 2D 04 40 01 00 00 00                       dq offset KiDivideErrorFault
.data:00000001400542C0 01 00 00 00 00 00 00 00                       dq 1
.data:00000001400542C8 40 2E 04 40 01 00 00 00                       dq offset KiDebugTrapOrFault
.data:00000001400542D0 02 00 03 00 00 00 00 00                       dq 30002h
.data:00000001400542D8 C0 2F 04 40 01 00 00 00                       dq offset KiNmiInterrupt
.data:00000001400542E0 03 03 00 00 00 00 00 00                       dq 303h
.data:00000001400542E8 80 34 04 40 01 00 00 00                       dq offset KiBreakpointTrap
.data:00000001400542F0 04 03 00 00 00 00 00 00                       dq 304h
.data:00000001400542F8 40 35 04 40 01 00 00 00                       dq offset KiOverflowTrap
.data:0000000140054300 05 00 00 00 00 00 00 00                       dq 5
.data:0000000140054308 80 36 04 40 01 00 00 00                       dq offset KiBoundFault
.data:0000000140054310 06 00 00 00 00 00 00 00                       dq 6
.data:0000000140054318 C0 37 04 40 01 00 00 00                       dq offset KiInvalidOpcodeFault
.data:0000000140054320 07 00 00 00 00 00 00 00                       dq 7
.data:0000000140054328 40 3A 04 40 01 00 00 00                       dq offset KiNpxNotAvailableFault
.data:0000000140054330 08 00 01 00 00 00 00 00                       dq 10008h
.data:0000000140054338 80 3B 04 40 01 00 00 00                       dq offset KiDoubleFaultAbort
.data:0000000140054340 09 00 00 00 00 00 00 00                       dq 9
.data:0000000140054348 C0 3C 04 40 01 00 00 00                       dq offset KiNpxSegmentOverrunAbort
.data:0000000140054350 0A 00 00 00 00 00 00 00                       dq 0Ah
.data:0000000140054358 00 3E 04 40 01 00 00 00                       dq offset KiInvalidTssFault
.data:0000000140054360 0B 00 00 00 00 00 00 00                       dq 0Bh
.data:0000000140054368 40 3F 04 40 01 00 00 00                       dq offset KiSegmentNotPresentFault
.data:0000000140054370 0C 00 00 00 00 00 00 00                       dq 0Ch
.data:0000000140054378 C0 40 04 40 01 00 00 00                       dq offset KiStackFault
.data:0000000140054380 0D 00 00 00 00 00 00 00                       dq 0Dh
.data:0000000140054388 00 42 04 40 01 00 00 00                       dq offset KiGeneralProtectionFault
.data:0000000140054390 0E 00 00 00 00 00 00 00                       dq 0Eh
.data:0000000140054398 40 43 04 40 01 00 00 00                       dq offset KiPageFault
.data:00000001400543A0 10 00 00 00 00 00 00 00                       dq 10h
.data:00000001400543A8 40 46 04 40 01 00 00 00                       dq offset KiFloatingErrorFault
.data:00000001400543B0 11 00 00 00 00 00 00 00                       dq 11h
.data:00000001400543B8 00 48 04 40 01 00 00 00                       dq offset KiAlignmentFault
.data:00000001400543C0 12 00 02 00 00 00 00 00                       dq 20012h
.data:00000001400543C8 40 49 04 40 01 00 00 00                       dq offset KiMcheckAbort
.data:00000001400543D0 13 00 00 00 00 00 00 00                       dq 13h
.data:00000001400543D8 C0 4C 04 40 01 00 00 00                       dq offset KiXmmException
.data:00000001400543E0 29 03 00 00 00 00 00 00                       dq 329h
.data:00000001400543E8 80 4E 04 40 01 00 00 00                       dq offset KiRaiseSecurityCheckFailure
.data:00000001400543F0 2C 03 00 00 00 00 00 00                       dq 32Ch
.data:00000001400543F8 C0 4F 04 40 01 00 00 00                       dq offset KiRaiseAssertion
.data:0000000140054400 2D 03 00 00 00 00 00 00                       dq 32Dh
.data:0000000140054408 00 51 04 40 01 00 00 00                       dq offset KiDebugServiceTrap
.data:0000000140054410 40 00 00 00 00 00 00 00                       dq 40h
.data:0000000140054418 50 F8 03 40 01 00 00 00                       dq offset KiVinaInterrupt
.data:0000000140054420 50 00 00 00 00 00 00 00                       dq 50h
.data:0000000140054428 30 FB 03 40 01 00 00 00                       dq offset KiCallbackInterrupt
.data:0000000140054430 51 00 00 00 00 00 00 00                       dq 51h
.data:0000000140054438 80 01 04 40 01 00 00 00                       dq offset KiTimerInterrupt
.data:0000000140054440 E0 00 00 00 00 00 00 00                       dq 0E0h
.data:0000000140054448 60 FF 03 40 01 00 00 00                       dq offset KiIpiInterrupt
.data:0000000140054450 F0 00 00 00 00 00 00 00                       dq 0F0h
.data:0000000140054458 50 FD 03 40 01 00 00 00                       dq offset KiInterceptInterrupt
.data:0000000140054460 00 00 00 00 00 00 00 00                       dq 0

Moreover, KiSystemServiceExit ends either with calls to KiBugCheckEx, SkiTerminateThread, SYSRET or RET, but no IRETQ.

We did not actually debug the Secure Kernel to validate this assumption, but these findings lead us to think that there is no interrupt 2E handling in VTL1. Our first thought was that this mechanism was built in order to make Hyper-V able to "dispatch" VTL1 trustlets' "NT" syscalls directly to the VTL0 kernel, without using any hypercalls. This would be quite a challenge to implement, but might improve performance.

SharedUserData.SystemCall update

We didn't want to debug the secure kernel, so let's find out who sets the SharedUserData.SystemCall flag. The SharedUserData page is also mapped on the kernel memory at a dedicated address, and we need to find it. A good hint is that its SystemTime member is continually updated by the KiUpdateSystemTime routine:

KiUpdateSystemTime
    [...]
    mov     rax, ds:0FFFFF78000000014h
    and     cs:KiSystemTimeErrorAccumulator, 0
    lea     r11, [rbx+rax]
    jmp     loc_140120367
    [...]

Therefore the SharedUserData is mapped at 0FFFFF78000000000, and our SystemCall member is at FFFFF78000000308. Let's find some cross references in NTOSKRNL:

NTOS.KiInitializeKernel
    [...]
    call    HvlPhase0Initialize
    mov     r14d, 1
    cmp     ds:KiSystemCallSelector, r14d
    jnz     loc_1403D03F6                       ; not taken
    [...]
    test    cs:HvlEnlightenments, 80000h
    jz      loc_1403D03F6           
    mov     eax, r14d
    mov     ds:0FFFFF78000000308h, eax
    [...]

The KiSystemCallSelector global is set by KiInitializeBootStructures after having called HvlEnableVsmCalls and seems to be related to VBS stuff. KiInitializeBootStructures takes the KeLoaderBlock (provided to the kernel directly by the bootloader) as a parameter and updates the KiSystemCallSelector global depending on several flags in the loader block:

    mov     rax, [rdi+0F0h]             ; RDI points to KeLoaderBlock, so RAX points to LOADER_PARAMETER_EXTENSION
    lea     r10, cs:14075F170h
    mov     ecx, [rax+74h]              ; LOADER_PARAMETER_EXTENSION -> Flags
    mov     r11b, cl
    shr     r11b, 3
    and     r11b, r13b
    bt      ecx, 8                      ; VsmConfigured (0x100) test

Therefore, the INT 2E flag is set if the VsmConfigured bit is active.

Now let's check in the SecureKernel image how the SharedUserData.SystemCall flag may be updated. Only one reference may be found:

SecureKernel.SkmmInitializeUserSharedData
    [...]
    xor eax, eax
    mov ds:0FFFFF78000000294h, al
    mov ds:0FFFFF78000000308h, eax
    [...]

Conclusion

Okay, so in our securekernel the flag is never set, and NTOS syscalls are only performed with SYSCALL instructions, which invalidates our previous hypothesis. However, in 2015 HVSI, aka Windows Defender Application Guard, was also introduced (disclaimer: we have not looked at HVSI internals yet, and the following hypothesis is highly theoretical...). On unsecured VTL2 (HVSI) virtual machines, it appears that the kernel is also the NTOSKRNL image, and it will probably have its VsmConfigured flag set. Our current guess is that this INT 2E mechanism is tied to HVCI's monitoring of VTL2 syscalls. It should not be related to "pure" performance concerns, as the SYSCALL instructions appear to be faster in VTL0. We do not know whether SYSCALL instructions are allowed in VTL2. This hypothesis could be verified by performing tests in a VTL2 usermode process, or by performing a static analysis of the startup of VTL2 virtual machines (what are the main differences with the VTL0 startup?) and of the VTL1 control over VTL2 machines (is the SYSCALL instruction enabled?).

As for the reason why VTL0 NTDLL also uses INT 2E interrupts (MS could actually enable the SYSCALL mechanism only for VTL0), our guess is that neither of the 2 methods to make this happen is possible: the SharedUserData page is probably shared with VTL2 and cannot be modified, and the NTDLL "syscall" pages cannot be modified, as the digital signature would then become invalid.

Again, regarding this conclusion, we have not performed any check (yet), and this is nothing but a hypothesis. Feel free to contact us at contact at amossys.fr for any remarks/help :).

Updates

Artem Shishkin (@honorary_bot), Liran Alon (@Liran_Alon) and Alex Ionescu (@aionescu) pointed out on Twitter (https://twitter.com/honorary_bot/status/966609444674162688) that the INT 2E instruction is used instead of the SYSCALL one because it is actually faster to process (intercepting SYSCALL instructions would require decoding the faulting instruction).

code used to perform the benchmark

#include <Windows.h>
#include <stdio.h>
/* Information class value the benchmark passes as the first argument of
 * every NtQuerySystemInformation call. */
#define SystemProcessorInformation 0x1
/* View applied to the output buffer after a SystemProcessorInformation
 * query; the benchmark only reads ProcessorArchitecture, as a sanity check
 * that the call really filled the buffer. */
typedef struct _SYSTEM_PROCESSOR_INFORMATION {
    USHORT ProcessorArchitecture;
    USHORT ProcessorLevel;
    USHORT ProcessorRevision;
    USHORT MaximumProcessors;
    ULONG ProcessorFeatureBits;
} SYSTEM_PROCESSOR_INFORMATION, *PSYSTEM_PROCESSOR_INFORMATION;
/* Function-pointer type used to call the hand-built SYSCALL and INT 2E
 * stubs as if they were NtQuerySystemInformation:
 *   SystemInformationClass  - which information to query
 *   SystemInformation       - caller-supplied output buffer
 *   SystemInformationLength - size of that buffer, in bytes
 *   ReturnLength            - receives a length reported by the call */
typedef NTSTATUS(WINAPI *_NtQuerySystemInformation)(
    ULONG SystemInformationClass,
    PVOID SystemInformation,
    ULONG SystemInformationLength,
    PULONG ReturnLength
    );
/* Scratch output buffer shared by every NtQuerySystemInformation call. */
UCHAR data[0x200];
/* Number of back-to-back calls timed by one doCallRdtsc() measurement. */
#define LOOPSCNT 0x2000
/*
 * Measures the average cost of one call through `fct`.
 *
 * Reads the time-stamp counter, invokes `fct` LOOPSCNT times with the
 * SystemProcessorInformation class and the global `data` buffer, reads the
 * counter again, and returns the elapsed tick count divided by LOOPSCNT.
 * The status returned by `fct` is ignored.
 */
ULONGLONG doCallRdtsc(_NtQuerySystemInformation fct) {
    ULONG returnedLength;
    ULONG remaining = LOOPSCNT;
    LONGLONG startTicks;
    LONGLONG elapsedTicks;

    startTicks = (LONGLONG)__rdtsc();
    while (remaining != 0) {
        fct(SystemProcessorInformation, &data, (ULONG)sizeof(data), &returnedLength);
        remaining--;
    }
    elapsedTicks = (LONGLONG)__rdtsc() - startTicks;

    /* Signed division, exactly as with LARGE_INTEGER.QuadPart. */
    return (ULONGLONG)(elapsedTicks / LOOPSCNT);
}
#define TESTSCNT 0x100
int main(int argc, char** argv) {
    ULONGLONG ticksInt, ticksSysc, avgTicks;
    UCHAR shc[0x20] = { 0x4c, 0x8b, 0xd1, 0xb8, 0x36, 0x00, 0x00, 0x00, 0x0f, 0x05, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, // SYSCALL
                        0x4c, 0x8b, 0xd1, 0xb8, 0x36, 0x00, 0x00, 0x00, 0xcd, 0x2e, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, // INT 2E
    };
    PULONG tmpPtr = NULL;
    _NtQuerySystemInformation NtSyscall = NULL;
    _NtQuerySystemInformation NtInt2E = NULL;
    ULONG i = 0, c = 0, retL;
    PVOID zdata = VirtualAlloc(NULL, 0x1000, MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE);
    NTSTATUS st = 0;

    tmpPtr = (PULONG)GetProcAddress(GetModuleHandleA("ntdll"), "NtQuerySystemInformation");
    printf("NtQuerySystemInformation: 0x%x\n", *(PULONG)((PUCHAR)tmpPtr + 0x04));
    *(PULONG)(shc + 0x04) = *(PULONG)((PUCHAR)tmpPtr + 0x04);
    *(PULONG)(shc + 0x14) = *(PULONG)((PUCHAR)tmpPtr + 0x04);

    memcpy(zdata, shc, 0x20);
    NtSyscall = (_NtQuerySystemInformation)zdata;
    NtInt2E = (_NtQuerySystemInformation)((SIZE_T)zdata + 0x10);

    memset(data, 0, 0x200);
    st = NtSyscall(SystemProcessorInformation, &data, 0x200, &retL);
    printf("SYSCALL returned %x and ProcessorArchitecture is 0x%x\n", st, ((PSYSTEM_PROCESSOR_INFORMATION)data)->ProcessorArchitecture);
    memset(data, 0, 0x200);
    st = NtInt2E(SystemProcessorInformation, &data, 0x200, &retL);
    printf("INT 2E returned %x and ProcessorArchitecture is 0x%x\n", st, ((PSYSTEM_PROCESSOR_INFORMATION)data)->ProcessorArchitecture);

    avgTicks = 0;
    for (c = 0; c < TESTSCNT; c++) {
        ticksInt = doCallRdtsc(NtInt2E);
        ticksSysc = doCallRdtsc(NtSyscall);
        avgTicks = avgTicks + (ticksSysc - ticksInt);
        if (ticksSysc > ticksInt)
            printf("S");
        else
            printf("I");
    }
    avgTicks /= TESTSCNT;
    printf("\nRDTSC\tAverage diffs (syscall - int2E): %d\n", (LONG64)avgTicks);

    system("pause");
    return 0;
}