ksm.h
/*
* ksm - a really simple and fast x64 hypervisor
* Copyright (C) 2016, 2017 Ahmed Samy <[email protected]>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __KSM_H
#define __KSM_H
#ifdef __linux__
#include <linux/kernel.h>
#else
#include <intrin.h>
#endif
#include "compiler.h"
#include "x86.h"
#include "vmx.h"
#include "mm.h"
#include "bitmap.h"
#define KSM_MAX_VCPUS 32
#define __EXCEPTION_BITMAP 0
#define HCALL_STOP 0 /* Stop virtualization on this CPU */
#define HCALL_IDT 1 /* Hook IDT entry (see vcpu_put_idt()) */
#define HCALL_UIDT 2 /* Unhook IDT entry */
#ifdef EPAGE_HOOK
#define HCALL_HOOK 3 /* Hook page */
#define HCALL_UNHOOK 4 /* Unhook page */
#endif
#define HCALL_VMFUNC 5 /* Emulate VMFunc */
#ifdef PMEM_SANDBOX
#define HCALL_SA_TASK 6 /* Sandbox: free EPTPs */
#endif
#ifdef INTROSPECT_ENGINE
#define HCALL_INTROSPECT 7 /* Introspect: create eptp */
#endif
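/*
 * Illustrative sketch (not part of this header): hypercalls are issued
 * from guest mode with __vmx_vmcall(nr, arg), as vcpu_vmfunc() below does
 * for HCALL_VMFUNC.  Assuming EPAGE_HOOK is enabled and taking the
 * argument type as an assumption, a page-hook request might look like:
 *
 *	__vmx_vmcall(HCALL_HOOK, &epage);
 *
 * Each HCALL_* number is dispatched by the VMCALL exit handler.
 */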
/*
 * NOTE:
 * All of these are relative to the per-cpu host stack
 * pointer, do not change!!!  They are supposed
 * to match the ones defined by Intel in the Exit Qualification.
 * They are also matched with the assembly code, see PUSH_REGS.
 *
 * For a brief look at how the stack is laid out when passed over to
 * vcpu_handle_exit(), see vmx.S.
 *
 * Note for the last 2:
 * STACK_EFL_VCPU: in vcpu_do_exit(), this is vcpu.
 * STACK_EFL_VCPU: in vcpu_handle_fail(), this is the eflags.
 * STACK_VCPU: in vcpu_handle_fail(), this is vcpu.
 */
#define STACK_REG_AX 0
#define STACK_REG_CX 1
#define STACK_REG_DX 2
#define STACK_REG_BX 3
#define STACK_REG_SP 4
#define STACK_REG_BP 5
#define STACK_REG_SI 6
#define STACK_REG_DI 7
#define STACK_REG_R8 8
#define STACK_REG_R9 9
#define STACK_REG_R10 10
#define STACK_REG_R11 11
#define STACK_REG_R12 12
#define STACK_REG_R13 13
#define STACK_REG_R14 14
#define STACK_REG_R15 15
#define STACK_EFL_VCPU 16
#define STACK_VCPU 17
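/*
 * Usage sketch: these indices select a slot in the register array that
 * vmx.S pushes before calling vcpu_handle_exit(); the ksm_read_reg*() and
 * ksm_write_reg*() helpers below take them directly, e.g. reading the
 * guest's RAX/ECX inside an exit handler:
 *
 *	uintptr_t rax = ksm_read_reg(vcpu, STACK_REG_AX);
 *	u32 ecx = ksm_read_reg32(vcpu, STACK_REG_CX);
 */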
#define KSM_PANIC_CODE 0xCCDDFF11
#define VCPU_TRIPLEFAULT 0x33DDE83A
#define VCPU_BUG_UNHANDLED 0xBAADF00D
#define VCPU_IRQ_UNHANDLED 0xCAFEBABE
#define KSM_PANIC_FAILED_VMENTRY 0xBAADBABE
#define KSM_PANIC_GUEST_STATE 0xBAAD7A1E
#define KSM_PANIC_UNEXPECTED 0xEEEEEEE9
#ifdef DBG
#ifndef __linux__
#define KSM_PANIC(a, b, c, d) KeBugCheckEx(MANUALLY_INITIATED_CRASH, a, b, c, d)
#else
#define KSM_PANIC(a, b, c, d) panic("bugcheck %016llX %016llX 0x%016llX 0x%016llX\n", \
	(u64)a, (u64)b, (u64)c, (u64)d)
#endif
#else
#define KSM_PANIC(a, b, c, d) (void)0
#endif
/* Short name: */
#ifdef __linux__
#define cpu_nr() smp_processor_id()
#else
#define cpu_nr() KeGetCurrentProcessorNumberEx(NULL)
#endif
/* VPID 0 is used by VMX root. */
#define vpid_nr() (u16)(cpu_nr() + 1)
#ifdef __linux__
#define proc_name() current->comm
#define proc_id() current->pid
#else
#define current PsGetCurrentProcess()
#define proc_name() PsGetProcessImageFileName(current)
#define proc_id() PsGetProcessId(current)
#endif
#ifdef ENABLE_PRINT
#ifdef __linux__
#define KSM_DEBUG(fmt, args...) printk(KERN_INFO "ksm: CPU %hd: %s: " fmt, cpu_nr(), __func__, ##args)
#define KSM_DEBUG_RAW(str) printk(KERN_INFO "ksm: CPU %hd: %s: " str, cpu_nr(), __func__)
#else
#ifdef _MSC_VER
#define KSM_DEBUG(fmt, ...) do_print("ksm: CPU %hd: " __func__ ": " fmt, cpu_nr(), __VA_ARGS__)
#define KSM_DEBUG_RAW(str) do_print("ksm: CPU %hd: " __func__ ": " str, cpu_nr())
#else
/* avoid warning on empty argument list */
#define KSM_DEBUG(fmt, args...) do_print("ksm: CPU %hd: %s: " fmt, cpu_nr(), __func__, ##args)
#define KSM_DEBUG_RAW(str) do_print("ksm: CPU %hd: %s: " str, cpu_nr(), __func__)
#endif
#endif
#else
#define KSM_DEBUG(fmt, ...)
#define KSM_DEBUG_RAW(str)
#endif
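/*
 * Usage: KSM_DEBUG("handling exit %d\n", reason); the CPU number and the
 * calling function's name are prepended automatically.  Both macros
 * compile to nothing unless ENABLE_PRINT is defined.
 */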
/*
 * Should definitely replace this with something more useful; right now
 * this is utterly useless...
 */
#ifdef VCPU_TRACER_LOG
#define VCPU_TRACER_START() KSM_DEBUG("%p\n", vcpu)
#define VCPU_TRACER_END() KSM_DEBUG("%p handled\n", vcpu)
#else
#define VCPU_TRACER_START()
#define VCPU_TRACER_END()
#endif
/* EPT Memory type */
#define EPT_MT_UNCACHABLE 0
#define EPT_MT_WRITECOMBINING 1
#define EPT_MT_WRITETHROUGH 4
#define EPT_MT_WRITEPROTECTED 5
#define EPT_MT_WRITEBACK 6
#define EPT_MT_UNCACHED 7
/* EPT Access bits */
#define EPT_ACCESS_NONE 0
#define EPT_ACCESS_READ 0x1
#define EPT_ACCESS_WRITE 0x2
#define EPT_ACCESS_RW (EPT_ACCESS_READ | EPT_ACCESS_WRITE)
#define EPT_ACCESS_EXEC 0x4
#define EPT_ACCESS_RX (EPT_ACCESS_READ | EPT_ACCESS_EXEC)
#define EPT_ACCESS_RWX (EPT_ACCESS_RW | EPT_ACCESS_EXEC)
#define EPT_ACCESS_ALL EPT_ACCESS_RWX
/* Accessed dirty flags */
#define EPT_ACCESSED 0x100
#define EPT_DIRTY 0x200
/* #VE (ept violation) bits (Exit qualification) and suppress bit */
#define EPT_VE_READABLE 0x8 /* EPTE is readable */
#define EPT_VE_WRITABLE 0x10 /* EPTE is writable */
#define EPT_VE_EXECUTABLE 0x20 /* EPTE is executable */
#define EPT_VE_RWX 0x38 /* All of the above OR'd */
#define EPT_VE_AR_SHIFT 0x3
#define EPT_AR_MASK 0x7
#define EPT_VE_VALID_GLA 0x80 /* Valid guest linear address */
#define EPT_VE_TRANSLATION 0x100 /* Translation fault */
#define EPT_VE_NMI_UNBLOCKING 0x2000 /* NMI unblocking due to IRET */
#define EPT_SUPPRESS_VE_BIT 0x8000000000000000 /* Suppress convertible EPT violations */
#define EPT_MAX_EPTP_LIST 512 /* Processor defined size */
#define EPTP_EXHOOK 0 /* hook eptp index, executable hooks only */
#define EPTP_RWHOOK 1 /* hook eptp index, readwrite hooks, no exec */
#define EPTP_NORMAL 2 /* sane eptp index, no hooks */
#define EPTP_DEFAULT EPTP_EXHOOK
#define EPTP_INIT_USED 3 /* number of unique ptrs currently in use (and that should be freed) */
#define EPTP(e, i) (e)->ptr_list[(i)]
#define EPT4(e, i) (e)->pml4_list[(i)]
#define for_each_eptp(ept, i) \
for (u16 i = 0; i < EPT_MAX_EPTP_LIST; ++i) \
if (test_bit(i, ept->ptr_bitmap))
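/*
 * Usage sketch: iterate only the EPT pointers that were actually allocated
 * (tracked in ptr_bitmap); do_something() is a placeholder:
 *
 *	for_each_eptp(ept, i)
 *		do_something(EPT4(ept, i));
 */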
#define EPT_BUGCHECK_CODE 0x3EDFAAAA
#define EPT_BUGCHECK_TOOMANY 0xFFFFFFFE
#define EPT_BUGCHECK_MISCONFIG 0xE3E3E3E3
#define EPT_BUGCHECK_EPTP_LIST 0xDFDFDFDF
#define EPT_UNHANDLED_VIOLATION 0xEEEEEEEE
#define KSM_EPT_REQUIRED_EPT (VMX_EPT_PAGE_WALK_4_BIT | VMX_EPTP_WB_BIT |\
VMX_EPT_INVEPT_BIT | VMX_EPT_EXTENT_GLOBAL_BIT)
struct shadow_idt_entry {
unsigned n;
void *h;
};
struct vmcs {
u32 revision_id;
u32 abort;
u32 data[1];
};
#ifdef _MSC_VER
#pragma warning(disable:4201) /* stupid nonstandard bullshit */
#endif
/* Posted interrupt descriptor */
struct pi_desc {
/*
* 256 bits of posted interrupt requests
* The bit index is the vector in IDT.
*/
u32 pir[8];
union {
struct {
/*
* bit 256 - Outstanding notification, must be set to notify
* the processor when interrupt vector is set in the PIR.
*/
u16 on : 1;
/* bit 257 - suppress notification */
u16 sn : 1;
/* bits 271:258 - reserved */
u16 rsvd0 : 14;
/* bits 279:272 - notification vector */
u8 nv;
/* bits 287:280 - reserved */
u16 rsvd1;
/* bits 319:288 - notification destination */
u32 ndst;
};
u64 control;
};
u32 rsvd[6];
} __align(64);
static inline bool pi_test_bit(struct pi_desc *d, int vector)
{
return test_bit(vector, (unsigned long *)d->pir);
}
static inline void pi_set_irq(struct pi_desc *d, int vector)
{
set_bit(vector, (unsigned long *)d->pir);
d->on = 1;
}
static inline void pi_clear_irq(struct pi_desc *d, int vector)
{
clear_bit(vector, (unsigned long *)d->pir);
d->on = 0;
}
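/*
 * Illustrative sketch: posting an interrupt sets the vector in the PIR and
 * raises the outstanding-notification bit; the caller would then still have
 * to deliver the notification vector (nv) to the destination (ndst), which
 * is outside the scope of these helpers:
 *
 *	pi_set_irq(&vcpu->pi_desc, vector);
 *	// assumption: the notification IPI is sent elsewhere
 */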
#ifdef NESTED_VMX
#define VMCS_LAUNCH_STATE_NONE 0 /* no state */
#define VMCS_LAUNCH_STATE_CLEAR 1 /* vmclear was executed */
#define VMCS_LAUNCH_STATE_LAUNCHED 2 /* vmlaunch was executed */
struct nested_vcpu {
uintptr_t vmcs; /* mapped via gpa->hpa (vmcs_region) */
uintptr_t vmcs_region; /* gpa */
uintptr_t vmxon_region; /* gpa */
uintptr_t current_vmxon; /* gpa (set if nested in root) */
u32 launch_state; /* vmcs launch state */
u64 feat_ctl; /* MSR_IA32_FEATURE_CONTROL */
bool inside_guest; /* set if inside nested's guest */
};
static inline void nested_enter(struct nested_vcpu *nested)
{
/*
* About to enter nested guest due to a vmlaunch /
* vmresume executed by the nested hypervisor.
*/
nested->inside_guest = true;
nested->current_vmxon = 0;
}
static inline void nested_leave(struct nested_vcpu *nested)
{
/*
* About to leave nested guest to enter nested hypervisor
* to process an event coming from the nested guest.
*/
nested->inside_guest = false;
nested->current_vmxon = nested->vmxon_region;
}
static inline bool nested_entered(const struct nested_vcpu *nested)
{
/*
* If this value is false, then it means the event came from
* the nested hypervisor and therefore needs to be processed
* by us, otherwise, it came from the nested guest and we should
* probably exit to the nested hypervisor, see exit.c
*/
return nested->inside_guest;
}
/*
 * Should probably map and unmap the vmcs as needed, but this is OK for
 * the time being...
 */
static inline bool nested_has_vmcs(const struct nested_vcpu *nested)
{
return nested->vmcs != 0;
}
static inline void nested_free_vmcs(struct nested_vcpu *nested)
{
if (nested->vmcs != 0) {
mm_unmap((void *)nested->vmcs, PAGE_SIZE);
nested->vmcs = 0;
}
}
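/*
 * Flow sketch (assumption, based on the comments above): when the nested
 * hypervisor executes vmlaunch/vmresume we call nested_enter(); an exit
 * that must be handed back to the nested hypervisor goes through
 * nested_leave().  An exit handler could therefore route events like:
 *
 *	if (nested_entered(&vcpu->nested_vcpu))
 *		reflect_to_nested_hv(vcpu);	// hypothetical helper
 *	else
 *		handle_in_root(vcpu);		// hypothetical helper
 */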
#endif
/*
 * IRQs are queued in case we need to inject another interrupt
 * (or we were unable to inject one past a VM exit), so that we can
 * inject contributory faults appropriately, e.g. #PF into #DF, etc.
 *
 * See exit.c on how this is used.
 */
struct pending_irq {
bool pending;
u32 err;
u32 bits;
u32 instr_len;
};
#ifdef ENABLE_PML
#define PML_MAX_ENTRIES 512
#endif
/* #VE (EPT Violation via IDT exception information) */
struct ve_except_info {
u32 reason; /* EXIT_REASON_EPT_VIOLATION */
u32 except_mask; /* FFFFFFFF (set to 0 to deliver more) */
u64 exit; /* normal exit qualification bits, see above */
u64 gla; /* guest linear address */
u64 gpa; /* guest physical address */
u16 eptp; /* current EPTP index */
};
struct ept_ve_around {
struct vcpu *vcpu;
struct ve_except_info *info;
uintptr_t rip;
uintptr_t pgd;
int dpl;
u16 eptp_next;
bool invalidate;
};
struct ept {
__align(PAGE_SIZE) u64 ptr_list[EPT_MAX_EPTP_LIST];
u64 *pml4_list[EPT_MAX_EPTP_LIST];
DECLARE_BITMAP(ptr_bitmap, EPT_MAX_EPTP_LIST);
};
#define HOST_STACK_SIZE (2 << PAGE_SHIFT)
struct vcpu {
__align(PAGE_SIZE) u8 stack[HOST_STACK_SIZE];
__align(PAGE_SIZE) u8 vapic_page[PAGE_SIZE];
#ifdef ENABLE_PML
__align(PAGE_SIZE) u64 pml[PML_MAX_ENTRIES];
#endif
__align(PAGE_SIZE) struct vmcs vmxon;
__align(PAGE_SIZE) struct vmcs vmcs;
__align(PAGE_SIZE) struct ve_except_info ve;
struct pi_desc pi_desc;
u32 entry_ctl;
u32 exit_ctl;
u32 pin_ctl;
u32 cpu_ctl;
u32 secondary_ctl; /* Emulation purposes of VE / VMFUNC */
u64 vm_func_ctl; /* Same as above */
bool subverted;
/* Those are set during VM-exit only: */
uintptr_t *hsp; /* stack ptr when passed to vcpu_handle_exit() */
uintptr_t eflags; /* guest eflags */
uintptr_t ip; /* guest IP */
u16 curr_handler; /* Current VM exit handler */
#ifdef DBG
u16 prev_handler; /* Previous VM exit handler */
#endif
/* These bits are also masked from CRx_READ_SHADOW. */
uintptr_t cr0_guest_host_mask;
uintptr_t cr4_guest_host_mask;
/* Pending IRQ */
struct pending_irq irq;
/* EPT for this CPU */
struct ept ept;
/* Guest IDT (emulated) */
struct gdtr g_idt;
/* Shadow IDT (working) */
struct gdtr idt;
/* Shadow entries we know about so we can restore them appropriately. */
struct kidt_entry64 shadow_idt[256];
#ifdef PMEM_SANDBOX
/* EPTP before switch to per-task eptp. */
u16 eptp_before;
void *last_switch;
#endif
#ifdef NESTED_VMX
/* Nested */
struct nested_vcpu nested_vcpu;
#endif
};
static inline struct vcpu *ept_to_vcpu(struct ept *ept)
{
return container_of(ept, struct vcpu, ept);
}
static inline bool vcpu_has_pending_irq(const struct vcpu *vcpu)
{
return vcpu->irq.pending;
}
static inline void ksm_write_reg16(struct vcpu *vcpu, int reg, u16 val)
{
*(u16 *)&vcpu->hsp[reg] = val;
}
static inline void ksm_write_reg32(struct vcpu *vcpu, int reg, u32 val)
{
*(u32 *)&vcpu->hsp[reg] = val;
}
static inline void ksm_write_reg(struct vcpu *vcpu, int reg, uintptr_t val)
{
*(uintptr_t *)&vcpu->hsp[reg] = val;
}
static inline u16 ksm_read_reg16(struct vcpu *vcpu, int reg)
{
return (u16)vcpu->hsp[reg];
}
static inline u32 ksm_read_reg32(struct vcpu *vcpu, int reg)
{
return (u32)vcpu->hsp[reg];
}
static inline uintptr_t ksm_read_reg(struct vcpu *vcpu, int reg)
{
return vcpu->hsp[reg];
}
static inline u32 ksm_combine_reg32(struct vcpu *vcpu, int lo, int hi)
{
return (u32)ksm_read_reg16(vcpu, lo) | (u32)ksm_read_reg16(vcpu, hi) << 16;
}
static inline u64 ksm_combine_reg64(struct vcpu *vcpu, int lo, int hi)
{
return (u64)ksm_read_reg32(vcpu, lo) | (u64)ksm_read_reg32(vcpu, hi) << 32;
}
static inline uintptr_t *ksm_reg(struct vcpu *vcpu, int reg)
{
return &vcpu->hsp[reg];
}
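/*
 * Example: a WRMSR-style exit supplies its 64-bit operand split across
 * EDX:EAX, so a handler would reassemble it like this (sketch only):
 *
 *	u64 val = ksm_combine_reg64(vcpu, STACK_REG_AX, STACK_REG_DX);
 */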
#ifdef EPAGE_HOOK
struct epage_info {
u64 dpa;
u64 cpa;
u64 origin;
void *c_va;
struct list_head link;
};
#endif
struct ksm {
int active_vcpus;
struct vcpu vcpu_list[KSM_MAX_VCPUS];
struct pmem_range ranges[MAX_RANGES];
int range_count;
struct mtrr_range mtrr_ranges[MAX_MTRR];
int mtrr_count;
u8 mtrr_def;
uintptr_t host_pgd;
u64 vpid_ept;
#ifdef EPAGE_HOOK
struct list_head epage_list;
spinlock_t epage_lock;
#endif
#ifdef PMEM_SANDBOX
struct list_head task_list;
spinlock_t task_lock;
#endif
#ifdef INTROSPECT_ENGINE
struct list_head watch_list;
spinlock_t watch_lock;
#endif
/* see ksm.c */
__align(PAGE_SIZE) u8 msr_bitmap[PAGE_SIZE];
/* IO bitmap A: ports 0000H through 7FFFH */
__align(PAGE_SIZE) u8 io_bitmap_a[PAGE_SIZE];
/* IO bitmap B: ports 8000H through FFFFh */
__align(PAGE_SIZE) u8 io_bitmap_b[PAGE_SIZE];
};
/*
* Do NOT use inside VMX root mode, use vcpu_to_ksm() instead...
* Use this and I'll come after you.
*/
extern struct ksm *ksm;
#if !defined(__linux__) && defined(ENABLE_PRINT)
/* print.c */
extern NTSTATUS print_init(void);
extern void print_exit(void);
extern void do_print(const char *fmt, ...);
#endif
/* ksm.c */
extern int ksm_init(struct ksm **kp);
extern int ksm_free(struct ksm *k);
extern int ksm_subvert(struct ksm *k);
extern int ksm_unsubvert(struct ksm *k);
extern int __ksm_init_cpu(struct ksm *k);
extern int __ksm_exit_cpu(struct ksm *k);
extern int ksm_hook_idt(unsigned n, void *h);
extern int ksm_free_idt(unsigned n);
extern bool ksm_write_virt(struct vcpu *vcpu, u64 gva, const u8 *data, size_t len);
extern bool ksm_read_virt(struct vcpu *vcpu, u64 gva, u8 *data, size_t len);
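/*
 * Typical driver-side flow (sketch, assuming the loader calls these in this
 * order and the usual 0-on-success convention):
 *
 *	struct ksm *k;
 *	if (ksm_init(&k) == 0) {
 *		ksm_subvert(k);		// virtualize all online CPUs
 *		// ... later, on unload:
 *		ksm_unsubvert(k);
 *		ksm_free(k);
 *	}
 */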
static inline struct vcpu *ksm_cpu_at(struct ksm *k, int cpu)
{
return &k->vcpu_list[cpu];
}
static inline struct vcpu *ksm_cpu(struct ksm *k)
{
return ksm_cpu_at(k, cpu_nr());
}
static inline struct vcpu *ksm_current_cpu(void)
{
BUG_ON(!ksm);
return ksm_cpu(ksm);
}
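/*
 * vcpu points at &ksm->vcpu_list[cpu_nr()], so stepping back cpu_nr()
 * elements yields vcpu_list[0], and subtracting that member's offset within
 * struct ksm recovers the containing ksm pointer, without touching the
 * global (which must not be used in VMX root mode, see above).
 */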
static inline struct ksm *vcpu_to_ksm(struct vcpu *vcpu)
{
struct vcpu *list = vcpu - cpu_nr();
return (struct ksm *)((uintptr_t)list - offsetof(struct ksm, vcpu_list));
}
struct h_vmfunc {
u32 eptp;
u32 func;
};
static inline u16 vcpu_eptp_idx(const struct vcpu *vcpu)
{
if (vcpu->secondary_ctl & SECONDARY_EXEC_ENABLE_VE)
return vmcs_read16(EPTP_INDEX);
const struct ve_except_info *ve = &vcpu->ve;
return ve->eptp;
}
static inline u8 vcpu_vmfunc(u32 eptp, u32 func)
{
struct vcpu *vcpu = ksm_current_cpu();
struct ve_except_info *ve = &vcpu->ve;
if (vcpu->secondary_ctl & SECONDARY_EXEC_ENABLE_VMFUNC) {
if (func == VM_FUNCTION_CTL_EPTP_SWITCHING)
ve->eptp = (u16)eptp;
return __vmx_vmfunc(eptp, func);
}
return __vmx_vmcall(HCALL_VMFUNC, &(struct h_vmfunc) {
.eptp = eptp,
.func = func,
});
}
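/*
 * Example: switching the current CPU to the hook-free EPT view; when the
 * processor lacks VMFUNC support this transparently falls back to the
 * HCALL_VMFUNC hypercall above:
 *
 *	vcpu_vmfunc(EPTP_NORMAL, VM_FUNCTION_CTL_EPTP_SWITCHING);
 */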
static inline void vcpu_put_idt(struct vcpu *vcpu, u16 cs, unsigned n, void *h)
{
struct kidt_entry64 *e = idt_entry(vcpu->idt.base, n);
memcpy(&vcpu->shadow_idt[n], e, sizeof(*e));
set_intr_gate(n, cs, vcpu->idt.base, (uintptr_t)h);
}
static inline bool cpu_supports_invvpidtype(const struct ksm *k, int type)
{
int avail = (k->vpid_ept >> VMX_VPID_EXTENT_SHIFT) & 7;
return avail & (1 << type);
}
static inline bool cpu_supports_invepttype(const struct ksm *k, int type)
{
int avail = (k->vpid_ept >> VMX_EPT_EXTENT_SHIFT) & 6;
return avail & (1 << type);
}
static inline u8 cpu_invept(struct ksm *k, u64 gpa, u64 ptr)
{
if (cpu_supports_invepttype(k, VMX_EPT_EXTENT_CONTEXT))
return __invept_gpa(ptr, gpa);
return __invept_all();
}
static inline u8 cpu_invvpid(struct ksm *k, u64 gva)
{
if (cpu_supports_invvpidtype(k, VMX_VPID_EXTEND_INDIVIDUAL_ADDR))
return __invvpid_addr(vpid_nr(), gva);
return __invvpid_all();
}
#ifdef EPAGE_HOOK
/* page.c */
extern int ksm_epage_init(struct ksm *k);
extern int ksm_epage_exit(struct ksm *k);
struct epage_info *ksm_prepare_epage(void *original, void *redirect, bool *exist);
extern int ksm_hook_epage(void *original, void *redirect);
extern int ksm_hook_epage_on_cpu(struct epage_info *epage, int cpu);
extern int ksm_unhook_epage(struct ksm *k, void *original);
extern int __ksm_unhook_epage(struct epage_info *epage);
extern struct epage_info *ksm_find_epage(struct ksm *k, uintptr_t gpa);
extern void ksm_handle_epage(struct vcpu *vcpu, struct epage_info *epage);
extern void ksm_handle_epage_ve(struct epage_info *epage, struct ept_ve_around *ve);
#endif
/* sandbox.c */
#ifdef PMEM_SANDBOX
#ifndef __linux__
typedef HANDLE pid_t;
#endif
extern int ksm_sandbox_init(struct ksm *k);
extern int ksm_sandbox_exit(struct ksm *k);
extern bool ksm_sandbox_handle_ept(struct ept_ve_around *ve);
extern void ksm_sandbox_handle_cr3(struct vcpu *vcpu, u64 cr3);
extern bool ksm_sandbox_handle_vmcall(struct vcpu *vcpu, uintptr_t arg);
extern int ksm_sandbox(struct ksm *k, pid_t pid);
extern int ksm_unbox(struct ksm *k, pid_t pid);
#endif
/* introspect.c */
#ifdef INTROSPECT_ENGINE
struct watch_ioctl;
extern int ksm_introspect_init(struct ksm *k);
extern int ksm_introspect_exit(struct ksm *k);
extern int ksm_introspect_start(struct ksm *k);
extern int ksm_introspect_stop(struct ksm *k);
extern bool ksm_introspect_handle_vmcall(struct vcpu *vcpu, uintptr_t arg);
extern bool ksm_introspect_handle_ept(struct ept_ve_around *ve);
extern int ksm_introspect_add_watch(struct ksm *k, struct watch_ioctl *watch);
extern int ksm_introspect_rem_watch(struct ksm *k, struct watch_ioctl *watch);
extern int ksm_introspect_collect(struct ksm *k, struct watch_ioctl *watch);
#endif
/* vcpu.c */
extern int vcpu_init(struct vcpu *vcpu);
extern void vcpu_free(struct vcpu *vcpu);
extern void vcpu_switch_root_eptp(struct vcpu *vcpu, u16 index);
extern u64 *ept_alloc_page(u64 *pml4, int access, int mtype, u64 gpa, u64 hpa);
extern u64 *ept_pte(u64 *pml4, u64 gpa);
extern bool ept_handle_violation(struct vcpu *vcpu);
extern bool ept_create_ptr(struct ept *ept, int access, u16 *out_eptp);
extern void ept_free_ptr(struct ept *ept, u16 eptp);
extern u8 ept_memory_type(struct ksm *k, u64 gpa);
static inline void __set_epte_pfn(u64 *epte, u64 pfn)
{
*epte &= ~PAGE_PA_MASK;
*epte |= (pfn & PTE_MASK_P) << PTE_SHIFT_P;
}
static inline void __set_epte_ar(u64 *epte, u64 ar)
{
*epte &= ~(ar ^ EPT_ACCESS_ALL);
*epte |= ar & EPT_AR_MASK;
}
static inline void __set_epte_ar_inplace(u64 *epte, u64 ar)
{
*epte |= ar & EPT_AR_MASK;
}
static inline void __set_epte_ar_pfn(u64 *epte, u64 ar, u64 pfn)
{
__set_epte_pfn(epte, pfn);
__set_epte_ar(epte, ar);
}
static inline void ept_set_hpa(struct ept *ept, int eptp, u64 gpa, u64 hpa)
{
u64 *epte = ept_pte(EPT4(ept, eptp), gpa);
if (epte)
__set_epte_pfn(epte, hpa >> PAGE_SHIFT);
}
static inline void ept_set_ar(struct ept *ept, int eptp, u64 gpa, u64 ar)
{
u64 *epte = ept_pte(EPT4(ept, eptp), gpa);
if (epte)
__set_epte_ar(epte, ar);
}
static inline bool ept_gpa_to_hpa(struct ept *ept, int eptp, u64 gpa, u64 *hpa)
{
u64 *epte = ept_pte(EPT4(ept, eptp), gpa);
if (!epte || !(*epte & EPT_AR_MASK))
return false;
*hpa = PAGE_PA(*epte);
return true;
}
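/*
 * Sketch of an execute-only page redirection, the pattern the EPTP_EXHOOK
 * view presumably exists for: point the hook view's EPTE at a copy page and
 * strip read/write so data accesses fault into the other views.  Here
 * original_gpa, copy_hpa and k are placeholders supplied by the caller:
 *
 *	ept_set_hpa(ept, EPTP_EXHOOK, original_gpa, copy_hpa);
 *	ept_set_ar(ept, EPTP_EXHOOK, original_gpa, EPT_ACCESS_EXEC);
 *	cpu_invept(k, original_gpa, EPTP(ept, EPTP_EXHOOK));
 */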
static inline pte_t *__gva_to_gpa(struct vcpu *vcpu, uintptr_t cr3,
uintptr_t gva, u32 ac)
{
pte_t *pte = pte_from_cr3_va(cr3, gva);
if (!pte || (pte->pte & ac) != ac)
return NULL;
return pte;
}
static inline bool gva_to_gpa(struct vcpu *vcpu, uintptr_t cr3,
uintptr_t gva, u32 ac, u64 *gpa)
{
pte_t *pte = __gva_to_gpa(vcpu, cr3, gva, ac);
if (!pte)
return false;
*gpa = PAGE_PPA(pte);
return true;
}
static inline bool gpa_to_hpa(struct vcpu *vcpu, u64 gpa, u64 *hpa)
{
return ept_gpa_to_hpa(&vcpu->ept, vcpu_eptp_idx(vcpu), gpa, hpa);
}
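/*
 * Putting the two translations together (sketch; cr3, gva and the required
 * access mask ac are whatever the caller obtained from the guest):
 * GVA -> GPA through the guest page tables, then GPA -> HPA through the
 * current EPT view:
 *
 *	u64 gpa, hpa;
 *	if (gva_to_gpa(vcpu, cr3, gva, ac, &gpa) &&
 *	    gpa_to_hpa(vcpu, gpa, &hpa))
 *		use_hpa(hpa);	// hypothetical consumer
 */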
static inline void ar_get_bits(u8 ar, char *p)
{
p[0] = p[1] = p[2] = '-';
p[3] = '\0';
if (ar & EPT_ACCESS_READ)
p[0] = 'r';
if (ar & EPT_ACCESS_WRITE)
p[1] = 'w';
if (ar & EPT_ACCESS_EXEC)
p[2] = 'x';
}
static inline void __get_epte_ar(u64 *epte, char *p)
{
return ar_get_bits((u8)*epte & EPT_AR_MASK, p);
}
static inline void get_epte_ar(u64 *pml4, u64 gpa, char *p)
{
return __get_epte_ar(ept_pte(pml4, gpa), p);
}
/* resubv.c */
#ifdef ENABLE_RESUBV
extern int register_power_callback(void);
extern void unregister_power_callback(void);
#else
static inline int register_power_callback(void) { return 0; }
static inline void unregister_power_callback(void) { }
#endif
/* hotplug.c */
extern int register_cpu_callback(void);
extern void unregister_cpu_callback(void);
#endif