sev-snp
调用栈
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
snp_set_memory_private
set_memory_encrypted
=> __set_memory_enc_dec(addr, numpages, true)
=> __set_memory_enc_pgtable
=> cpa_flush()
=> x86_platform.guest.enc_status_change_prepare
(amd_enc_status_change_prepare)
=> if (!enc)
=> snp_set_memory_shared
=> set_pages_state
=> __set_pages_state
## 修改页表c-bit
=> ret = __change_page_attr_set_clr(&cpa, 1);
=> cpa_flush(&cpa, 0);
=> x86_platform.guest.enc_status_change_finish(addr, numpages, enc);
=> if (enc)
=> snp_set_memory_private(vaddr, npages);
=> set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE)
=> enc_dec_hypercall(vaddr, npages << PAGE_SHIFT, enc)
set_memory_decrypted
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
set_memory_decrypted
=> __set_memory_enc_dec(addr, numpages, false)
__set_pages_state
=> if op == SNP_PAGE_STATE_SHARED:
## set rmp validate_pages => 0
## 这个地方必须重新做验证。否则这个page可能会被host利用。
=> pvalidate_pages()
## 只考虑sev_cfg.ghcbs_initialized 情况
=> ghcb = __sev_get_ghcb(&state);
=> vmgexit_psc(ghcb, data) ## data: desc
=> memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));
=> foreach_entry
=> sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0)
## 初始化exit_code 和 exit_info
=> ghcb_set_sw_exit_code(ghcb, exit_code)
=> ghcb_set_sw_exit_info_1(ghcb, exit_info_1)
=> ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
## 设置ghcb pa 到msr
=> sev_es_wr_ghcb_msr(__pa(ghcb))
=> native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
=> VMGEXIT()
### VMGEXIT 实际上就是 rep 前缀 + vmmcall 的指令编码 (F3 0F 01 D9)
=> asm volatile("rep; vmmcall\n\r")
=> __sev_put_ghcb(&state)
=> if op == SNP_PAGE_STATE_PRIVATE:
## 如果是要变为private, 需要对新映射的page,重新做验证
=> pvalidate_pages()
enc_dec_hypercall
1
2
3
4
5
6
7
enc_dec_hypercall
=> foreach pfn (may be huge)
=> notify_page_enc_status_changed
=> kvm_sev_hc_page_enc_status
=> kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, pfn << PAGE_SHIFT, npages,
KVM_MAP_GPA_RANGE_ENC_STAT(enc) | KVM_MAP_GPA_RANGE_PAGE_SZ_4K)
=> vmmcall
sev_handle_vmgexit(kvm)
1
2
3
4
5
6
7
8
sev_handle_vmgexit
=> case SVM_VMGEXIT_PSC:
=> setup_vmgexit_scratch()
=> snp_begin_psc()
=> switch(entry_state.operation)
=> case VMGEXIT_PSC_OP_PRIVATE, VMGEXIT_PSC_OP_SHARED
=> vcpu->run->exit_reason = KVM_EXIT_HYPERCALL;
=> vcpu->run->hypercall.nr = KVM_HC_MAP_GPA_RANGE;
csv
reserve memory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
early_csv_reserve_mem
csv_cma_reserve_mem
|-> csv_smr = memblock_alloc_node(nr_node_ids)
|-> for_each_node (node)
|-> size = csv_early_percent_memory_on_node(node)
## 每个numanode 一个 array
|-> struct cma_array *array;
## 然后整个 numa reserve 空间, 通过 1<<CSV_CMA_SHIFT 划分
## (PUD_SHIFT)分割,方便csv预留
|-> count = DIV_ROUND_UP(size, 1 << CSV_CMA_SHIFT);
|-> cma_array_size = count * sizeof(*csv_cma) + sizeof(*array);
|-> array = memblock_alloc_node(cma_array_size, SMP_CACHE_BYTES, NUMA_NO_NODE);
|-> csv_contiguous_pernuma_area[node] = array;
## 调用cma相关接口预留内存
|-> for(i = 0; i < count; i++)
|-> csv_cma = &array->csv_cma[i];
|-> ret = cma_declare_contiguous_nid(0, CSV_CMA_SIZE, 0,
1 << CSV_MR_ALIGN_BITS, PMD_SHIFT - PAGE_SHIFT,
false, name, &(csv_cma->cma), node);
## 比较每个csv_cma, 找到其最低的地址和最高的地址 -- [start,
## end]
## 并且记录最大的间隙
|-> spanned_size = end - start;
|-> if (spanned_size > max_spanned_size)
-- max_spanned_size = spanned_size;
## 将start end 赋值到 csv_smr
|-> csv_smr[node].start = start
|-> csv_smr[node].end = end
## 设置 smr_entry_shift, 这里说明 smr 数量为 NUM_SMR_ENTRIES 8192 个
|-> csv_set_smr_entry_shift(ilog2(max_spanned_size / NUM_SMR_ENTRIES - 1) + 1);
|-> smr_entry_shift = max_t(unsigned int, shift, MIN_SMR_ENTRY_SHIFT);
declare SMR, SMCR
猜测全称 Secure Memory Region, Secure Memory Control Register ```sh module_init(sp_mod_init) sp_mod_init psp_pci_init sev_pci_init |-> if (is_vendor_hygon() && boot_cpu_has(X86_FEATURE_CSV3)) csv_platform_cmd_set_secure_memory_region(sev, &error);
csv_platform_cmd_set_secure_memory_region
将csv_smr中的所有的region,通过 CSV3_CMD_SET_SMR 传递给 csv fw
|-> hygon_psp_hooks.sev_do_cmd(CSV3_CMD_SET_SMR, csv_smr)
从 cma 中分配 1 << CSV_MR_ALIGN_BITS(28) 内存 ,256M
##
这里,如果是按照 AMDSEV的 RMP 设计, 假设一个RMP page 可以容纳256 entry
那一共有
##
entry_num = (256 * 1024 / 4) * 256
mem_size = entry_num * 4 / 1024 /1024 = 64G
##
也就是覆盖64G内存, 内存覆盖率远远不够
##
所以其单位就不是page_size, 而是 1 << smr_entry_shift
##
有 NUM_SMR_ENTRIES (8192) 区域。假设1T内存
每个区域有 1 * 1024 * 1024 (M) / 8192 = 128M 内存
##
每个区域有 32K(8 page) SMCR
256M / 8192 = 256 * 1024 (k) / 8192 = 32K
|-> cmd_set_smcr->base_address = csv_alloc_from_contiguous(1 << CSV_MR_ALIGN_BITS)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
### csv3_set_guest_private_memory
```sh
csv3_set_guest_private_memory
## 查看guest 内存所在的numanode
|-> for_each_set_bit(i, &csv->nodemask, BITS_PER_LONG)
node_set(i, nodemask)
## 计算分配内存大小,注意,这里包含了页表大小
## 引用这段代码注释:
##
## NPT secure memory size
##
## PTEs_entries = nr_pages
## PDEs_entries = nr_pages / 512
## PDPEs_entries = nr_pages / (512 * 512)
## PML4Es_entries = nr_pages / (512 * 512 * 512)
##
## Totals_entries = nr_pages + nr_pages / 512 + nr_pages / (512 * 512) +
## nr_pages / (512 * 512 * 512) <= nr_pages + nr_pages / 256
##
## Total_NPT_size = (Totals_entries / 512) * PAGE_SIZE = ((nr_pages +
## nr_pages / 256) / 512) * PAGE_SIZE = nr_pages * 8 + nr_pages / 32
## <= nr_pages * 9
##
## 这里分配时,也是按照 smr_entry_shift 进行分配
|-> size = ALIGN((nr_pages << PAGE_SHIFT), 1UL << smr_entry_shift) +
ALIGN(nr_pages * 9, 1UL << smr_entry_shift);
|-> nr_smr = size >> smr_entry_shift;
|-> for(i = 0; i < nr_smr; i++)
|-> smr = kzalloc(sizeof(*smr), GFP_KERNEL_ACCOUNT);
## 以 smr_entry_shift 颗粒分配连续内存
|-> smr->hpa = csv_alloc_from_contiguous((1UL << smr_entry_shift),
nodemask_ptr,
get_order(1 << smr_entry_shift));
## 记录该内存信息到一个数组
|-> regions[count].size = (1UL << smr_entry_shift);
|-> regions[count].base_address = smr->hpa;
|-> count++
## 当数组大小超过一个PAGE_SIZE时, 将该区域 调用 SET_GUEST_PRIVATE_MEMORY
## 提交给 csv fw
|-> if (count >= (PAGE_SIZE / sizeof(regions[0])) || (remainder == count)) {
## 封装命令
|-> set_guest_private_memory->nregions = count;
|-> set_guest_private_memory->handle = sev->handle;
|-> set_guest_private_memory->regions_paddr = __sme_pa(regions);
## 提交命令
|-> ret = hygon_kvm_hooks.sev_issue_cmd(kvm,
CSV3_CMD_SET_GUEST_PRIVATE_MEMORY,
set_guest_private_memory, &argp->error);
QEMU
1
2
3
kvm_cpu_exec
|-> case KVM_EXIT_MEMORY_FAULT:
|-> kvm_convert_memory
参考commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
[PATCH 0/4] x86: Cleanup and extend confidential computing API
https://lore.kernel.org/all/20220222185740.26228-1-kirill.shutemov@linux.intel.com/
Add AMD Secure Nested Paging (SEV-SNP) Guest Support
https://lore.kernel.org/all/20220307213356.2797205-1-brijesh.singh@amd.com/
commit dc3f3d2474b80eaee8be89f4c5eb344f10648f42
Author: Brijesh Singh <brijesh.singh@amd.com>
Date: Thu Feb 24 10:56:01 2022 -0600
x86/mm: Validate memory when changing the C-bit
Add the needed functionality to change pages state from shared
to private and vice-versa using the Page State Change VMGEXIT as
documented in the GHCB spec.
This post is licensed under CC BY 4.0 by the author.