Post

sev-snp

调用栈

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
snp_set_memory_private


set_memory_encrypted
=> __set_memory_enc_dec(addr, numpages, true)
   => __set_memory_enc_pgtable
      => cpa_flush()
      => x86_platform.guest.enc_status_change_prepare 
      (amd_enc_status_change_prepare)
         => if (!enc)
            => snp_set_memory_shared
               => set_pages_state
                  => __set_pages_state
      ## 修改页表c-bit
      => ret = __change_page_attr_set_clr(&cpa, 1);
      => cpa_flush(&cpa, 0);
      => x86_platform.guest.enc_status_change_finish(addr, numpages, enc);
         => if (enc)
            => snp_set_memory_private(vaddr, npages);
               => set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE)
         ## 与 enc 取值无关, 不属于上面的 if (enc) 分支
         => enc_dec_hypercall(vaddr, npages << PAGE_SHIFT, enc)

set_memory_decrypted

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
set_memory_decrypted
  => __set_memory_enc_dec(addr, numpages, false)

__set_pages_state
=> if op == SNP_PAGE_STATE_SHARED:
   ## set rmp validate_pages => 0
   ## 转为 shared 之前必须先撤销验证(清除 validated 位)。否则这个page可能会被host利用。
   => pvalidate_pages()
   ## 只考虑 sev_cfg.ghcbs_initialized 情况
=> ghcb = __sev_get_ghcb(&state);
=> vmgexit_psc(ghcb, data)  ## data: desc
   => memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));
   => foreach_entry
      => sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0)
         ## 初始化exit_code 和 exit_info
         => ghcb_set_sw_exit_code(ghcb, exit_code)
         => ghcb_set_sw_exit_info_1(ghcb, exit_info_1)
         => ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
         ## 设置ghcb pa 到msr
         => sev_es_wr_ghcb_msr(__pa(ghcb))
            => native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
         => VMGEXIT()
            ### ???? 实际上是VMGEXIT指令 rep + vmmcall 编码
            => asm volatile("rep; vmmcall\n\r")
=> __sev_put_ghcb(&state)
=> if op == SNP_PAGE_STATE_PRIVATE:
   ## 如果是要变为private, 需要对新映射的page,重新做验证
   => pvalidate_pages()

enc_dec_hypercall

1
2
3
4
5
6
7
enc_dec_hypercall
=> foreach pfn (may be huge)
   => notify_page_enc_status_changed
      => kvm_sev_hc_page_enc_status
         => kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, pfn << PAGE_SHIFT, npages,
                          KVM_MAP_GPA_RANGE_ENC_STAT(enc) | KVM_MAP_GPA_RANGE_PAGE_SZ_4K)
            => vmmcall

sev_handle_vmgexit(kvm)

1
2
3
4
5
6
7
8
sev_handle_vmgexit
=> case SVM_VMGEXIT_PSC:
   => setup_vmgexit_scratch()
   => snp_begin_psc()
   => switch(entry_state.operation)
      => case VMGEXIT_PSC_OP_PRIVATE, VMGEXIT_PSC_OP_SHARED
         => vcpu->run->exit_reason = KVM_EXIT_HYPERCALL;
         => vcpu->run->hypercall.nr = KVM_HC_MAP_GPA_RANGE;

csv

reserve memory

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
early_csv_reserve_mem
  csv_cma_reserve_mem
  |-> csv_smr = memblock_alloc_node(nr_node_ids)
      |-> for_each_node (node)
          |-> size = csv_early_percent_memory_on_node(node)
              ## 每个numanode 一个 array
              |-> struct cma_array *array;
              ## 然后整个 numa reserve 空间, 通过 1<<CSV_CMA_SHIFT 划分
              ## (PUD_SHIFT)分割,方便csv预留
              |-> count = DIV_ROUND_UP(size, 1 << CSV_CMA_SHIFT);
              |-> cma_array_size = count * sizeof(*csv_cma) + sizeof(*array);
              |-> array = memblock_alloc_node(cma_array_size, SMP_CACHE_BYTES, NUMA_NO_NODE);
              |-> csv_contiguous_pernuma_area[node] = array;
              ## 调用cma相关接口预留内存
              |-> for(i = 0; i < count; i++)
                  |-> csv_cma = &array->csv_cma[i];
                  |-> ret = cma_declare_contiguous_nid(0, CSV_CMA_SIZE, 0,
                           1 << CSV_MR_ALIGN_BITS, PMD_SHIFT - PAGE_SHIFT,
                           false, name, &(csv_cma->cma), node);
                  ## 比较每个csv_cma, 找到其最低的地址和最高的地址 -- [start,
                  ## end]

                  ## 并且记录所有 node 中最大的跨度 (end - start)
                  |-> spanned_size = end - start;
                  |-> if (spanned_size > max_spanned_size)
                      -- max_spanned_size = spanned_size;
          ## 将 start 以及区间大小 (end - start) 记录到 csv_smr
          |-> csv_smr[node].start = start
          |-> csv_smr[node].size = end - start
  ## 设置 smr_entry_shift, 这里说明 smr 数量为 NUM_SMR_ENTRIES 8192 个
  |-> csv_set_smr_entry_shift(ilog2(max_spanned_size / NUM_SMR_ENTRIES - 1) + 1);
      |-> smr_entry_shift = max_t(unsigned int, shift, MIN_SMR_ENTRY_SHIFT);

declare SMR, SMCR

猜测全称: Secure Memory Region, Secure Memory Control Register

```sh
module_init(sp_mod_init)
sp_mod_init
  psp_pci_init
    sev_pci_init
    |-> if (is_vendor_hygon() && boot_cpu_has(X86_FEATURE_CSV3))
          csv_platform_cmd_set_secure_memory_region(sev, &error);
```

csv_platform_cmd_set_secure_memory_region

将csv_smr中的所有的region,通过 CSV3_CMD_SET_SMR 传递给 csv fw

|-> hygon_psp_hooks.sev_do_cmd(CSV3_CMD_SET_SMR, csv_smr)

从 cma 中分配 1 << CSV_MR_ALIGN_BITS(28) 内存, 即 256M

##

这里,如果是按照 AMDSEV的 RMP 设计, 假设一个RMP page 可以容纳256 entry

那一共有

##

entry_num = (256 * 1024 / 4) * 256

mem_size = entry_num * 4 / 1024 /1024 = 64G

##

也就是覆盖64G内存, 内存覆盖率远远不够

##

所以其单位就不是 page_size, 而是 1 << smr_entry_shift

##

有 NUM_SMR_ENTRIES (8192) 区域。假设1T内存

每个区域有 1 * 1024 * 1024 (M) / 8192 = 128M 内存

##

每个区域有 32K(8 page) SMCR

256M / 8192 = 256 * 1024 (k) / 8192 = 32K

|-> cmd_set_smcr->base_address = csv_alloc_from_contiguous(1 << CSV_MR_ALIGN_BITS)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
### csv3_set_guest_private_memory
```sh
csv3_set_guest_private_memory
## 查看guest 内存所在的numanode
|-> for_each_set_bit(i, &csv->nodemask, BITS_PER_LONG)
      node_set(i, nodemask)

## 计算分配内存大小,注意,这里包含了页表大小
## 引用这段代码注释:
##
##  NPT secure memory size
## 
##  PTEs_entries = nr_pages
##  PDEs_entries = nr_pages / 512
##  PDPEs_entries = nr_pages / (512 * 512)
##  PML4Es_entries = nr_pages / (512 * 512 * 512)
## 
##  Totals_entries = nr_pages + nr_pages / 512 + nr_pages / (512 * 512) +
##       nr_pages / (512 * 512 * 512) <= nr_pages + nr_pages / 256
## 
##  Total_NPT_size = (Totals_entries / 512) * PAGE_SIZE = ((nr_pages +
##       nr_pages / 256) / 512) * PAGE_SIZE = nr_pages * 8 + nr_pages / 32
##       <= nr_pages * 9
##
## 这里分配时,也是按照 smr_entry_shift 进行分配
|-> size = ALIGN((nr_pages << PAGE_SHIFT), 1UL << smr_entry_shift) +
        ALIGN(nr_pages * 9, 1UL << smr_entry_shift);
|-> nr_smr = size >> smr_entry_shift;
|-> for(i = 0; i < nr_smr; i++)
    |-> smr = kzalloc(sizeof(*smr), GFP_KERNEL_ACCOUNT);
    ## 以 smr_entry_shift 颗粒分配连续内存
    |-> smr->hpa = csv_alloc_from_contiguous((1UL << smr_entry_shift),
                    nodemask_ptr,
                    get_order(1 << smr_entry_shift));
    ## 记录该内存信息到一个数组
    |-> regions[count].size = (1UL << smr_entry_shift);
    |-> regions[count].base_address = smr->hpa;
    |-> count++
    ## 当数组填满一个 PAGE_SIZE, 或者已是最后一批 region (remainder == count) 时,
    ## 调用 SET_GUEST_PRIVATE_MEMORY 将这批 region 提交给 csv fw
    |-> if (count >= (PAGE_SIZE / sizeof(regions[0])) || (remainder == count)) {
        ## 封装命令
        |-> set_guest_private_memory->nregions = count;
        |-> set_guest_private_memory->handle = sev->handle;
        |-> set_guest_private_memory->regions_paddr = __sme_pa(regions);
        ## 提交命令
        |-> ret = hygon_kvm_hooks.sev_issue_cmd(kvm,
               CSV3_CMD_SET_GUEST_PRIVATE_MEMORY,
               set_guest_private_memory, &argp->error);

QEMU

1
2
3
kvm_cpu_exec
|-> case KVM_EXIT_MEMORY_FAULT:
    |-> kvm_convert_memory

参考commit

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
[PATCH 0/4] x86: Cleanup and extend computing computing API
https://lore.kernel.org/all/20220222185740.26228-1-kirill.shutemov@linux.intel.com/

Add AMD Secure Nested Paging (SEV-SNP) Guest Support
https://lore.kernel.org/all/20220307213356.2797205-1-brijesh.singh@amd.com/


commit dc3f3d2474b80eaee8be89f4c5eb344f10648f42
Author: Brijesh Singh <brijesh.singh@amd.com>
Date:   Thu Feb 24 10:56:01 2022 -0600

    x86/mm: Validate memory when changing the C-bit

    Add the needed functionality to change pages state from shared
    to private and vice-versa using the Page State Change VMGEXIT as
    documented in the GHCB spec.
This post is licensed under CC BY 4.0 by the author.