6.828 Lab3 PartA (User Environments and Exception Handling) Writeup

User Environments and Exception Handling

Exercise 1

在JOS里面， struct Env 相当于是 linux kernel 里面的 task_struct，用于表示一个task。

// Kernel-side descriptor for one environment; the kernel keeps an array of
// these in 'envs' and threads the unused ones onto env_free_list.
struct Env {
  struct Trapframe env_tf;  // Saved registers; env_run() resumes the env by popping this frame
  struct Env *env_link;   // Next free Env (used while the entry sits on env_free_list)
  envid_t env_id;     // Unique environment identifier
  envid_t env_parent_id;    // env_id of this env's parent
  enum EnvType env_type;    // Indicates special system environments
  unsigned env_status;    // Status of the environment (e.g. ENV_FREE, ENV_RUNNABLE, ENV_RUNNING)
  uint32_t env_runs;    // Number of times environment has run (incremented by env_run())

  // Address space
  pde_t *env_pgdir;   // Kernel virtual address of page dir
};

其中的 env_tf 用于保存这个 env 被中断时的状态，恢复这个 trapframe 就能够从之前被中断的地方继续执行。例如，进程切换的时候，env_tf 中保存着 task 运行所需要的各种寄存器的值。下面结构体定义中的 __attribute__((packed)) 表示取消编译器对结构体的对齐填充；取消对齐后，结构体的访问速度可能会降低，但这样做是为了让内存布局和 Intel IA32 Manual 里面硬件压栈的 trapframe 格式保持一致。

// General-purpose register save area. The field order (lowest address
// first) mirrors what pusha leaves on the stack, so the whole struct can be
// filled by one pushal and restored by one popal. Do not reorder.
struct PushRegs {
  /* registers as pushed by pusha */
  uint32_t reg_edi;
  uint32_t reg_esi;
  uint32_t reg_ebp;
  uint32_t reg_oesp;    /* Useless: slot for the ESP value pushed by pusha */
  uint32_t reg_ebx;
  uint32_t reg_edx;
  uint32_t reg_ecx;
  uint32_t reg_eax;
} __attribute__((packed));

// Saved CPU state for one trap. The layout (lowest address first) must match
// the order in which the trap entry code and the hardware push values:
// _alltraps pushes %ds, %es and the general registers; the TRAPHANDLER
// macros push the trap number; everything from tf_err up is laid out as the
// x86 hardware defines it. Packed so no compiler padding disturbs that layout.
struct Trapframe {
  struct PushRegs tf_regs;  /* saved by pushal in _alltraps */
  uint16_t tf_es;           /* saved by "pushl %es" (upper 16 bits are padding) */
  uint16_t tf_padding1;
  uint16_t tf_ds;           /* saved by "pushl %ds" (upper 16 bits are padding) */
  uint16_t tf_padding2;
  uint32_t tf_trapno;       /* pushed by TRAPHANDLER / TRAPHANDLER_NOEC */
  /* below here defined by x86 hardware */
  uint32_t tf_err;          /* error code; TRAPHANDLER_NOEC pushes 0 in this slot */
  uintptr_t tf_eip;
  uint16_t tf_cs;
  uint16_t tf_padding3;
  uint32_t tf_eflags;
  /* below here only when crossing rings, such as from user to kernel */
  uintptr_t tf_esp;
  uint16_t tf_ss;
  uint16_t tf_padding4;
} __attribute__((packed));

为了管理所有的Env，在JOS里面，维护了三个变量:

struct Env *envs = NULL;    // All environments (array of NENV entries, allocated in mem_init())
struct Env *curenv = NULL;    // The current env (NULL until env_run() is first called)
static struct Env *env_free_list; // Free environment list, linked through env_link

在 JOS 中，所有的 struct Env 都通过全局变量 envs 维护，envs 是一个 struct Env 的数组，在 mem_init() 中分配。为了让 user 态能够读取 Env 的信息，还需要在 kern_pgdir 中把 UENVS 映射到 envs，并且权限对 user 是只读（RO）的。

// Make 'envs' point to an array of size 'NENV' of 'struct Env'.
// LAB 3: Your code here.
envs = (struct Env *)boot_alloc(sizeof(struct Env) * NENV);

// Map the 'envs' array read-only by the user at linear address UENVS
// (ie. perm = PTE_U | PTE_P).
// Permissions:
//    - the new image at UENVS  -- kernel R, user R
//    - envs itself -- kernel RW, user NONE
// Only the pages that actually back the array are mapped. Mapping a full
// PTSIZE here would also expose whatever physical memory follows 'envs'
// to user mode (read-only), which the array's consumers never need.
// LAB 3: Your code here.
boot_map_region(kern_pgdir, UENVS, ROUNDUP(sizeof(struct Env) * NENV, PGSIZE),
                PADDR(envs), PTE_U);

Exercise 2

2.1 env_init()

在 Exercise 1 中已经分配了 envs，env_init() 的作用就是对 envs 数组和 env_free_list 进行初始化。为了保证第一次调用 env_alloc() 的时候返回的是 envs[0]，需要注意 env_free_list 中各个 Env 的链接顺序要和它们在 envs 数组中的顺序一致。

// Mark every entry of 'envs' as free with env_id 0 and chain all of them
// onto env_free_list in array order, so that envs[0] is the head of the
// list and therefore the first entry handed out.
void
env_init(void)
{
  size_t idx;

  // Set up envs array
  // LAB 3: Your code here.
  memset((void*) envs, 0, sizeof(struct Env) * NENV);

  // Walk the array from the back and push each entry onto the front of
  // the list; the finished list then runs envs[0] -> ... -> envs[NENV-1].
  env_free_list = NULL;
  for (idx = NENV; idx-- > 0; ) {
    envs[idx].env_id = 0;
    envs[idx].env_status = ENV_FREE;
    envs[idx].env_link = env_free_list;
    env_free_list = &envs[idx];
  }

  // Per-CPU part of the initialization. Per the original author's note,
  // this sets up each CPU's GDT and segment registers (gs, fs, es, ds,
  // ss, cs) and clears the LDT.
  env_init_percpu();
}

2.2 env_setup_vm()

对于一个给定的 env，初始化它的 env_pgdir，也就是虚拟地址空间。在这里需要将 kern_pgdir 作为模板初始化 env_pgdir，这样做的原因是：当 kernel 需要访问 user 态的某个虚拟地址时，只要将页表切换为 user 的页表即可，同时还能保证 kernel 的代码可以继续执行。注意 kern_pgdir 中各项权限的控制。

// Set up the page directory (address space) for environment 'e'.
//
// Allocates one zeroed page for the directory, copies the kernel's page
// directory into it as a template, and then points the UVPT slot at the
// new directory itself with user-readable permissions.
//
// Returns 0 on success, -E_NO_MEM if no page could be allocated.
static int
env_setup_vm(struct Env *e)
{
  struct PageInfo *pgdir_page = page_alloc(ALLOC_ZERO);

  if (pgdir_page == NULL)
    return -E_NO_MEM;

  // The directory page is referenced by the environment from now on.
  pgdir_page->pp_ref++;
  e->env_pgdir = page2kva(pgdir_page);

  // Start from the kernel's mappings so kernel code keeps running after
  // a switch to this page directory.
  memmove(e->env_pgdir, kern_pgdir, PGSIZE);

  // UVPT maps the env's own page table read-only for kernel and user.
  e->env_pgdir[PDX(UVPT)] = PADDR(e->env_pgdir) | PTE_P | PTE_U;
  return 0;
}

2.3 region_alloc()

在给定的 env 上，分配物理页，映射到给定的虚拟地址上。物理页不用 memset 为0。

// Allocate physical pages for environment 'e' and map them so that the
// whole byte range [va, va + len) is backed by user-writable memory.
//
// 'va' and 'len' need not be page-aligned: the start is rounded down and
// the end is rounded up. The pages are not zeroed. Panics if a page or a
// page table cannot be allocated.
static void
region_alloc(struct Env *e, void *va, size_t len)
{
  // Round the END ADDRESS (va + len), not just the length. Computing the
  // end as ROUNDDOWN(va) + ROUNDUP(len) leaves the final page unmapped
  // whenever 'va' is unaligned and the range crosses one more page
  // boundary than 'len' alone suggests (e.g. va = 0x1800, len = 0x1000).
  uintptr_t start = ROUNDDOWN((uintptr_t) va, PGSIZE);
  uintptr_t end = ROUNDUP((uintptr_t) va + len, PGSIZE);
  uintptr_t addr;

  for (addr = start; addr < end; addr += PGSIZE) {
    struct PageInfo *pp = page_alloc(0);
    if (!pp)
      panic("region_alloc : page_alloc() out of memory.\n");
    int ret = page_insert(e->env_pgdir, pp, (void *) addr, PTE_U | PTE_W);
    if (ret < 0)
      panic("region_alloc : page_insert() %e.\n", ret);
  }
}

2.4 load_icode()

将一个 ELF 文件 load 到内存中（关于 ELF 文件的格式，可以参考 ELF 规范）。ELF 文件开头是一个 elfhdr，里面记录了 Proghdr 的信息：elfhdr->e_phnum 表示有多少个 Proghdr，第一个 Proghdr 的位置通过 elfhdr->e_phoff 这个偏移量给出。对于每个 Proghdr，只有 p_type 是 ELF_PROG_LOAD 的才需要 load 到内存中。Proghdr->p_memsz 表示这一段占用内存的大小，Proghdr->p_filesz 表示这一段在文件中的大小，这两个可以不一样，memsz - filesz 就是 BSS 段的大小，所以在初始化的时候，需要将 BSS 段清零。Proghdr->p_va 表示这段对应的虚拟地址。加载时还要初始化 va 对应的 PTE。

static void
load_icode(struct Env *e, uint8_t *binary, size_t size)
{
  struct Proghdr *ph, *eph;
  struct Elf *elfhdr;
  struct PageInfo *pp;
  
  elfhdr = (struct Elf *) binary;
  if (elfhdr->e_magic != ELF_MAGIC)
    panic("load_icode : not an valid ELF.\n");
  
  ph = (struct Proghdr *) (binary + elfhdr->e_phoff);
  eph = ph + elfhdr->e_phnum;

  // 因为memmove 操作都需要将一段kernel的内存复制到 user 的一个虚拟地址上
  // 所以在这里先将页表替换为 user 的页表，就能够访问 user 的虚拟地址了，
  // 又因为 user 的页表初始化的时候，模版是 kernel 的 kern_pgdir ，包含了
  // kernel 部分的页表，所以这个时候，kernel 还是可以继续执行的。
  lcr3(PADDR(e->env_pgdir));
  
  for (; ph < eph; ph++) {
    if (ph->p_type != ELF_PROG_LOAD)
      continue;
    region_alloc(e, (void*)ph->p_va, ph->p_memsz);
    memset((void *)ROUNDDOWN((uintptr_t)ph->p_va, PGSIZE), 0 ,
           ROUNDUP(ph->p_memsz, PGSIZE));
    memmove((void*)ph->p_va, binary+ph->p_offset, ph->p_filesz);
  }

  // e_entry 程序段的入口地址。
  e->env_tf.tf_eip = elfhdr->e_entry;

  // 初始化 user 的 栈。
  region_alloc(e, (void*)(USTACKTOP-PGSIZE), PGSIZE);
}

2.5 env_create()

通过 env_alloc 从 env_free_list 上分配 env 所需要的空间，初始化其中的一些状态信息（除了 env_pgdir 之外的信息）。调用 load_icode 将 ELF load 到对应的虚拟地址空间上。同时初始化对应的PTE。

// Allocate a fresh environment, load the ELF image 'binary' into it and
// record its type. Panics if no environment can be allocated.
//
//   binary - kernel-virtual address of the ELF image
//   size   - size of the image in bytes (passed through to load_icode)
//   type   - value stored in the new environment's env_type
void
env_create(uint8_t *binary, size_t size, enum EnvType type)
{
  // LAB 3: Your code here.
  struct Env *env;
  int err = env_alloc(&env, 0);

  if (err < 0)
    panic("env_create : env_alloc %e.\n", err);

  load_icode(env, binary, size);
  env->env_type = type;
}

2.6 env_run()

更新 curenv，因为在第一个 env run 之前， curenv 为 NULL，需要加上判断 NULL 的情况。然后替换页表，替换各种寄存器，让新的env从上次中断的地方继续执行，调用 env_pop_tf 之后，就不会再返回，直接执行 env 的程序。

void
env_run(struct Env *e)
{
  // Step 1
  if ( curenv && curenv->env_status == ENV_RUNNING) 
    curenv->env_status = ENV_RUNNABLE;
  curenv = e;
  assert(curenv->env_status == ENV_RUNNABLE);
  curenv->env_status = ENV_RUNNING;
  curenv->env_runs += 1;
  lcr3(PADDR(curenv->env_pgdir));
  
  // Step 2
  env_pop_tf(&curenv->env_tf);
  panic("env_run not yet implemented");// should be never executed
}

env_pop_tf：把 tf 的地址赋值给 esp，然后从 trapframe 上依次恢复 struct PushRegs 和 ES、DS，再跳过 trapno 和 errcode。至于这个 trap 是否有特权等级的变化，IRET 指令能够自行处理（参考 Intel Instruction Set Reference A-M 中 IRET 指令的说明）。简而言之，iret 会从栈上 pop 出 eip、cs 和 eflags（特权级变化时还有 esp 和 ss），然后从 cs:eip 处开始执行。

// Restore the register state saved in 'tf' and resume execution there.
// Does not return when the iret succeeds.
//
// The instruction sequence walks the Trapframe from its lowest address
// upward, so it depends on the exact field order of struct Trapframe:
//   movl  tf -> %esp   treat the Trapframe as the stack
//   popal              restore tf_regs (struct PushRegs)
//   popl %es, %ds      restore tf_es and tf_ds (with their padding words)
//   addl $8, %esp      step over tf_trapno and tf_err
//   iret               consume the remaining hardware-defined part of the
//                      frame (tf_eip, tf_cs, tf_eflags, ...)
void
env_pop_tf(struct Trapframe *tf)
{
  __asm __volatile("movl %0,%%esp\n"
    "\tpopal\n"
    "\tpopl %%es\n"
    "\tpopl %%ds\n"
    "\taddl $0x8,%%esp\n" /* skip tf_trapno and tf_errcode */
    "\tiret"
    : : "g" (tf) : "memory");
  panic("iret failed");  /* mostly to placate the compiler */
}

Exercise 4

4.1 初始化 IDT

在 trapentry.S 中初始化中断处理函数，根据是否有Error Code，中断处理函数分为两种 TRAPHANDLER_NOEC 和 TRAPHANDLER。看代码发现其本质就是初始化 trapframe 中的 tf_trapno 和 tf_err 字段，然后，跳到 _alltraps 统一处理。

/*
 * TRAPHANDLER(name, num): emit a global function symbol 'name' that pushes
 * the trap number 'num' and jumps to _alltraps.
 *
 * This variant pushes nothing into the error-code slot itself; per the
 * accompanying write-up it is meant for traps where an error code is
 * already on the stack when the handler is entered.
 */
#define TRAPHANDLER(name, num)            \
  .globl name;    /* define global symbol for 'name' */ \
  .type name, @function;  /* symbol type is function */   \
  .align 2;   /* align function definition */   \
  name:     /* function starts here */    \
  pushl $(num);             \
  jmp _alltraps

/*
 * TRAPHANDLER_NOEC(name, num): same as TRAPHANDLER, but first pushes a 0
 * in place of the error code, so the stack has the same shape whether or
 * not the trap supplies an error code. Use it for traps without one.
 */
#define TRAPHANDLER_NOEC(name, num)         \
  .globl name;              \
  .type name, @function;            \
  .align 2;             \
  name:               \
  pushl $0;             \
  pushl $(num);             \
  jmp _alltraps

所以中断处理函数通过查 Intel 的中断那节的 Manual 可以这样：

/*
 * Lab 3: Your code here for generating entry points for the different traps.
 *
 * One entry point is generated per vector: TRAPHANDLER for vectors that
 * come with an error code, TRAPHANDLER_NOEC for those that do not.
 */
// check if has error code : http://pdos.csail.mit.edu/6.828/2012/readings/i386/s09_06.htm
// or http://pdos.csail.mit.edu/6.828/2012/readings/ia32/IA32-3A.pdf Chapter 5,
// table 5-1
TRAPHANDLER_NOEC(idt_divide, T_DIVIDE) 
TRAPHANDLER_NOEC(idt_debug, T_DEBUG) 
... 
TRAPHANDLER_NOEC(idt_irq_ide, IRQ_OFFSET + IRQ_IDE)
TRAPHANDLER_NOEC(idt_irq_error, IRQ_OFFSET + IRQ_ERROR)

对于 _alltraps 函数，因为 trapframe 的 eip , cs 及后面的字段都是硬件保存了，所以我们只要保存PushRegs 和 ds es 就可以。

// Common tail of every trap entry point. On entry the stack already holds
// the trap number and an error-code slot (pushed by the TRAPHANDLER macros)
// above the hardware-pushed part of the frame. This code pushes the rest so
// that the stack contents form a struct Trapframe, then calls trap() with a
// pointer to it.
.global _alltraps
_alltraps :
  // Build trap frame as an argument of trap(struct Trapframe *tf)
  // in trap.c , trapno and error code are set up by TRAPHANDLER
  // and TRAPHANDLER_NOEC
  // Push order is the reverse of the field order in struct Trapframe:
  // tf_ds, then tf_es, then tf_regs (pushal).
  pushl %ds
  pushl %es
  pushal
  
  // Set up data and per-cpu segment , cs and ss are set up by h/w
  // Load the kernel data selector into every data segment register.
  movw $(GD_KD), %ax
  movw %ax, %ds
  movw %ax, %es
  movw %ax, %fs
  movw %ax, %gs

  // Call trap(struct Trapframe *tr)
  // %esp now points at the start of the Trapframe; pass it as the argument.
  pushl %esp
  call trap
  // NOTE(review): nothing follows this instruction in the excerpt, so
  // trap() is presumably expected not to return -- confirm.
  addl $4, %esp

初始化 IDT

// Fill in the interrupt descriptor table: every vector first gets a default
// handler, then the vectors that have a dedicated entry point are
// overridden. Finishes with the per-CPU trap setup.
void
trap_init(void)
{
  extern struct Segdesc gdt[];

  // LAB 3: Your code here.
  // set up idt
  // Entry points generated in trapentry.S by TRAPHANDLER/TRAPHANDLER_NOEC.
  extern void idt_divide();
  extern void idt_debug();
  .... 
  extern void idt_irq_ide();
  extern void idt_irq_error();


  // Give all 256 vectors a default handler so no gate is left unset.
  int i ;
  for (i = 0;i < 256 ;i ++)
    SETGATE(idt[i], 0, GD_KT, idt_default, 0);

  // Install the dedicated handlers; all use the kernel text selector and
  // a last argument of 0.
  // NOTE(review): the second argument is 1 for these exception vectors but
  // 0 for the default and IRQ entries. Presumably this is SETGATE's
  // trap-gate flag, which would leave interrupts enabled on entry --
  // confirm this is intended.
  SETGATE(idt[T_DIVIDE], 1, GD_KT, idt_divide, 0);
  SETGATE(idt[T_DEBUG], 1, GD_KT, idt_debug, 0);
  ... 
  SETGATE(idt[IRQ_OFFSET + IRQ_IDE], 0, GD_KT, idt_irq_ide, 0);
  SETGATE(idt[IRQ_OFFSET + IRQ_ERROR], 0, GD_KT, idt_irq_error, 0);

  // Per-CPU setup 
  trap_init_percpu();
}

4.2 Question 1

如果所有的异常/中断共用同一个 handler，handler 就不能知道栈上是否含有 Error Code，也无从区分发生的是哪个中断，因而不能够通过硬件单独屏蔽某个中断。

4.3 Question 2

softint 虽然执行了 int $14，但是 14 号中断的门描述符 DPL = 0，user 没有权限通过 int 指令触发它，所以产生了 13 号中断（General Protection Fault）。如果 kernel 允许 user 直接产生 page fault，后果主要看 kernel 怎么处理 user 产生的 page fault：正常情况下硬件检测到 user 访问一个不能访问的地址时才会产生 PF，并把出错地址写入 CR2；而 user 用 int $14 伪造 PF 时并没有权限写 CR2 寄存器，但 CR2 里面可能残留着之前的值，kernel 如果处理不当，可能会导致 user 得到这个地址的访问权限。

– EOF –