From b0de416682d74ae35fb68ad5be3021694260d36a Mon Sep 17 00:00:00 2001 From: Michael LeMay Date: Mon, 21 Mar 2016 17:13:36 -0700 Subject: [PATCH 1/5] x86: Adjust UEFI header size The UEFI GenFw program inserts headers ahead of the code in the UEFI binary. The linker script adjusts the starting address of the .text section to account for that. This prevents the symbols from being perturbed. This patch accounts for a recent change in the size of the headers added by the GenFw program. --- cpu/x86/quarkX1000.ld | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpu/x86/quarkX1000.ld b/cpu/x86/quarkX1000.ld index ba437d540..a7f2c2555 100644 --- a/cpu/x86/quarkX1000.ld +++ b/cpu/x86/quarkX1000.ld @@ -37,20 +37,20 @@ SECTIONS { OS-Dev Wiki says it is common for kernels to start at 1M. Addresses before that are used by BIOS/EFI, the bootloader and memory-mapped I/O. - The UEFI GenFw program inserts a 0x240-byte offset between the image base and + The UEFI GenFw program inserts a 0x220-byte offset between the image base and the .text section. We add that same offset here to align the symbols in the UEFI DLL with those in the final UEFI binary to make debugging easier. We also apply 32-byte alignments to sections rather than more conventional 4K-byte alignments to avoid symbols being shifted from the intermediate DLL to the final UEFI image as would occur if the GenFw program shifted the .text section - from a higher, 4K-aligned offset to the 0x240-byte offset from the image base. + from a higher, 4K-aligned offset to the 0x220-byte offset from the image base. Such shifting may make debugging more difficult by preventing the DLL from being a directly-useful source of symbol information. The debugging symbols are not included in the final UEFI image. The GenFw program uses a minimum section alignment of 32 bytes, so smaller alignment granularities may also result in symbol perturbation. */ - . = 1M + 0x240; + . 
= 1M + 0x220; .text ALIGN (32) : { From 39082530385deadc461e6ae8975b99949ab0d53c Mon Sep 17 00:00:00 2001 From: Michael LeMay Date: Mon, 10 Aug 2015 08:34:02 -0700 Subject: [PATCH 2/5] x86: Add support for (paging-based) protection domains This patch implements a simple, lightweight form of protection domains using a pluggable framework. Currently, the following plugin is available: - Flat memory model with paging. The overall goal of a protection domain implementation within this framework is to define a set of resources that should be accessible to each protection domain and to prevent that protection domain from accessing other resources. The details of each implementation of protection domains may differ substantially, but they should all be guided by the principle of least privilege. However, that idealized principle is balanced against the practical objectives of limiting the number of relatively time-consuming context switches and minimizing changes to existing code. For additional information, please refer to cpu/x86/mm/README.md. This patch also causes the C compiler to be used as the default linker and assembler. 
--- cpu/x86/Makefile.x86_common | 6 +- cpu/x86/Makefile.x86_quarkX1000 | 35 +- cpu/x86/bootstrap_quarkX1000.S | 15 +- cpu/x86/dma.h | 8 + cpu/x86/drivers/legacy_pc/pci.c | 77 ++- cpu/x86/drivers/legacy_pc/pci.h | 27 +- cpu/x86/drivers/legacy_pc/shared-isr.c | 10 +- cpu/x86/drivers/legacy_pc/uart-16x50.c | 63 +- cpu/x86/drivers/quarkX1000/eth.c | 174 ++++-- cpu/x86/drivers/quarkX1000/gpio.c | 46 +- cpu/x86/drivers/quarkX1000/i2c-registers.h | 2 + cpu/x86/drivers/quarkX1000/i2c.c | 55 +- cpu/x86/drivers/quarkX1000/imr-conf.c | 13 +- cpu/x86/drivers/quarkX1000/msg-bus.c | 40 +- cpu/x86/drivers/quarkX1000/msg-bus.h | 2 + cpu/x86/drivers/quarkX1000/uart.c | 14 +- cpu/x86/helpers.h | 11 + cpu/x86/init/common/cpu.c | 75 ++- cpu/x86/init/common/cpu.h | 4 +- cpu/x86/init/common/gdt.c | 144 ++--- cpu/x86/init/common/gdt.h | 30 +- cpu/x86/init/common/idt.c | 19 +- cpu/x86/init/common/idt.h | 8 +- cpu/x86/init/common/interrupt.h | 63 +- cpu/x86/mm/README.md | 669 +++++++++++++++++++++ cpu/x86/mm/gdt-layout.h | 105 ++++ cpu/x86/mm/paging-prot-domains.c | 297 +++++++++ cpu/x86/mm/paging-prot-domains.h | 114 ++++ cpu/x86/mm/paging.h | 65 ++ cpu/x86/mm/prot-domains.c | 69 +++ cpu/x86/mm/prot-domains.h | 275 +++++++++ cpu/x86/mm/segmentation.h | 131 ++++ cpu/x86/mm/stacks.c | 40 ++ cpu/x86/mm/stacks.h | 92 +++ cpu/x86/mm/syscalls-int-asm.S | 87 +++ cpu/x86/mm/syscalls-int.c | 298 +++++++++ cpu/x86/mm/syscalls-int.h | 109 ++++ cpu/x86/mm/syscalls.h | 115 ++++ cpu/x86/mm/tss.c | 65 ++ cpu/x86/mm/tss.h | 70 +++ cpu/x86/quarkX1000.ld | 12 +- cpu/x86/quarkX1000_dma.ld | 6 +- cpu/x86/quarkX1000_paging.ld | 204 +++++++ cpu/x86/uefi/bootstrap_uefi.c | 7 +- examples/galileo/Makefile | 4 + platform/galileo/README.md | 3 + platform/galileo/contiki-main.c | 68 ++- platform/galileo/net/eth-conf.c | 7 +- 48 files changed, 3558 insertions(+), 295 deletions(-) create mode 100644 cpu/x86/mm/README.md create mode 100644 cpu/x86/mm/gdt-layout.h create mode 100644 cpu/x86/mm/paging-prot-domains.c 
create mode 100644 cpu/x86/mm/paging-prot-domains.h create mode 100644 cpu/x86/mm/paging.h create mode 100644 cpu/x86/mm/prot-domains.c create mode 100644 cpu/x86/mm/prot-domains.h create mode 100644 cpu/x86/mm/segmentation.h create mode 100644 cpu/x86/mm/stacks.c create mode 100644 cpu/x86/mm/stacks.h create mode 100644 cpu/x86/mm/syscalls-int-asm.S create mode 100644 cpu/x86/mm/syscalls-int.c create mode 100644 cpu/x86/mm/syscalls-int.h create mode 100644 cpu/x86/mm/syscalls.h create mode 100644 cpu/x86/mm/tss.c create mode 100644 cpu/x86/mm/tss.h create mode 100644 cpu/x86/quarkX1000_paging.ld diff --git a/cpu/x86/Makefile.x86_common b/cpu/x86/Makefile.x86_common index d6e692e9a..c27626815 100644 --- a/cpu/x86/Makefile.x86_common +++ b/cpu/x86/Makefile.x86_common @@ -3,8 +3,10 @@ CONTIKI_CPU_DIRS += . init/common CONTIKI_SOURCEFILES += gdt.c helpers.S idt.c cpu.c CC = gcc -LD = gcc -AS = as +LD = $(CC) +# Use gcc to invoke the assembler so that the preprocessor will be run on each +# file first, enabling us to use macros within assembly language files: +AS = $(CC) OBJCOPY = objcopy SIZE = size STRIP = strip diff --git a/cpu/x86/Makefile.x86_quarkX1000 b/cpu/x86/Makefile.x86_quarkX1000 index e6e794206..4a7668cc6 100644 --- a/cpu/x86/Makefile.x86_quarkX1000 +++ b/cpu/x86/Makefile.x86_quarkX1000 @@ -1,13 +1,42 @@ +# See mm/README.md for a description of available settings: +X86_CONF_PROT_DOMAINS ?= none + include $(CONTIKI)/cpu/x86/Makefile.x86_common -CONTIKI_CPU_DIRS += drivers/legacy_pc drivers/quarkX1000 init/legacy_pc +CONTIKI_CPU_DIRS += drivers/legacy_pc drivers/quarkX1000 init/legacy_pc net mm CONTIKI_SOURCEFILES += bootstrap_quarkX1000.S rtc.c pit.c pic.c irq.c nmi.c pci.c uart-16x50.c uart.c gpio.c i2c.c eth.c shared-isr.c CONTIKI_SOURCEFILES += imr.c msg-bus.c +CONTIKI_SOURCEFILES += stacks.c + +ifneq ($(X86_CONF_PROT_DOMAINS),none) +CONTIKI_SOURCEFILES += prot-domains.c $(X86_CONF_PROT_DOMAINS)-prot-domains.c imr-conf.c + +ifeq 
($(X86_CONF_PROT_DOMAINS),paging) +LINKERSCRIPT_SFX = _paging +X86_CONF_SYSCALLS_INT = 1 +ifeq ($(X86_CONF_USE_INVLPG),1) +CFLAGS += -DX86_CONF_USE_INVLPG +endif +# This matches the definition of X86_CONF_PROT_DOMAINS__PAGING in prot-domains.h: +CFLAGS += -DX86_CONF_PROT_DOMAINS=1 +else +$(error Unrecognized setting for X86_CONF_PROT_DOMAINS: \ + $(X86_CONF_PROT_DOMAINS). See cpu/x86/mm/README.md for \ + descriptions of available settings) +endif + +ifeq ($(X86_CONF_SYSCALLS_INT),1) +CONTIKI_SOURCEFILES += syscalls-int-asm.S tss.c +endif + +endif CFLAGS += -m32 -march=i586 -mtune=i586 -LDFLAGS += -m32 -Xlinker -T -Xlinker $(CONTIKI)/cpu/x86/quarkX1000.ld -ASFLAGS += --32 -march=i586 -mtune=i586 +LDFLAGS += -m32 -Xlinker -T -Xlinker $(CONTIKI)/cpu/x86/quarkX1000$(LINKERSCRIPT_SFX).ld +# The C compiler is used to invoke the assembler, so the CFLAGS should be +# passed to it on the command line: +ASFLAGS = -c $(CFLAGS) ifeq ($(X86_CONF_RESTRICT_DMA),1) CONTIKI_SOURCEFILES += imr-conf.c diff --git a/cpu/x86/bootstrap_quarkX1000.S b/cpu/x86/bootstrap_quarkX1000.S index 8def35843..4211e51a3 100644 --- a/cpu/x86/bootstrap_quarkX1000.S +++ b/cpu/x86/bootstrap_quarkX1000.S @@ -28,8 +28,7 @@ * OF THE POSSIBILITY OF SUCH DAMAGE. */ -# Kernel -.set STACK_SIZE, 8192 +#include "stacks.h" # Multiboot .set MAGIC_NUMBER, 0x1BADB002 @@ -42,15 +41,9 @@ .long FLAGS .long CHECKSUM -# Reserve space for the C stack. -.lcomm c_stack, STACK_SIZE - -.section .text +.section .boot_text .global start start: cli - movl $(c_stack + STACK_SIZE), %esp - call main - - /* We're not expected to return from main(). 
But if we do we halt */ - call halt + mov $(stacks_main + STACKS_SIZE_MAIN), %esp + call cpu_boot_stage0 diff --git a/cpu/x86/dma.h b/cpu/x86/dma.h index a83ccd2eb..b0122fcdb 100644 --- a/cpu/x86/dma.h +++ b/cpu/x86/dma.h @@ -31,10 +31,18 @@ #ifndef CPU_X86_DMA_H_ #define CPU_X86_DMA_H_ +#include "prot-domains.h" + #ifdef X86_CONF_RESTRICT_DMA #define ATTR_BSS_DMA __attribute__((section(".dma_bss"))) #else +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__NONE #define ATTR_BSS_DMA +#else +#define ATTR_BSS_DMA ATTR_BSS_META +#endif #endif +extern int _sbss_dma_addr, _ebss_dma_addr; + #endif /* CPU_X86_DMA_H_ */ diff --git a/cpu/x86/drivers/legacy_pc/pci.c b/cpu/x86/drivers/legacy_pc/pci.c index 0507ad231..e94c9ecbe 100644 --- a/cpu/x86/drivers/legacy_pc/pci.c +++ b/cpu/x86/drivers/legacy_pc/pci.c @@ -32,12 +32,15 @@ #include "pci.h" #include "helpers.h" +#include "syscalls.h" /* I/O port for PCI configuration address */ #define PCI_CONFIG_ADDR_PORT 0xCF8 /* I/O port for PCI configuration data */ #define PCI_CONFIG_DATA_PORT 0xCFC +PROT_DOMAINS_ALLOC(dom_client_data_t, root_complex_drv); + /*---------------------------------------------------------------------------*/ /* Initialize PCI configuration register address in preparation for accessing * the specified register. @@ -101,40 +104,34 @@ pci_command_enable(pci_config_addr_t addr, uint32_t flags) * \param agent Interrupt Queue Agent to be used, IRQAGENT[0:3]. * \param pin Interrupt Pin Route to be used, INT[A:D]. * \param pirq PIRQ to be used, PIRQ[A:H]. - * \return Returns 0 on success and a negative number otherwise. 
*/ -int -pci_irq_agent_set_pirq(IRQAGENT agent, INTR_PIN pin, PIRQ pirq) +SYSCALLS_DEFINE_SINGLETON(pci_irq_agent_set_pirq, + root_complex_drv, + IRQAGENT agent, INTR_PIN pin, PIRQ pirq) { - pci_config_addr_t pci; uint16_t value; uint32_t rcba_addr, offset = 0; + rcba_addr = PROT_DOMAINS_MMIO(root_complex_drv); + assert(agent >= IRQAGENT0 && agent <= IRQAGENT3); assert(pin >= INTA && pin <= INTD); assert(pirq >= PIRQA && pirq <= PIRQH); - pci.raw = 0; - pci.bus = 0; - pci.dev = 31; - pci.func = 0; - pci.reg_off = 0xF0; /* Root Complex Base Address Register */ - - /* masked to clear non-address bits. */ - rcba_addr = pci_config_read(pci) & ~0x3FFF; - switch(agent) { case IRQAGENT0: - if (pin != INTA) - return -1; + if(pin != INTA) { + halt(); + } offset = 0x3140; break; case IRQAGENT1: offset = 0x3142; break; case IRQAGENT2: - if (pin != INTA) - return -1; + if(pin != INTA) { + halt(); + } offset = 0x3144; break; case IRQAGENT3: @@ -163,8 +160,6 @@ pci_irq_agent_set_pirq(IRQAGENT agent, INTR_PIN pin, PIRQ pirq) } *(uint16_t*)(rcba_addr + offset) = value; - - return 0; } /*---------------------------------------------------------------------------*/ /** @@ -231,13 +226,51 @@ pci_pirq_set_irq(PIRQ pirq, uint8_t irq, uint8_t route_to_legacy) * firmware. * \param c_this Structure that will be initialized to represent the driver. * \param pci_addr PCI base address of device. + * \param mmio_sz Size of MMIO region. * \param meta Base address of optional driver-defined metadata. + * \param meta_sz Size of optional driver-defined metadata. */ void -pci_init(pci_driver_t *c_this, pci_config_addr_t pci_addr, uintptr_t meta) +pci_init(pci_driver_t *c_this, + pci_config_addr_t pci_addr, + size_t mmio_sz, + uintptr_t meta, + size_t meta_sz) { + uintptr_t mmio; + /* The BAR value is masked to clear non-address bits. 
*/ - c_this->mmio = pci_config_read(pci_addr) & ~0xFFF; - c_this->meta = meta; + mmio = pci_config_read(pci_addr) & ~0xFFF; + + prot_domains_reg(c_this, mmio, mmio_sz, meta, meta_sz, false); +} +/*---------------------------------------------------------------------------*/ +/** + * \brief Initialize the PCI root complex driver. + */ +void +pci_root_complex_init(void) +{ + uint32_t rcba_addr; + pci_config_addr_t pci = { .raw = 0 }; + pci.dev = 31; + pci.reg_off = 0xF0; /* Root Complex Base Address Register */ + + /* masked to clear non-address bits. */ + rcba_addr = pci_config_read(pci) & ~0x3FFF; + + PROT_DOMAINS_INIT_ID(root_complex_drv); + prot_domains_reg(&root_complex_drv, rcba_addr, 0x4000, 0, 0, false); + SYSCALLS_INIT(pci_irq_agent_set_pirq); + SYSCALLS_AUTHZ(pci_irq_agent_set_pirq, root_complex_drv); +} +/*---------------------------------------------------------------------------*/ +/** + * \brief Prevent further invocations of pci_irq_agent_set_pirq. + */ +void +pci_root_complex_lock(void) +{ + SYSCALLS_DEAUTHZ(pci_irq_agent_set_pirq, root_complex_drv); } /*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/drivers/legacy_pc/pci.h b/cpu/x86/drivers/legacy_pc/pci.h index c938f9c6c..fff53a048 100644 --- a/cpu/x86/drivers/legacy_pc/pci.h +++ b/cpu/x86/drivers/legacy_pc/pci.h @@ -33,6 +33,8 @@ #include #include "helpers.h" +#include +#include "prot-domains.h" /** PCI configuration register identifier for Base Address Registers */ #define PCI_CONFIG_REG_BAR0 0x10 @@ -98,22 +100,23 @@ uint32_t pci_config_read(pci_config_addr_t addr); void pci_config_write(pci_config_addr_t addr, uint32_t data); void pci_command_enable(pci_config_addr_t addr, uint32_t flags); -/** - * PCI device driver instance with an optional single MMIO range and optional - * metadata. 
- */ -typedef struct pci_driver { - uintptr_t mmio; /**< MMIO range base address */ - uintptr_t meta; /**< Driver-defined metadata base address */ -} pci_driver_t; +typedef dom_client_data_t pci_driver_t; -void pci_init(pci_driver_t *c_this, pci_config_addr_t pci_addr, uintptr_t meta); -int pci_irq_agent_set_pirq(IRQAGENT agent, INTR_PIN pin, PIRQ pirq); +void pci_init(pci_driver_t *c_this, + pci_config_addr_t pci_addr, + size_t mmio_sz, + uintptr_t meta, + size_t meta_sz); +void pci_irq_agent_set_pirq(IRQAGENT agent, INTR_PIN pin, PIRQ pirq); void pci_pirq_set_irq(PIRQ pirq, uint8_t irq, uint8_t route_to_legacy); +void pci_root_complex_init(void); +void pci_root_complex_lock(void); #define PCI_MMIO_READL(c_this, dest, reg_addr) \ - dest = *((volatile uint32_t *)((c_this).mmio + (reg_addr))) + dest = *((volatile uint32_t *) \ + (((uintptr_t)PROT_DOMAINS_MMIO(c_this)) + (reg_addr))) #define PCI_MMIO_WRITEL(c_this, reg_addr, src) \ - *((volatile uint32_t *)((c_this).mmio + (reg_addr))) = (src) + *((volatile uint32_t *) \ + (((uintptr_t)PROT_DOMAINS_MMIO(c_this)) + (reg_addr))) = (src) #endif /* CPU_X86_DRIVERS_LEGACY_PC_PCI_H_ */ diff --git a/cpu/x86/drivers/legacy_pc/shared-isr.c b/cpu/x86/drivers/legacy_pc/shared-isr.c index e13c7ab93..f04f6aba8 100644 --- a/cpu/x86/drivers/legacy_pc/shared-isr.c +++ b/cpu/x86/drivers/legacy_pc/shared-isr.c @@ -62,7 +62,8 @@ shared_isr_init(void) void shared_isr_stub(void); __asm__ __volatile__ ( - ISR_STUB("shared_isr_stub", 0, "shared_handler") + ISR_STUB("shared_isr_stub", 0, "shared_handler", 0) + : ); while(client < &_edata_shared_isr) { @@ -91,11 +92,10 @@ shared_isr_init(void) (client->pin == consistency_check_client->pin) && (client->pirq == consistency_check_client->pirq)); } else { - idt_set_intr_gate_desc(PIC_INT(client->irq), (uint32_t)shared_isr_stub); + idt_set_intr_gate_desc(PIC_INT(client->irq), (uint32_t)shared_isr_stub, + GDT_SEL_CODE_INT, PRIV_LVL_INT); - assert(pci_irq_agent_set_pirq(client->agent, - 
client->pin, - client->pirq) == 0); + pci_irq_agent_set_pirq(client->agent, client->pin, client->pirq); pci_pirq_set_irq(client->pirq, client->irq, 1); diff --git a/cpu/x86/drivers/legacy_pc/uart-16x50.c b/cpu/x86/drivers/legacy_pc/uart-16x50.c index 296719faa..d1f2c498d 100644 --- a/cpu/x86/drivers/legacy_pc/uart-16x50.c +++ b/cpu/x86/drivers/legacy_pc/uart-16x50.c @@ -28,9 +28,12 @@ * OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "uart-16x50.h" #include #include "helpers.h" +#include "paging.h" +#include "prot-domains.h" +#include "syscalls.h" +#include "uart-16x50.h" /* Refer to Intel Quark SoC X1000 Datasheet, Chapter 18 for more details on * UART operation. @@ -64,24 +67,22 @@ typedef struct uart_16x50_regs { volatile uint32_t mcr, lsr, msr, scr, usr, htx, dmasa; } uart_16x50_regs_t; -/*---------------------------------------------------------------------------*/ -/** - * \brief Initialize an MMIO-programmable 16X50 UART. - * \param c_this Structure that will be initialized to represent the device. - * \param pci_addr PCI address of device. - * \param dl Divisor setting to configure the baud rate. +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__PAGING +/* When paging-based protection domains are in use, at least one page of memory + * must be reserved to facilitate access to the MMIO region, since that is the + * smallest unit of memory that can be managed with paging: */ -void -uart_16x50_init(uart_16x50_driver_t *c_this, - pci_config_addr_t pci_addr, - uint16_t dl) -{ - /* This assumes that the UART had an MMIO range assigned to it by the - * firmware during boot. 
- */ - pci_init(c_this, pci_addr, 0); +#define UART_MMIO_SZ MIN_PAGE_SIZE +#else +#define UART_MMIO_SZ sizeof(uart_16x50_regs_t) +#endif - uart_16x50_regs_t *regs = (uart_16x50_regs_t *)c_this->mmio; +void uart_16x50_setup(uart_16x50_driver_t c_this, uint16_t dl); + +/*---------------------------------------------------------------------------*/ +SYSCALLS_DEFINE(uart_16x50_setup, uart_16x50_driver_t c_this, uint16_t dl) +{ + uart_16x50_regs_t *regs = (uart_16x50_regs_t *)PROT_DOMAINS_MMIO(c_this); /* Set the DLAB bit to enable access to divisor settings. */ regs->lcr = UART_LCR_7_DLAB; @@ -109,10 +110,9 @@ uart_16x50_init(uart_16x50_driver_t *c_this, * This procedure will block indefinitely until the UART is ready * to accept the character to be transmitted. */ -void -uart_16x50_tx(uart_16x50_driver_t c_this, uint8_t c) +SYSCALLS_DEFINE(uart_16x50_tx, uart_16x50_driver_t c_this, uint8_t c) { - struct uart_16x50_regs *regs = (uart_16x50_regs_t *)c_this.mmio; + uart_16x50_regs_t *regs = (uart_16x50_regs_t *)PROT_DOMAINS_MMIO(c_this); /* Wait for space in TX FIFO. */ while((regs->lsr & UART_LSR_5_THRE) == 0); @@ -121,3 +121,26 @@ uart_16x50_tx(uart_16x50_driver_t c_this, uint8_t c) regs->rbr_thr_dll = c; } /*---------------------------------------------------------------------------*/ +/** + * \brief Initialize an MMIO-programmable 16X50 UART. + * \param c_this Structure that will be initialized to represent the device. + * \param pci_addr PCI address of device. + * \param dl Divisor setting to configure the baud rate. + */ +void +uart_16x50_init(uart_16x50_driver_t *c_this, + pci_config_addr_t pci_addr, + uint16_t dl) +{ + /* This assumes that the UART had an MMIO range assigned to it by the + * firmware during boot. 
+ */ + pci_init(c_this, pci_addr, UART_MMIO_SZ, 0, 0); + SYSCALLS_INIT(uart_16x50_setup); + SYSCALLS_AUTHZ(uart_16x50_setup, *c_this); + SYSCALLS_INIT(uart_16x50_tx); + SYSCALLS_AUTHZ(uart_16x50_tx, *c_this); + + uart_16x50_setup(*c_this, dl); +} +/*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/drivers/quarkX1000/eth.c b/cpu/x86/drivers/quarkX1000/eth.c index c9322d6a7..5c16b10a5 100644 --- a/cpu/x86/drivers/quarkX1000/eth.c +++ b/cpu/x86/drivers/quarkX1000/eth.c @@ -35,6 +35,7 @@ #include "dma.h" #include "eth.h" #include "helpers.h" +#include "syscalls.h" #include "net/ip/uip.h" #include "pci.h" @@ -158,12 +159,29 @@ typedef struct quarkX1000_eth_meta { /* Transmit descriptor */ volatile quarkX1000_eth_tx_desc_t tx_desc; /* Transmit DMA packet buffer */ - volatile uint8_t tx_buf[UIP_BUFSIZE]; + volatile uint8_t tx_buf[ALIGN(UIP_BUFSIZE, 4)]; /* Receive descriptor */ volatile quarkX1000_eth_rx_desc_t rx_desc; /* Receive DMA packet buffer */ - volatile uint8_t rx_buf[UIP_BUFSIZE]; -} quarkX1000_eth_meta_t; + volatile uint8_t rx_buf[ALIGN(UIP_BUFSIZE, 4)]; + +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__PAGING + /* Domain-defined metadata must fill an even number of pages, since that is + * the minimum granularity of access control supported by paging. However, + * using the "aligned(4096)" attribute causes the alignment of the kernel + * data section to increase, which causes problems when generating UEFI + * binaries, as is described in the linker script. Thus, it is necessary + * to manually pad the structure to fill a page. This only works if the + * sizes of the actual fields of the structure are collectively less than a + * page. 
+ */ + uint8_t pad[MIN_PAGE_SIZE - + (sizeof(quarkX1000_eth_tx_desc_t) + + ALIGN(UIP_BUFSIZE, 4) + + sizeof(quarkX1000_eth_rx_desc_t) + + ALIGN(UIP_BUFSIZE, 4))]; +#endif +} __attribute__((packed)) quarkX1000_eth_meta_t; #define LOG_PFX "quarkX1000_eth: " @@ -188,37 +206,18 @@ typedef struct quarkX1000_eth_meta { #define REG_ADDR_TX_DESC_LIST 0x1010 #define REG_ADDR_DMA_OPERATION 0x1018 -static quarkX1000_eth_driver_t drv; +PROT_DOMAINS_ALLOC(quarkX1000_eth_driver_t, drv); static quarkX1000_eth_meta_t ATTR_BSS_DMA meta; +void quarkX1000_eth_setup(uintptr_t meta_phys_base); + /*---------------------------------------------------------------------------*/ -/** - * \brief Initialize the first Quark X1000 Ethernet MAC. - * - * This procedure assumes that an MMIO range for the device was - * previously assigned, e.g. by firmware. - */ -void -quarkX1000_eth_init(void) +SYSCALLS_DEFINE_SINGLETON(quarkX1000_eth_setup, drv, uintptr_t meta_phys_base) { - pci_config_addr_t pci_addr = { .raw = 0 }; uip_eth_addr mac_addr; uint32_t mac_tmp1, mac_tmp2; - - /* PCI address from section 15.4 of Intel Quark SoC X1000 Datasheet. */ - - pci_addr.dev = 20; - pci_addr.func = 6; - - /* Activate MMIO and DMA access. */ - pci_command_enable(pci_addr, PCI_CMD_2_BUS_MST_EN | PCI_CMD_1_MEM_SPACE_EN); - - printf(LOG_PFX "Activated MMIO and DMA access.\n"); - - pci_addr.reg_off = PCI_CONFIG_REG_BAR0; - - /* Configure the device MMIO range and initialize the driver structure. */ - pci_init(&drv, pci_addr, (uintptr_t)&meta); + quarkX1000_eth_meta_t *loc_meta = + (quarkX1000_eth_meta_t *)PROT_DOMAINS_META(drv); /* Read the MAC address from the device. */ PCI_MMIO_READL(drv, mac_tmp1, REG_ADDR_MACADDR_HI); @@ -246,29 +245,37 @@ quarkX1000_eth_init(void) uip_setethaddr(mac_addr); /* Initialize transmit descriptor. 
*/ - meta.tx_desc.tdes0 = 0; - meta.tx_desc.tdes1 = 0; + loc_meta->tx_desc.tdes0 = 0; + loc_meta->tx_desc.tdes1 = 0; - meta.tx_desc.buf1_ptr = (uint8_t *)meta.tx_buf; - meta.tx_desc.tx_end_of_ring = 1; - meta.tx_desc.first_seg_in_frm = 1; - meta.tx_desc.last_seg_in_frm = 1; - meta.tx_desc.tx_end_of_ring = 1; + loc_meta->tx_desc.buf1_ptr = + (uint8_t *)PROT_DOMAINS_META_OFF_TO_PHYS( + (uintptr_t)&loc_meta->tx_buf, meta_phys_base); + loc_meta->tx_desc.tx_end_of_ring = 1; + loc_meta->tx_desc.first_seg_in_frm = 1; + loc_meta->tx_desc.last_seg_in_frm = 1; + loc_meta->tx_desc.tx_end_of_ring = 1; /* Initialize receive descriptor. */ - meta.rx_desc.rdes0 = 0; - meta.rx_desc.rdes1 = 0; + loc_meta->rx_desc.rdes0 = 0; + loc_meta->rx_desc.rdes1 = 0; - meta.rx_desc.buf1_ptr = (uint8_t *)meta.rx_buf; - meta.rx_desc.own = 1; - meta.rx_desc.first_desc = 1; - meta.rx_desc.last_desc = 1; - meta.rx_desc.rx_buf1_sz = UIP_BUFSIZE; - meta.rx_desc.rx_end_of_ring = 1; + loc_meta->rx_desc.buf1_ptr = + (uint8_t *)PROT_DOMAINS_META_OFF_TO_PHYS( + (uintptr_t)&loc_meta->rx_buf, meta_phys_base); + loc_meta->rx_desc.own = 1; + loc_meta->rx_desc.first_desc = 1; + loc_meta->rx_desc.last_desc = 1; + loc_meta->rx_desc.rx_buf1_sz = UIP_BUFSIZE; + loc_meta->rx_desc.rx_end_of_ring = 1; /* Install transmit and receive descriptors. */ - PCI_MMIO_WRITEL(drv, REG_ADDR_RX_DESC_LIST, (uint32_t)&meta.rx_desc); - PCI_MMIO_WRITEL(drv, REG_ADDR_TX_DESC_LIST, (uint32_t)&meta.tx_desc); + PCI_MMIO_WRITEL(drv, REG_ADDR_RX_DESC_LIST, + PROT_DOMAINS_META_OFF_TO_PHYS( + (uintptr_t)&loc_meta->rx_desc, meta_phys_base)); + PCI_MMIO_WRITEL(drv, REG_ADDR_TX_DESC_LIST, + PROT_DOMAINS_META_OFF_TO_PHYS( + (uintptr_t)&loc_meta->tx_desc, meta_phys_base)); PCI_MMIO_WRITEL(drv, REG_ADDR_MAC_CONF, /* Set the RMII speed to 100Mbps */ @@ -302,28 +309,32 @@ quarkX1000_eth_init(void) * If a frame is received, this procedure copies it into the * global uip_buf buffer. 
*/ -void -quarkX1000_eth_poll(uint16_t *frame_len) +SYSCALLS_DEFINE_SINGLETON(quarkX1000_eth_poll, drv, uint16_t * frame_len) { + uint16_t *loc_frame_len; uint16_t frm_len = 0; + quarkX1000_eth_meta_t *loc_meta = + (quarkX1000_eth_meta_t *)PROT_DOMAINS_META(drv); + + PROT_DOMAINS_VALIDATE_PTR(loc_frame_len, frame_len, sizeof(*frame_len)); /* Check whether the RX descriptor is still owned by the device. If not, * process the received frame or an error that may have occurred. */ - if(meta.rx_desc.own == 0) { - if(meta.rx_desc.err_summary) { + if(loc_meta->rx_desc.own == 0) { + if(loc_meta->rx_desc.err_summary) { fprintf(stderr, LOG_PFX "Error receiving frame: RDES0 = %08x, RDES1 = %08x.\n", - meta.rx_desc.rdes0, meta.rx_desc.rdes1); + loc_meta->rx_desc.rdes0, loc_meta->rx_desc.rdes1); assert(0); } - frm_len = meta.rx_desc.frm_len; + frm_len = loc_meta->rx_desc.frm_len; assert(frm_len <= UIP_BUFSIZE); - memcpy(uip_buf, (void *)meta.rx_buf, frm_len); + memcpy(uip_buf, (void *)loc_meta->rx_buf, frm_len); /* Return ownership of the RX descriptor to the device. */ - meta.rx_desc.own = 1; + loc_meta->rx_desc.own = 1; /* Request that the device check for an available RX descriptor, since * ownership of the descriptor was just transferred to the device. @@ -331,7 +342,7 @@ quarkX1000_eth_poll(uint16_t *frame_len) PCI_MMIO_WRITEL(drv, REG_ADDR_RX_POLL_DEMAND, 1); } - *frame_len = frm_len; + *loc_frame_len = frm_len; } /*---------------------------------------------------------------------------*/ /** @@ -343,27 +354,29 @@ quarkX1000_eth_poll(uint16_t *frame_len) * buffer and signals to the device that a new frame is available to be * transmitted. */ -void -quarkX1000_eth_send(void) +SYSCALLS_DEFINE_SINGLETON(quarkX1000_eth_send, drv) { + quarkX1000_eth_meta_t *loc_meta = + (quarkX1000_eth_meta_t *)PROT_DOMAINS_META(drv); + /* Wait until the TX descriptor is no longer owned by the device. 
*/ - while(meta.tx_desc.own == 1); + while(loc_meta->tx_desc.own == 1); /* Check whether an error occurred transmitting the previous frame. */ - if(meta.tx_desc.err_summary) { + if(loc_meta->tx_desc.err_summary) { fprintf(stderr, LOG_PFX "Error transmitting frame: TDES0 = %08x, TDES1 = %08x.\n", - meta.tx_desc.tdes0, meta.tx_desc.tdes1); + loc_meta->tx_desc.tdes0, loc_meta->tx_desc.tdes1); assert(0); } /* Transmit the next frame. */ assert(uip_len <= UIP_BUFSIZE); - memcpy((void *)meta.tx_buf, uip_buf, uip_len); + memcpy((void *)loc_meta->tx_buf, uip_buf, uip_len); - meta.tx_desc.tx_buf1_sz = uip_len; + loc_meta->tx_desc.tx_buf1_sz = uip_len; - meta.tx_desc.own = 1; + loc_meta->tx_desc.own = 1; /* Request that the device check for an available TX descriptor, since * ownership of the descriptor was just transferred to the device. @@ -371,3 +384,40 @@ quarkX1000_eth_send(void) PCI_MMIO_WRITEL(drv, REG_ADDR_TX_POLL_DEMAND, 1); } /*---------------------------------------------------------------------------*/ +/** + * \brief Initialize the first Quark X1000 Ethernet MAC. + * + * This procedure assumes that an MMIO range for the device was + * previously assigned, e.g. by firmware. + */ +void +quarkX1000_eth_init(void) +{ + pci_config_addr_t pci_addr = { .raw = 0 }; + + /* PCI address from section 15.4 of Intel Quark SoC X1000 Datasheet. */ + + pci_addr.dev = 20; + pci_addr.func = 6; + + /* Activate MMIO and DMA access. */ + pci_command_enable(pci_addr, PCI_CMD_2_BUS_MST_EN | PCI_CMD_1_MEM_SPACE_EN); + + printf(LOG_PFX "Activated MMIO and DMA access.\n"); + + pci_addr.reg_off = PCI_CONFIG_REG_BAR0; + + PROT_DOMAINS_INIT_ID(drv); + /* Configure the device MMIO range and initialize the driver structure. 
*/ + pci_init(&drv, pci_addr, MMIO_SZ, + (uintptr_t)&meta, sizeof(quarkX1000_eth_meta_t)); + SYSCALLS_INIT(quarkX1000_eth_setup); + SYSCALLS_AUTHZ(quarkX1000_eth_setup, drv); + SYSCALLS_INIT(quarkX1000_eth_poll); + SYSCALLS_AUTHZ(quarkX1000_eth_poll, drv); + SYSCALLS_INIT(quarkX1000_eth_send); + SYSCALLS_AUTHZ(quarkX1000_eth_send, drv); + + quarkX1000_eth_setup(prot_domains_lookup_meta_phys_base(&drv)); +} +/*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/drivers/quarkX1000/gpio.c b/cpu/x86/drivers/quarkX1000/gpio.c index 7d7d7dd64..642cad310 100644 --- a/cpu/x86/drivers/quarkX1000/gpio.c +++ b/cpu/x86/drivers/quarkX1000/gpio.c @@ -30,8 +30,11 @@ #include "gpio.h" +#include #include "helpers.h" +#include "paging.h" #include "shared-isr.h" +#include "syscalls.h" /* GPIO Controler Registers */ #define SWPORTA_DR 0x00 @@ -51,25 +54,55 @@ #define GPIO_IRQ 9 +#define HIGHEST_REG LS_SYNC + +#define MMIO_SZ MIN_PAGE_SIZE + +PROT_DOMAINS_ALLOC(pci_driver_t, drv); + struct gpio_internal_data { - pci_driver_t pci; quarkX1000_gpio_callback callback; }; static struct gpio_internal_data data; +void quarkX1000_gpio_mmin(uint32_t offset, uint32_t *res); +SYSCALLS_DEFINE_SINGLETON(quarkX1000_gpio_mmin, drv, + uint32_t offset, uint32_t *res) +{ + uint32_t *loc_res; + + PROT_DOMAINS_VALIDATE_PTR(loc_res, res, sizeof(*res)); + if(HIGHEST_REG < offset) { + halt(); + } + + PCI_MMIO_READL(drv, *loc_res, offset); +} + static inline uint32_t read(uint32_t offset) { uint32_t res; - PCI_MMIO_READL(data.pci, res, offset); + quarkX1000_gpio_mmin(offset, &res); return res; } +void quarkX1000_gpio_mmout(uint32_t offset, uint32_t val); +SYSCALLS_DEFINE_SINGLETON(quarkX1000_gpio_mmout, drv, + uint32_t offset, uint32_t val) +{ + if(HIGHEST_REG < offset) { + halt(); + } + + PCI_MMIO_WRITEL(drv, offset, val); +} + static inline void write(uint32_t offset, uint32_t val) { - PCI_MMIO_WRITEL(data.pci, offset, val); + quarkX1000_gpio_mmout(offset, 
val); } /* value must be 0x0 or 0x1 */ @@ -231,7 +264,12 @@ quarkX1000_gpio_init(void) pci_command_enable(pci_addr, PCI_CMD_1_MEM_SPACE_EN); - pci_init(&data.pci, pci_addr, 0); + PROT_DOMAINS_INIT_ID(drv); + pci_init(&drv, pci_addr, MMIO_SZ, 0, 0); + SYSCALLS_INIT(quarkX1000_gpio_mmin); + SYSCALLS_AUTHZ(quarkX1000_gpio_mmin, drv); + SYSCALLS_INIT(quarkX1000_gpio_mmout); + SYSCALLS_AUTHZ(quarkX1000_gpio_mmout, drv); data.callback = 0; diff --git a/cpu/x86/drivers/quarkX1000/i2c-registers.h b/cpu/x86/drivers/quarkX1000/i2c-registers.h index 7b9e4cec0..3ff7746ec 100644 --- a/cpu/x86/drivers/quarkX1000/i2c-registers.h +++ b/cpu/x86/drivers/quarkX1000/i2c-registers.h @@ -61,6 +61,8 @@ #define QUARKX1000_IC_ENABLE_STATUS 0x9C #define QUARKX1000_IC_FS_SPKLEN 0xA0 +#define QUARKX1000_IC_HIGHEST QUARKX1000_IC_FS_SPKLEN + /* IC_CON */ #define QUARKX1000_IC_CON_MASTER_MODE_SHIFT 0 #define QUARKX1000_IC_CON_MASTER_MODE_MASK 0x01 diff --git a/cpu/x86/drivers/quarkX1000/i2c.c b/cpu/x86/drivers/quarkX1000/i2c.c index 4e5669079..746e52b96 100644 --- a/cpu/x86/drivers/quarkX1000/i2c.c +++ b/cpu/x86/drivers/quarkX1000/i2c.c @@ -32,7 +32,9 @@ #include "i2c.h" #include "i2c-registers.h" +#include "paging.h" #include "shared-isr.h" +#include "syscalls.h" #define I2C_CLOCK_SPEED 25 /* kHz */ #define I2C_FIFO_DEPTH 16 @@ -49,11 +51,15 @@ #define I2C_IRQ 9 +#define MMIO_SZ MIN_PAGE_SIZE + typedef enum { I2C_DIRECTION_READ, I2C_DIRECTION_WRITE } I2C_DIRECTION; +PROT_DOMAINS_ALLOC(pci_driver_t, drv); + struct quarkX1000_i2c_config { QUARKX1000_I2C_SPEED speed; QUARKX1000_I2C_ADDR_MODE addressing_mode; @@ -66,8 +72,6 @@ struct quarkX1000_i2c_config { struct i2c_internal_data { struct quarkX1000_i2c_config config; - pci_driver_t pci; - I2C_DIRECTION direction; uint8_t rx_len; @@ -82,18 +86,46 @@ struct i2c_internal_data { static struct i2c_internal_data device; -static uint32_t +static int inited = 0; + +void quarkX1000_i2c_mmin(uint32_t offset, uint32_t *res); 
+SYSCALLS_DEFINE_SINGLETON(quarkX1000_i2c_mmin, drv, + uint32_t offset, uint32_t *res) +{ + uint32_t *loc_res; + + PROT_DOMAINS_VALIDATE_PTR(loc_res, res, sizeof(*res)); + if(QUARKX1000_IC_HIGHEST < offset) { + halt(); + } + + PCI_MMIO_READL(drv, *loc_res, offset); +} + +static inline uint32_t read(uint32_t offset) { uint32_t res; - PCI_MMIO_READL(device.pci, res, offset); + quarkX1000_i2c_mmin(offset, &res); + return res; } -static void +void quarkX1000_i2c_mmout(uint32_t offset, uint32_t val); +SYSCALLS_DEFINE_SINGLETON(quarkX1000_i2c_mmout, drv, + uint32_t offset, uint32_t val) +{ + if(QUARKX1000_IC_HIGHEST < offset) { + halt(); + } + + PCI_MMIO_WRITEL(drv, offset, val); +} + +static inline void write(uint32_t offset, uint32_t val) { - PCI_MMIO_WRITEL(device.pci, offset, val); + quarkX1000_i2c_mmout(offset, val); } static uint32_t @@ -504,7 +536,7 @@ quarkX1000_i2c_polling_read(uint8_t *buf, uint8_t len, uint16_t addr) int quarkX1000_i2c_is_available(void) { - return device.pci.mmio ? 1 : 0; + return inited; } DEFINE_SHARED_IRQ(I2C_IRQ, IRQAGENT3, INTC, PIRQC, i2c_isr); @@ -522,7 +554,14 @@ quarkX1000_i2c_init(void) pci_command_enable(pci_addr, PCI_CMD_1_MEM_SPACE_EN); - pci_init(&device.pci, pci_addr, 0); + PROT_DOMAINS_INIT_ID(drv); + pci_init(&drv, pci_addr, MMIO_SZ, 0, 0); + SYSCALLS_INIT(quarkX1000_i2c_mmin); + SYSCALLS_AUTHZ(quarkX1000_i2c_mmin, drv); + SYSCALLS_INIT(quarkX1000_i2c_mmout); + SYSCALLS_AUTHZ(quarkX1000_i2c_mmout, drv); + + inited = 1; return 0; } diff --git a/cpu/x86/drivers/quarkX1000/imr-conf.c b/cpu/x86/drivers/quarkX1000/imr-conf.c index b2646e892..8c5b6703a 100644 --- a/cpu/x86/drivers/quarkX1000/imr-conf.c +++ b/cpu/x86/drivers/quarkX1000/imr-conf.c @@ -28,9 +28,9 @@ * OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#include "dma.h" #include "imr.h" - -extern int _sbss_dma_addr, _ebss_dma_addr; +#include "msg-bus.h" /*---------------------------------------------------------------------------*/ void @@ -49,6 +49,8 @@ quarkX1000_imr_conf(void) imr.rdmsk.cpu0 = imr.rdmsk.cpu_0 = 1; imr.wrmsk.cpu0 = imr.wrmsk.cpu_0 = 1; + quarkX1000_msg_bus_init(); + imr.lo.addr = 0; imr.hi.addr = (((uint32_t)&_sbss_dma_addr) - 1) >> QUARKX1000_IMR_SHAMT; quarkX1000_imr_write(imr_idx, imr); @@ -69,5 +71,12 @@ quarkX1000_imr_conf(void) quarkX1000_imr_write(imr_idx, imr); imr_idx++; } + +#ifndef DBG_IMRS + /* The IMRs are locked by the hardware, but the message bus could still + * provide access to other potentially-sensitive functionality. + */ + quarkX1000_msg_bus_lock(); +#endif } /*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/drivers/quarkX1000/msg-bus.c b/cpu/x86/drivers/quarkX1000/msg-bus.c index e7a4bca44..fc64a6f8c 100644 --- a/cpu/x86/drivers/quarkX1000/msg-bus.c +++ b/cpu/x86/drivers/quarkX1000/msg-bus.c @@ -30,6 +30,9 @@ #include "msg-bus.h" #include "pci.h" +#include "syscalls.h" + +PROT_DOMAINS_ALLOC(dom_client_data_t, quarkX1000_msg_bus); /** Message bus control register */ #define MCR_PCI_REG_ADDR 0xD0 @@ -83,15 +86,21 @@ request_op(uint8_t port, uint32_t reg_off, uint8_t opcode) * \param reg_off Register/offset identifier of message bus register to read. * \param val Storage location for value that has been read. 
*/ -void -quarkX1000_msg_bus_read(uint8_t port, uint32_t reg_off, uint32_t *val) +SYSCALLS_DEFINE_SINGLETON(quarkX1000_msg_bus_read, + quarkX1000_msg_bus, + uint8_t port, + uint32_t reg_off, + uint32_t *val) { + uint32_t *loc_val; pci_config_addr_t pci_addr = { .raw = 0 }; + PROT_DOMAINS_VALIDATE_PTR(loc_val, val, sizeof(*val)); + request_op(port, reg_off, 0x10); pci_addr.reg_off = MDR_PCI_REG_ADDR; - *val = pci_config_read(pci_addr); + *loc_val = pci_config_read(pci_addr); } /*---------------------------------------------------------------------------*/ /** @@ -100,8 +109,11 @@ quarkX1000_msg_bus_read(uint8_t port, uint32_t reg_off, uint32_t *val) * \param reg_off Register/offset identifier of message bus register to write. * \param val Value to write. */ -void -quarkX1000_msg_bus_write(uint8_t port, uint32_t reg_off, uint32_t val) +SYSCALLS_DEFINE_SINGLETON(quarkX1000_msg_bus_write, + quarkX1000_msg_bus, + uint8_t port, + uint32_t reg_off, + uint32_t val) { pci_config_addr_t pci_addr = { .raw = 0 }; @@ -111,3 +123,21 @@ quarkX1000_msg_bus_write(uint8_t port, uint32_t reg_off, uint32_t val) request_op(port, reg_off, 0x11); } /*---------------------------------------------------------------------------*/ +void +quarkX1000_msg_bus_init(void) +{ + PROT_DOMAINS_INIT_ID(quarkX1000_msg_bus); + prot_domains_reg(&quarkX1000_msg_bus, 0, 0, 0, 0, true); + SYSCALLS_INIT(quarkX1000_msg_bus_read); + SYSCALLS_AUTHZ(quarkX1000_msg_bus_read, quarkX1000_msg_bus); + SYSCALLS_INIT(quarkX1000_msg_bus_write); + SYSCALLS_AUTHZ(quarkX1000_msg_bus_write, quarkX1000_msg_bus); +} +/*---------------------------------------------------------------------------*/ +void +quarkX1000_msg_bus_lock(void) +{ + SYSCALLS_DEAUTHZ(quarkX1000_msg_bus_read, quarkX1000_msg_bus); + SYSCALLS_DEAUTHZ(quarkX1000_msg_bus_write, quarkX1000_msg_bus); +} +/*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/drivers/quarkX1000/msg-bus.h 
b/cpu/x86/drivers/quarkX1000/msg-bus.h index 0b810e06d..11bab0849 100644 --- a/cpu/x86/drivers/quarkX1000/msg-bus.h +++ b/cpu/x86/drivers/quarkX1000/msg-bus.h @@ -44,6 +44,8 @@ * the message bus. */ +void quarkX1000_msg_bus_init(void); +void quarkX1000_msg_bus_lock(void); void quarkX1000_msg_bus_read(uint8_t port, uint32_t reg_off, uint32_t *val); void quarkX1000_msg_bus_write(uint8_t port, uint32_t reg_off, uint32_t val); diff --git a/cpu/x86/drivers/quarkX1000/uart.c b/cpu/x86/drivers/quarkX1000/uart.c index 23731ba93..dcd0af8f2 100644 --- a/cpu/x86/drivers/quarkX1000/uart.c +++ b/cpu/x86/drivers/quarkX1000/uart.c @@ -32,8 +32,8 @@ #include "uart-16x50.h" #include -static uart_16x50_driver_t quarkX1000_uart0; -static uart_16x50_driver_t quarkX1000_uart1; +PROT_DOMAINS_ALLOC(uart_16x50_driver_t, quarkX1000_uart0); +PROT_DOMAINS_ALLOC(uart_16x50_driver_t, quarkX1000_uart1); /* Divisor setting for 115200 baud from section 18.2.2 of Intel Quark SoC * X1000 Datasheet. @@ -49,6 +49,7 @@ void quarkX1000_uart_init(quarkX1000_uart_dev_t dev) { pci_config_addr_t pci_addr; + uart_16x50_driver_t *drv; assert((dev == QUARK_X1000_UART_0) || (dev == QUARK_X1000_UART_1)); @@ -59,7 +60,14 @@ quarkX1000_uart_init(quarkX1000_uart_dev_t dev) pci_addr.func = (dev == QUARK_X1000_UART_0) ? 1 : 5; pci_addr.reg_off = PCI_CONFIG_REG_BAR0; - uart_16x50_init((dev == QUARK_X1000_UART_0) ? 
&quarkX1000_uart0 : &quarkX1000_uart1, pci_addr, QUARK_X1000_UART_DL_115200); + if(dev == QUARK_X1000_UART_0) { + drv = &quarkX1000_uart0; + PROT_DOMAINS_INIT_ID(quarkX1000_uart0); + } else { + drv = &quarkX1000_uart1; + PROT_DOMAINS_INIT_ID(quarkX1000_uart1); + } + uart_16x50_init(drv, pci_addr, QUARK_X1000_UART_DL_115200); } /*---------------------------------------------------------------------------*/ /** diff --git a/cpu/x86/helpers.h b/cpu/x86/helpers.h index 91b120a9e..1ef312e14 100644 --- a/cpu/x86/helpers.h +++ b/cpu/x86/helpers.h @@ -37,6 +37,17 @@ void halt(void) __attribute__((__noreturn__)); +#define STRINGIFY(x) #x +/* The C preprocessor will not expand macro arguments that are converted to + * strings in the macro body using the '#' operator. The EXP_STRINGIFY macro + * introduces an additional level of argument expansion for instances where + * the developer wishes to convert the expanded argument to a string. + */ +#define EXP_STRINGIFY(x) STRINGIFY(x) + +#define ALIGN(x, amt) \ + (((x) & ~((amt) - 1)) + ((((x) & ((amt) - 1)) == 0) ? 0 : (amt))) + /** Wrappers for the assembly 'out' instruction. */ void outb(uint16_t port, uint8_t val); void outl(uint16_t port, uint32_t val); diff --git a/cpu/x86/init/common/cpu.c b/cpu/x86/init/common/cpu.c index a174853cc..94ec2ddab 100644 --- a/cpu/x86/init/common/cpu.c +++ b/cpu/x86/init/common/cpu.c @@ -28,11 +28,13 @@ * OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include "cpu.h" #include "gdt.h" #include "helpers.h" #include "idt.h" #include "interrupt.h" #include "irq.h" +#include "stacks.h" static void double_fault_handler(struct interrupt_context context) @@ -40,16 +42,79 @@ double_fault_handler(struct interrupt_context context) halt(); } /*---------------------------------------------------------------------------*/ -void -cpu_init(void) +/* The OS has switched to its own segment descriptors. However, the protection + * domain support, if enabled, has not yet been fully activated. 
+ */ +static void +boot_stage1(void) { - gdt_init(); idt_init(); /* Set an interrupt handler for Double Fault exception. This way, we avoid * the system to triple fault, leaving no trace about what happened. */ - SET_INTERRUPT_HANDLER(8, 1, double_fault_handler); + SET_EXCEPTION_HANDLER(8, 1, double_fault_handler); - irq_init(); + /* Initialize protection domain support, if enabled */ + prot_domains_init(); + + prot_domains_leave_boot_stage1(); +} +/*---------------------------------------------------------------------------*/ +int main(void); +/* This routine runs with the initial, flat address space, even if protection + * domain support is enabled. The goal behind the design of this routine is to + * keep it as short as possible, since it is unable to directly reference data + * and invoke functions that are intended to be accessible later after the + * system has booted when a multi-segment protection domain model is in use. + */ +void +cpu_boot_stage0(void) +{ + /* Reserve three stack slots for return addresses */ + uintptr_t top_of_stack = STACKS_INIT_TOP; + +#if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__NONE + uintptr_t *top_of_stack_ptr = (uintptr_t *)top_of_stack; + + top_of_stack_ptr[0] = (uintptr_t)prot_domains_launch_kernel; + top_of_stack_ptr[1] = (uintptr_t)prot_domains_launch_app; +#endif + + /* Perform common GDT initialization */ + gdt_init(); + + /* Switch all data segment registers to the newly-initialized flat data + * descriptor. + */ + __asm__( + "mov %0, %%ds\n\t" + "mov %0, %%es\n\t" + "mov %0, %%fs\n\t" + "mov %0, %%gs\n\t" + : + : "r" (GDT_SEL_DATA_FLAT) + ); + + /** + * Perform specific GDT initialization tasks for the protection domain + * implementation that is enabled, if any. 
+ */ + prot_domains_gdt_init(); + + /* Do not pass memory operands to the asm block below, since it is + * switching from the flat address space to a multi-segment address space + * model if such a model is used by the enabled protection domain + * implementation, if any. + */ + __asm__( + "mov %[_ss_], %%ss\n\t" + "mov %[_esp_], %%esp\n\t" + "ljmp %[_cs_], %[_stage1_]\n\t" + : + : [_ss_] "r" (GDT_SEL_STK_EXC), + [_esp_] "r" (top_of_stack), + [_cs_] "i" ((uint16_t)GDT_SEL_CODE_EXC), + [_stage1_] "i" (boot_stage1) + ); } diff --git a/cpu/x86/init/common/cpu.h b/cpu/x86/init/common/cpu.h index a56d0db5a..4fd9b835b 100644 --- a/cpu/x86/init/common/cpu.h +++ b/cpu/x86/init/common/cpu.h @@ -31,6 +31,8 @@ #ifndef CPU_H #define CPU_H -void cpu_init(void); +#include "prot-domains.h" + +void cpu_boot_stage0(void) ATTR_CODE_BOOT; #endif /* CPU_H */ diff --git a/cpu/x86/init/common/gdt.c b/cpu/x86/init/common/gdt.c index 39a8a7ce4..f7fa10342 100644 --- a/cpu/x86/init/common/gdt.c +++ b/cpu/x86/init/common/gdt.c @@ -29,45 +29,15 @@ */ #include +#include "gdt.h" +#include "gdt-layout.h" +#include "helpers.h" +#include "prot-domains.h" +#include "segmentation.h" -#define NUM_DESC 3 - -#define GDT_IDX_NULL 0 -#define GDT_IDX_CODE 1 -#define GDT_IDX_DATA 2 - -/* All code in the x86 port of Contiki runs at ring (privilege) level 0 */ -#define PRIV_LVL 0 - -/* Compute GDT selector from descriptor index and requested privilege level */ -#define GDT_SEL(IDX, RPL) (((IDX) << 3) | (RPL)) - -#define GDT_SEL_NULL GDT_SEL(GDT_IDX_NULL, 0) -#define GDT_SEL_CODE GDT_SEL(GDT_IDX_CODE, PRIV_LVL) -#define GDT_SEL_DATA GDT_SEL(GDT_IDX_DATA, PRIV_LVL) - -/* Each define here is for a specific flag in the descriptor. Refer to Intel - * Combined Manual (Intel 64 and IA-32 Architectures Software Developer's - * Manual), Vol. 3, Section 3.4.5 for a description of each flag. 
- */ -#define SEG_DESCTYPE(x) ((x) << 0x04) /* Descriptor type (0 for system, 1 for code/data) */ -#define SEG_PRES(x) ((x) << 0x07) /* Present */ -#define SEG_SAVL(x) ((x) << 0x0C) /* Available for system use */ -#define SEG_LONG(x) ((x) << 0x0D) /* Long mode */ -#define SEG_SIZE(x) ((x) << 0x0E) /* Size (0 for 16-bit, 1 for 32) */ -#define SEG_GRAN(x) ((x) << 0x0F) /* Granularity (0 for 1B - 1MB, 1 for 4KB - 4GB) */ -#define SEG_PRIV(x) (((x) & 0x03) << 0x05) /* Set privilege level (0 - 3) */ - -#define SEG_DATA_RDWR 0x02 /* Read/Write */ -#define SEG_CODE_EXRD 0x0A /* Execute/Read */ - -#define GDT_CODE_PL0 SEG_DESCTYPE(1) | SEG_PRES(1) | SEG_SAVL(0) | \ - SEG_LONG(0) | SEG_SIZE(1) | SEG_GRAN(1) | \ - SEG_PRIV(0) | SEG_CODE_EXRD - -#define GDT_DATA_PL0 SEG_DESCTYPE(1) | SEG_PRES(1) | SEG_SAVL(0) | \ - SEG_LONG(0) | SEG_SIZE(1) | SEG_GRAN(1) | \ - SEG_PRIV(0) | SEG_DATA_RDWR +#define GDT_MEM_PL0 (SEG_DESCTYPE_NSYS | SEG_GRAN_PAGE) +#define GDT_CODE_PL0 (GDT_MEM_PL0 | SEG_TYPE_CODE_EXRD) +#define GDT_DATA_PL0 (GDT_MEM_PL0 | SEG_TYPE_DATA_RDWR) typedef struct gdtr { @@ -75,41 +45,53 @@ typedef struct gdtr uint32_t base; } __attribute__((packed)) gdtr_t; -typedef uint64_t segment_desc_t; - /* From Intel Combined Manual, Vol. 3 , Section 3.5.1: The base addresses of * the GDT should be aligned on an eight-byte boundary to yield the best * processor performance. 
*/ -static segment_desc_t gdt[NUM_DESC] __attribute__ ((aligned (8))); +segment_desc_t __attribute__ ((aligned(8))) ATTR_BSS_GDT_START + gdt[GDT_NUM_FIXED_DESC]; -static void -set_descriptor(unsigned int index, uint32_t base, uint32_t limit, uint16_t flag) +#define GDT_LEN \ + ((((uintptr_t)&_ebss_gdt_addr) - \ + (uintptr_t)gdt)/sizeof(segment_desc_t)) + +/*---------------------------------------------------------------------------*/ +static void ATTR_CODE_BOOT +set_descriptor(unsigned int index, + uint32_t base, + uint32_t len, + uint16_t flag) { segment_desc_t descriptor; - if (index >= NUM_DESC) - return; + if(GDT_LEN <= index) { + halt(); + } - /* Create the high 32 bit segment */ - descriptor = limit & 0x000F0000; /* set limit bits 19:16 */ - descriptor |= (flag << 8) & 0x00F0FF00; /* set type, p, dpl, s, g, d/b, l and avl fields */ - descriptor |= (base >> 16) & 0x000000FF; /* set base bits 23:16 */ - descriptor |= base & 0xFF000000; /* set base bits 31:24 */ - - /* Shift by 32 to allow for low part of segment */ - descriptor <<= 32; - - /* Create the low 32 bit segment */ - descriptor |= base << 16; /* set base bits 15:0 */ - descriptor |= limit & 0x0000FFFF; /* set limit bits 15:0 */ + segment_desc_init(&descriptor, base, len, flag); /* Save descriptor into gdt */ gdt[index] = descriptor; } +/*---------------------------------------------------------------------------*/ +void +gdt_copy_desc_change_dpl(unsigned int dest_idx, + unsigned int src_idx, + unsigned dpl) +{ + segment_desc_t desc; + if((GDT_LEN <= dest_idx) || (GDT_LEN <= src_idx)) { + halt(); + } -/* This function initializes the Global Offset Table. For simplicity, the + desc = gdt[src_idx]; + SEG_SET_FLAG(desc, DPL, dpl); + gdt[dest_idx] = desc; +} +/*---------------------------------------------------------------------------*/ +/* This function initializes the Global Descriptor Table. For simplicity, the * memory is organized following the flat model. 
Thus, memory appears to * Contiki as a single continuous address space. Code, data, and stack * are all contained in this address space (so called linear address space). @@ -120,29 +102,35 @@ gdt_init(void) gdtr_t gdtr; /* Initialize gdtr structure */ - gdtr.limit = sizeof(segment_desc_t) * NUM_DESC - 1; + gdtr.limit = sizeof(segment_desc_t) * GDT_LEN - 1; gdtr.base = (uint32_t) &gdt; /* Initialize descriptors */ set_descriptor(GDT_IDX_NULL, 0, 0, 0); - set_descriptor(GDT_IDX_CODE, 0, 0x0FFFFF, GDT_CODE_PL0); - set_descriptor(GDT_IDX_DATA, 0, 0x0FFFFF, GDT_DATA_PL0); + set_descriptor(GDT_IDX_CODE_FLAT, 0, 0x100000, GDT_CODE_PL0); + set_descriptor(GDT_IDX_DATA_FLAT, 0, 0x100000, GDT_DATA_PL0); - /* Load GDTR register and update segment registers. - * - * CS register cannot be changed directly. For that reason, we do a far jump. - */ - __asm__ ("lgdt %[_gdtr_]\n\t" - "jmp %[_cs_], $1f\n\t" - "1:\n\t" - "mov %[_ds_], %%ds\n\t" - "mov %[_ds_], %%ss\n\t" - "mov %[_ds_], %%es\n\t" - "mov %[_ds_], %%fs\n\t" - "mov %[_ds_], %%gs\n\t" - : - : [_gdtr_] "m" (gdtr), - [_cs_] "i" (GDT_SEL_CODE), - [_ds_] "r" (GDT_SEL_DATA) - ); + /* Load GDTR */ + __asm__ __volatile__ ("lgdt %0" :: "m" (gdtr)); } +/*---------------------------------------------------------------------------*/ +void +gdt_insert(unsigned int idx, segment_desc_t desc) +{ + if(GDT_LEN <= idx) { + halt(); + } + + gdt[idx] = desc; +} +/*---------------------------------------------------------------------------*/ +void +gdt_lookup(unsigned int idx, segment_desc_t *desc) +{ + if((GDT_LEN <= idx) || (desc == NULL)) { + halt(); + } + + *desc = gdt[idx]; +} +/*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/init/common/gdt.h b/cpu/x86/init/common/gdt.h index 3db17f08c..37f1f4dbe 100644 --- a/cpu/x86/init/common/gdt.h +++ b/cpu/x86/init/common/gdt.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015, Intel Corporation. All rights reserved. 
+ * Copyright (C) 2015-2016, Intel Corporation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -31,6 +31,32 @@ #ifndef GDT_H #define GDT_H -void gdt_init(void); +#include "gdt-layout.h" +#include "prot-domains.h" +#include "segmentation.h" + +extern segment_desc_t gdt[]; +extern int _ebss_gdt_addr; + +#define GDT_IDX_OF_DESC(ptr) \ + ((((uintptr_t)(ptr)) - ((uintptr_t)&gdt))/ \ + sizeof(segment_desc_t)) + +/** + * \brief Compute the selector for a GDT entry allocated somewhere besides gdt.c. + * \param ptr Pointer to GDT descriptor. + * \param rpl Requested Privilege Level. + */ +#define GDT_SEL_OF_DESC(ptr, rpl) GDT_SEL(GDT_IDX_OF_DESC(ptr), rpl) + +#define ATTR_BSS_GDT __attribute__((section(".gdt_bss"))) +#define ATTR_BSS_GDT_START __attribute__((section(".gdt_bss_start"))) + +void gdt_copy_desc_change_dpl(unsigned int dest_idx, + unsigned int src_idx, + unsigned dpl); +void gdt_init(void) ATTR_CODE_BOOT; +void gdt_insert(unsigned int idx, segment_desc_t desc); +void gdt_lookup(unsigned int idx, segment_desc_t *desc); #endif /* GDT_H */ diff --git a/cpu/x86/init/common/idt.c b/cpu/x86/init/common/idt.c index d561f49f7..441668a75 100644 --- a/cpu/x86/init/common/idt.c +++ b/cpu/x86/init/common/idt.c @@ -28,9 +28,13 @@ * OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include "gdt-layout.h" +#include "prot-domains.h" #include #include "helpers.h" +#include "segmentation.h" +#include "idt.h" #define NUM_DESC 256 @@ -55,22 +59,27 @@ typedef struct intr_gate_desc { * of the IDT should be aligned on an 8-byte boundary to maximize performance * of cache line fills. 
*/ -static intr_gate_desc_t idt[NUM_DESC] __attribute__ ((aligned(8))); +static intr_gate_desc_t __attribute__((aligned(8))) ATTR_BSS_KERN + idt[NUM_DESC]; +/*---------------------------------------------------------------------------*/ /* XXX: If you change this function prototype, make sure you fix the assembly - * code in SET_INTERRUPT_HANDLER macro in interrupt.h. Otherwise, you might + * code in SET_INT_EXC_HANDLER macro in interrupt.h. Otherwise, you might * face a very-hard-to-find bug in the interrupt handling system. */ void -idt_set_intr_gate_desc(int intr_num, uint32_t offset) +idt_set_intr_gate_desc(int intr_num, + uint32_t offset, + uint16_t cs, + uint16_t dpl) { intr_gate_desc_t *desc = &idt[intr_num]; desc->offset_low = offset & 0xFFFF; - desc->selector = 0x08; /* Offset in GDT for code segment */ + desc->selector = cs; desc->fixed = BIT(9) | BIT(10); desc->d = 1; - desc->dpl = 0; + desc->dpl = dpl; desc->p = 1; desc->offset_high = (offset >> 16) & 0xFFFF; } diff --git a/cpu/x86/init/common/idt.h b/cpu/x86/init/common/idt.h index d29b97153..18f168ad8 100644 --- a/cpu/x86/init/common/idt.h +++ b/cpu/x86/init/common/idt.h @@ -32,8 +32,12 @@ #define IDT_H #include +#include "prot-domains.h" -void idt_init(void); -void idt_set_intr_gate_desc(int intr_num, uint32_t offset); +void idt_init(void) ATTR_CODE_BOOT; +void idt_set_intr_gate_desc(int intr_num, + uint32_t offset, + uint16_t cs, + uint16_t dpl); #endif /* IDT_H */ diff --git a/cpu/x86/init/common/interrupt.h b/cpu/x86/init/common/interrupt.h index 601695bde..10b906be4 100644 --- a/cpu/x86/init/common/interrupt.h +++ b/cpu/x86/init/common/interrupt.h @@ -32,10 +32,17 @@ #define INTERRUPT_H #include +#include "gdt-layout.h" #include "idt.h" struct interrupt_context { + /* The general-purpose register values are saved by the pushal instruction in + * the interrupt dispatcher. 
Having access to these saved values may be + * useful in some future interrupt or exception handler, and saving and later + * restoring them also enables the ISR to freely overwrite the EAX, ECX, and + * EDX registers as is permitted by the cdecl calling convention. + */ uint32_t edi; uint32_t esi; uint32_t ebp; @@ -44,16 +51,28 @@ struct interrupt_context { uint32_t edx; uint32_t ecx; uint32_t eax; + /* These two values are pushed on the stack by the CPU when it delivers an + * exception with an associated error code. Currently, only the double fault + * handler accepts this structure as a parameter, and that type of exception + * does have an associated error code. + */ uint32_t error_code; uint32_t eip; + /* The CPU pushes additional values beyond these on the stack, specifically + * the code segment descriptor and flags. If a privilege-level change occurs + * during delivery, the CPU additionally pushes the stack pointer and stack + * segment descriptor. + */ }; -#define ISR_STUB(label_str, has_error_code, handler_str) \ +#define ISR_STUB(label_str, has_error_code, handler_str, exc) \ "jmp 2f\n\t" \ ".align 4\n\t" \ label_str ":\n\t" \ " pushal\n\t" \ + PROT_DOMAINS_ENTER_ISR(exc) \ " call " handler_str "\n\t" \ + PROT_DOMAINS_LEAVE_ISR(exc) \ " popal\n\t" \ " .if " #has_error_code "\n\t" \ " add $4, %%esp\n\t" \ @@ -72,6 +91,14 @@ struct interrupt_context { * void handler(void) * Otherwise, it should be: * void handler(struct interrupt_context context) + * exc: 0 if this is an interrupt, which should be handled + * at the interrupt privilege level. 1 if this is an + * exception, which should be handled at the + * exception privilege level. + * dpl: Privilege level for IDT descriptor, which is the + * numerically-highest privilege level that can + * generate this interrupt with a software interrupt + * instruction. 
* * Since there is no easy way to write an Interrupt Service Routines * (ISR) in C (for further information on this, see [1]), we provide @@ -81,18 +108,30 @@ struct interrupt_context { * * [1] http://wiki.osdev.org/Interrupt_Service_Routines */ -#define SET_INTERRUPT_HANDLER(num, has_error_code, handler) \ - do { \ - __asm__ __volatile__ ( \ - "push $1f\n\t" \ - "push %0\n\t" \ - "call %P1\n\t" \ - "add $8, %%esp\n\t" \ - ISR_STUB("1", has_error_code, "%P2") \ - :: "g" (num), "i" (idt_set_intr_gate_desc), "i" (handler) \ - : "eax", "ecx", "edx" \ - ); \ +#define SET_INT_EXC_HANDLER(num, has_error_code, handler, exc, dpl) \ + do { \ + __asm__ __volatile__ ( \ + "pushl %[_dpl_]\n\t" \ + "pushl %[_cs_]\n\t" \ + "pushl $1f\n\t" \ + "pushl %[_isr_num_]\n\t" \ + "call idt_set_intr_gate_desc\n\t" \ + "add $16, %%esp\n\t" \ + ISR_STUB("1", has_error_code, "%P[_handler_]", exc) \ + : \ + : [_isr_num_] "g" (num), \ + [_handler_] "i" (handler), \ + [_cs_] "i" (exc ? GDT_SEL_CODE_EXC : GDT_SEL_CODE_INT), \ + [_dpl_] "i" (dpl) \ + /* the invocation of idt_set_intr_gate_desc may clobber */ \ + /* the caller-saved registers: */ \ + : "eax", "ecx", "edx" \ + ); \ } while (0) +#define SET_INTERRUPT_HANDLER(num, has_error_code, handler) \ + SET_INT_EXC_HANDLER(num, has_error_code, handler, 0, PRIV_LVL_INT) +#define SET_EXCEPTION_HANDLER(num, has_error_code, handler) \ + SET_INT_EXC_HANDLER(num, has_error_code, handler, 1, PRIV_LVL_EXC) /* Disable maskable hardware interrupts */ #define DISABLE_IRQ() \ diff --git a/cpu/x86/mm/README.md b/cpu/x86/mm/README.md new file mode 100644 index 000000000..8990beec9 --- /dev/null +++ b/cpu/x86/mm/README.md @@ -0,0 +1,669 @@ +X86 Lightweight Protection Domain Support for Contiki +===================================================== + +Introduction +------------ + +The X86 port of Contiki implements a simple, lightweight form of +protection domains using a pluggable framework. 
Currently, the +following plugin is available: + + - Flat memory model with paging. + +For an introduction to paging and possible ways in which it can be +used, refer to the following resources: + + - Intel Combined Manual (Intel 64 and IA-32 Architectures Software + Developer's Manual), Vol. 3, Chapter 4 + - Programming the 80386, by John H. Crawford and Patrick + P. Gelsinger, Chapter 5 + +The overall goal of a protection domain implementation within this +framework is to define a set of resources that should be accessible to +each protection domain and to prevent that protection domain from +accessing other resources. The details of each implementation of +protection domains may differ substantially, but they should all be +guided by the principle of least privilege [1]. However, that +idealized principle is balanced against the practical objectives of +limiting the number of relatively time-consuming context switches and +minimizing changes to existing code. In fact, no changes were made to +code outside of the CPU- and platform-specific code directories for +the initial plugin. + +Each protection domain can optionally be associated with a metadata +and/or MMIO region. The hardware can support additional regions per +protection domain, but that would increase complexity and is unneeded +for the existing protection domains. + +After boot, all code runs in the context of some protection domain. +Two default protection domains are implemented: + +- kern: Kernel protection domain that is more privileged than any + other protection domain. As little code as possible should be placed + in this protection domain. +- app: Application protection domain used whenever special privileges + are not required. + +Additional protection domains are defined as needed. For example, +each driver may reside in a separate protection domain, although not +all drivers require additional privileges beyond those available in +the relevant scheduling context in the app protection domain. 
The +Ethernet and UART drivers are assigned separate protection domains. +Non-driver protection domains can also be defined. Other drivers only +require access to programmed IO ports accessible via the IN* and OUT* +instructions, and such drivers do not require separate protection +domains. They run in the Contiki preemptive scheduling context and +the kernel protection domain, both of which are granted access to all +IO ports. + +Each protection domain may have associated system calls. A system +call transfers control from a client protection domain to a defined +entrypoint in a server protection domain. As their name suggests, +system calls adhere to a synchronous call-return model (rather than +some alternative such as an asynchronous message-passing model). To +invoke a system call, the client provides two identifiers to the +system call dispatcher. The first identifies the server domain and +the second identifies the system call to be invoked. The protection +domain implementation should associate allowable system calls with +particular server protection domains and reject any system call +requests that are not within that set of allowable system calls. The +system call implementations do not restrict the clients that are +permitted to invoke each system call. No modifications that the +client can make to the server domain and system call identifiers can +open up new entrypoints into the server domain. The entrypoints are +fixed at boot time. + +However, if the identifiers were stored in shared memory, it may be +possible for a protection domain to influence the system calls issued +by some other protection domain, which may be undesirable. Thus, the +server domain identifiers are stored in memory that can only be +written by the kernel protection domain and the system call +identifiers are embedded in the code. 
+ +The system call dispatcher is responsible for reconfiguring the system +to enforce the appropriate resource access controls for the server +protection domain. It should then transfer control to the approved +entrypoint for the requested system call. + +Contiki defines a process concept that is orthogonal to protection +domains [2]. A single Contiki process may run code in multiple +protection domains at various points in time. Contiki processes run +in a cooperative scheduling context. Contiki also defines a +preemptive scheduling context for interrupt handlers and real-time +timers. When protection domain support is enabled, interrupts are +only enabled when the application protection domain is active and is +running code in the cooperative scheduling context. Code running in +the preemptive context may also invoke multiple protection domains. +Contiki can also support preemptive multithreading, but support for +that has not yet been added to the X86 port so we do not discuss it +further. + +A single stack is shared by all code that runs in the cooperative +scheduling context in all protection domains, and separate stacks are +defined for short interrupt dispatchers in the preemptive scheduling +context and for exception handlers and software system call +dispatchers. Except for the interrupt dispatchers, code in the +preemptive scheduling context also shares the same stack with the +cooperative scheduling context. All protection domains also share a +main data section, so similar considerations are also relevant to +that. + +Introducing multi-core support would complicate things further, since +another core running a protection domain that the first core never +invoked could access data from the protection domain on the first +core. It may be possible to adequately address such concerns by +allocating per-core stacks. 
+ +Note that this stack arrangement means that a given protection domain +may read and write data written to the stack by some other protection +domain. For example, a protection domain B may push data onto the +stack and later pop that data off of the stack, but a protection +domain A that invoked protection domain B may still be able to read +the data that was pushed and popped to and from the stack, since +popping the data off of the stack does not automatically erase that +stack memory location. Another possibility is that protection domain +B may modify a stack entry pushed by protection domain A before it +invoked protection domain B, and protection domain A may later use the +modified value. Permitting legitimate accesses to callers' stacks is +in fact the primary motivation for this stack arrangement, in that it +makes it simple for A to pass data to and from B (on the shared stack) +when requesting services from B. A system call invocation is nearly +transparent to the developer, appearing almost identical to an +ordinary function call. However, B can access any data on the stack. +The third case is that A can read data placed on the stack by B after +B returns, unless B wipes that data from the stack before returning. +A related sub-case is that if an interrupt handler is invoked, it +pushes the current contents of the general-purpose registers onto the +stack, which may then be revealed to other protection domains besides +the one that was interrupted. However, interrupts are only actually +enabled in the application protection domain. + +Similarly, register contents may be accessed and modified across +protection domain boundaries. + +For the reasons described above, each protection domain should only +invoke other protection domains that it trusts to properly handle data +on the stack. + +Design +------ + +### Boot Process + +The system boots in the following phases. 
+ +#### UEFI Bootstrap + +Primary implementation sources: + + - cpu/x86/uefi/bootstrap_uefi.c + +When the OS is compiled as a UEFI binary, a short bootstrap phase that +is UEFI-compliant is run initially. It simply performs a minimal set +of functions to exit the UEFI boot services and then transfer control +to the Multiboot bootstrap phase. + +#### Multiboot Bootstrap + +Primary implementation sources: + + - cpu/x86/bootstrap_quarkX1000.S + +This phase disables interrupts, sets the stack pointer to the top of +the main stack, and then invokes boot stage 0. + +#### Boot Stage 0 + +Primary implementation sources: + + - cpu/x86/init/common/cpu.c + - cpu/x86/init/common/gdt.c + +The UEFI firmware or Multiboot-compliant bootloader should have +configured an initial Global Descriptor Table (GDT) with flat segments +and configured the CPU to operate in protected mode with paging +disabled. Flat segments each map the whole 4GiB physical memory +space. This is the state of the system when the OS enters boot stage +0. This stage is responsible for setting up a new GDT and loading the +segment registers with the appropriate descriptors from the new GDT to +enable boot stage 1 to run. + +#### Boot Stage 1 + +Primary implementation sources: + + - cpu/x86/init/common/cpu.c + - cpu/x86/init/common/idt.c + - cpu/x86/mm/prot-domains.c + +Boot stage 1 intializes the Interrupt Descriptor Table (IDT) and +installs a handler for double-fault exceptions. Handlers for +additional interrupts and exceptions are installed later in boot +stages 1 and 2. + +This stage also initializes protection domain support and enters the +kernel protection domain. + +#### Boot Stage 2 + +Primary implementation sources: + + - cpu/x86/init/common/cpu.c + - platform/galileo/contiki-main.c + +The entrypoint for the kernel protection domain is 'main'. Boot stage +2 initializes hardware devices and associated interrupts. It then +transfers control to the application protection domain. 
Note that +this is a transfer of control, not a call that would be matched with +some future return. This is an important distinction, because +protection domains are not reentrant. Thus, if the kernel protection +domain called the application protection domain, it would not be +possible to invoke any kernel system calls until the system is reset, +since the application protection domain never exits/returns while the +system is running. There are not actually any kernel system calls +provided in the initial implementation of protection domains, but they +may be added in the future. + +The core protection domain configuration (e.g. allowable system calls +and entrypoints, registered protection domains, etc.) is frozen by the +conclusion of boot stage 2 to help prevent erroneous changes that +could reduce the robustness of the system. The way that it is frozen +is that there are no kernel system calls that are intended to permit +changes to the core protection domain configuration. Thus, once the +kernel protection domain has exited, the only way the core protection +domain configuration can change would be due to undesirable memory +manipulations (e.g. due to a faulty device driver). + +#### Boot Stage 3 + +Primary implementation sources: + + - platform/galileo/contiki-main.c + +Boot stage 3 performs initialization procedures that are less +tightly-coupled to hardware. For example, it launches Contiki +processes and invokes Contiki configuration routines. + +### Privilege Levels + +When protection domain support is inactive, all code runs at +ring/privilege level 0. When protection domain support is active, +only exception handlers and system call dispatchers (including +dispatchers for system call returns) run at ring level 0. Code in the +preemptive scheduling context runs at ring level 2 and code in the +cooperative scheduling context runs at ring level 3. Ring levels with +higher numbers are less privileged than those with lower numbers. +Ring level 1 is unused. 
+ +### IO and Interrupt Privileges + +The kernel protection domain cooperative scheduling context needs +access to IO ports, for device initialization. Other protection +domains may also require such access. The IO Privilege Level (IOPL) +that is assigned to a protection domain using the relevant bits in the +EFLAGS field could be set according to whether IO port access is +required in that protection domain. However, this would introduce +additional complexity and overhead in the critical system call and +return dispatchers. Instead, the IOPL is always set to block IO +access from the cooperative scheduling context. Port IO instructions +in that context will then generate general protection faults, and the +exception handler decodes and emulates authorized port IO +instructions. + +Interrupts are handled at ring level 2, since they do not use any +privileged instructions. They do cause the interrupt flag to be +cleared as they are delivered. The interrupt flag can only be +modified by instructions executing at a ring level that is numerically +less than or equal to the IOPL. Each interrupt handler needs to set +the interrupt flag using the IRET instruction when it returns. +Protection domains that require access to port IO (currently just the +kernel protection domain) are configured with an IOPL of 3 whereas +others are configured with an IOPL of 2. That is why interrupts are +configured to run at ring level 2. Interrupts are only enabled in the +application protection domain. + +Some interrupt handlers require access to port IO, and all are +permitted such access, since they need it anyway for restoring the +interrupt flag when returning. IO port access is a very powerful +privilege, since it can be used to remap MMIO regions of PCI devices, +reconfigure PCI devices, etc. 
Thus, further restricting access to IO +ports may improve the robustness of the system, but would increase +complexity and space requirements and possibly necessitate additional +context switches, since IO port access is controlled by the combined +settings of IOPL as well as an optional IO bitmap in the TSS. + +### Interrupt and Exception Dispatching + +Primary implementation sources: + - cpu/x86/init/common/interrupt.h + +Separate stacks are allocated for dispatching interrupts and +exceptions. However, to save space, the main bodies of some interrupt +and exception handlers are run on the main stack. A handler may +expect to have access to data from the interrupt or exception stack, +so the interrupt or exception dispatcher copies that data prior to +pivoting to the main stack and executing the handler. + +### Protection Domain Control Structures (PDCSes) + +Each protection domain is managed by the kernel and privileged +functions using a PDCS. The PDCS structure is entirely +software-defined. The initial protection domain plugin does not +support re-entrant protection domains to simplify the implementation +of the plugin by enabling domain-specific information (e.g. system +call return address) to be trivially stored in each PDCS. + +### Paging-Based Protection Domains + +Primary implementation sources: + + - cpu/x86/mm/paging-prot-domains.c + - cpu/x86/mm/syscalls-int.c + - cpu/x86/mm/syscalls-int-asm.S + +#### Introduction + +Only a single page table is used for all protection domains. A flat +memory model is used. Almost all linear-to-physical address mappings +are identity mappings, with the exceptions being the MMIO and metadata +regions. The X86 port of Contiki currently only supports at most one +MMIO and one metadata range per driver, and the paging-based +protection domain implementation always starts at particular linear +addresses when mapping an MMIO or metadata range. 
This may reduce +overhead, due to the way protection domain switches are implemented. + +#### System Call and Return Dispatching + +The system call dispatcher executes at ring level 0, since it uses the +privileged INVLPG or MOV CR3 instructions to invalidate TLB entries. +The dispatcher modifies page table entries to grant only the +permissions required by the protection domain being activated. It +then optionally uses the INVLPG instruction to invalidate any TLB +entries for any page table entries that were modified. If INVLPG is +not used to invalidate specific TLB entries, then CR3 is reloaded to +invalidate the entire TLB (global entries would be excluded, but they +are not used in this implementation). + +It is more efficient to always start at a particular linear address +when mapping an MMIO or metadata region, since the page table entries +for that region can be updated to unmap any previous region of that +type, map the new region, and then invalidated to cause the new +settings to take effect. The alternative using an identity +linear-to-physical address mapping for regions would be to unmap the +previous region by editing one set of page table entries and to then +map the new region by editing a different set of page table entries +and to finally perform invalidations for both sets of page table +entries. Another drawback of such an identity address mapping is that +additional page tables may need to be allocated to represent the +various MMIO regions, since page tables are indexed by linear address +and MMIO regions are often at high physical addresses. Note that this +is specific to MMIO regions, since metadata regions are not at +particularly high physical addresses. Additionally, if different base +linear addresses are used, it is necessary to communicate those to the +system call handler code so that the regions can be accessed. This +would require care to prevent an adversary from manipulating the +addresses and it may increase complexity. 
+ +The overall process of handling a system call can be illustrated at a +high level as follows. Some minor steps are omitted in the interest +of clarity and brevity. + +``` + == BEGIN Client protection domain ========================================== + -- BEGIN Caller ------------------------------------------------------------ + 1. Call system call stub. + -- + 20. Continue execution... + -- END Caller -------------------------------------------------------------- + -- BEGIN System call stub -------------------------------------------------- + 2. Already in desired (server) protection domain? + - No: Issue software interrupt #100 to request system call. + - Yes: Jump to system call body. + -- END System call stub ---------------------------------------------------- + == END Client protection domain ============================================ + == BEGIN Ring level 0 ====================================================== + -- BEGIN System call dispatcher--------------------------------------------- + 3. Check that the requested system call is allowed. Get entrypoint. + 4. Check that the server protection domain is available (not yet present + in the protection domain call stack) and then mark it as busy. + 5. Save the caller return address from the main stack into the client + PDCS. + 6. Overwrite the caller return address on the main stack to point to + system call return stub. + 7. Push server protection domain onto protection domain call stack. + 8. Update the interrupt return stack EIP to start of system call body. + 9. Update and invalidate page table entries to grant only the permissions + required by the server protection domain. + 10. Update interrupt flag to disable interrupts, since interrupts are only + enabled in app protection domain, which exports no system calls. + 11. Perform interrupt return (IRET). 
+ -- END System call dispatcher ---------------------------------------------- + -- BEGIN System call return dispatcher ------------------------------------- + 15. Mark protection domain on top of protection domain call stack as + available. + 16. Retrieve the caller return address from the kernel data structure for + the client protection domain and use it to overwrite the EIP in the + interrupt return stack. + 17. Update and invalidate page table entries to grant only the permissions + required by the client protection domain. + 18. Update interrupt flag to only enable interrupts if returning to app + protection domain cooperative scheduling context. + 19. Perform interrupt return (IRET). + -- END System call dispatcher ---------------------------------------------- + == END Ring level 0 ======================================================== + == BEGIN Server protection domain ========================================== + -- BEGIN System call body -------------------------------------------------- + 12. Execute the work for the requested system call. + 13. Return (to system call return stub, unless invoked from server + protection domain, in which case return is to caller). + -- END System call body ---------------------------------------------------- + -- BEGIN System call return stub ------------------------------------------- + 14. Issue software interrupt #101 to request system call return. + -- END System call return stub --------------------------------------------- + == END Server protection domain ============================================ +``` + +The first step in performing a system call is to invoke a system call +stub that actually issues the software interrupt to request a system +call dispatch. This approach reduces disruption to existing code, +since macros are used to generate separate stubs and corresponding +system call bodies with a single system call signature definition. 
+ +#### Memory Layout + +The approximate memory layout of the system is depicted below, +starting with the highest physical addresses and proceeding to lower +physical addresses. Optional permissions are denoted with +parentheses. See cpu/x86/quarkX1000_paging.ld for details of how this +memory layout is implemented. + +``` + | Kernel | App | Other | + ... +--------+--------+--------+ + +------------------------------------------+ | | | | + | Domain X MMIO | | | | (RW) | + +------------------------------------------+ | | | | + ... | | | | + +------------------------------------------+ | | | | + | Domain X DMA-accessible metadata | | | | (RW) | + | (section .dma_bss) | | | | | + +------------------------------------------+ | | | | + +------------------------------------------+ | | | | + | Domain X metadata (section .meta_bss) | | | | (RW) | + +------------------------------------------+ | | | | + ... | | | | + +------------------------------------------+ | | | | + | Kernel-private data | | RW | | | + | (sections .prot_dom_bss, .gdt_bss, etc.) | | | | | + +------------------------------------------+ | | | | + +------------------------------------------+ | | | | + | System call data (section .syscall_bss) | | RW | R | R | + +------------------------------------------+ | | | | + +------------------------------------------+ | | | | + | Kernel-owned data (section .kern_bss) | | RW | R | R | + +------------------------------------------+ | | | | + +------------------------------------------+ | | | | + | Page-aligned, Kernel-owned data | | RW | R | R | + | (section .page_aligned_kern_bss) | | | | | + +------------------------------------------+ | | | | + +------------------------------------------+ | | | | + | Common data | | RW | RW | RW | + | (sections .data, .rodata*, .bss, etc.) 
| | | | | + +------------------------------------------+ | | | | + (not-present guard band page) | | | | + +------------------------------------------+ | | | | + | Exception stack | | RW | RW | RW | + | (section .exc_stack) | | | | | + +------------------------------------------+ | | | | + +------------------------------------------+ | | | | + | Interrupt stack | | RW | RW | RW | + | (section .int_stack) | | | | | + +------------------------------------------+ | | | | + +------------------------------------------+ | | | | + | Main stack (section .main_stack) | | RW | RW | RW | + +------------------------------------------+ | | | | + (not-present guard band page) | | | | + +------------------------------------------+ | | | | + | Main code (.text) | | RX | RX | RX | + +------------------------------------------+ | | | | + +------------------------------------------+ | | | | + | Bootstrap code (section .boot_text) | | | | | + +------------------------------------------+ | | | | + +------------------------------------------+ | | | | + | Multiboot header | | | | | + +------------------------------------------+ | | | | + ... +``` + +The only protection domain that is permitted to access kernel-owned +data is the kernel protection domain. Some devices can also be +instructed to perform DMA to kernel-owned data, although that is an +incorrect configuration. + +Paging only differentiates between memory accesses from ring 3 (user +level) and those from rings 0-2 (supervisor level). To avoid granting +code running in the preemptive scheduling context supervisory write +access to kernel data structures (including the page tables), those +structures are marked read-only (except when the kernel protection +domain is active) and the Write Protect (WP) bit in Control Register 0 +(CR0) is cleared only when it is necessary to update a write-protected +structure. Only ring 0 is allowed to modify CR0. 
+ +Optional metadata for each protection domain is intended to only be +accessible from the associated protection domain and devices. + +Read accesses to executable code have not been observed to be needed +in at least a limited set of tests, but they are permitted, since +paging does not support an execute-only permission setting. On the +other hand, the Execute-Disable feature is used to prevent execution +of non-code memory regions. All non-startup code is mapped in all +protection domains. Limiting the code that is executable within each +protection domain to just the code that is actually needed within that +protection domain could improve the robustness of the system, but it +is challenging to determine all code that may be needed in a given +protection domain (e.g. all needed library routines). + +Stack accesses to non-stack memory are not needed, but they are +permitted. However, one page of unmapped linear address space is +placed above and below the stacks to detect erroneous stack accesses +to those linear address regions, which are the types of accesses most +likely to occur during a stack overflow or underflow condition. The +main stack is placed just below the interrupt stack, which is just +below the exception stack. Stack overflows are more common than stack +underflows, which motivates arranging the stacks such that an overflow +from a less-critical stack will not affect a more-critical stack. +Furthermore, the main stack is the most likely to overflow, since the +code that uses it is typically the most voluminous and difficult to +characterize. That provides additional motivation for positioning it +such that an overflow results in an immediate page fault. An +alternative design placing each stack on a separate group of +contiguous pages may improve the robustness of the system by +permitting the insertion of unmapped guard pages around them to +generate page faults in the event an overflow or underflow occurs on +any stack. 
However, that would consume additional memory. + +Data in the .rodata sections is marked read/write, even though it may +be possible to improve the robustness of the system by marking that +data as read-only. Doing so would introduce additional complexity +into the system. + +### Pointer Validation + +Primary implementation sources: + - cpu/x86/mm/syscalls.h + +At the beginning of each system call routine, it is necessary to check +that any untrusted pointer that could have been influenced by a caller +(i.e. a stack parameter or global variable) refers to a location above +the return address and to halt otherwise. This is to prevent a +protection domain from calling a different protection domain and +passing a pointer that references a location in the callee's stack +other than its parameters to influence the execution of the callee in +an unintended manner. For example, if an incoming pointer referenced +the return address, it could potentially redirect execution with the +privileges of the callee protection domain. + +It is also necessary to check that the pointer is either within the +stack region or the shared data region (or a guard band region, since +that will generate a fault) to prevent redirection of data accesses to +MMIO or metadata regions. + +The pointer is both validated and copied to a new storage location, +which must be within the callee's local stack region (excluding the +parameter region). This is to mitigate scenarios such as two pointers +being validated and an adversary later inducing a write through one of +the pointers to the other pointer to corrupt the latter pointer before +it is used. + +Any pointer whose value is fixed at link or load time does not need to +be validated prior to use, since no adversary within the defined +threat model is able to influence the link or load process. 
+ +### DMA Restrictions + +Primary implementation sources: + - cpu/x86/drivers/quarkX1000/imr.c + - cpu/x86/drivers/quarkX1000/imr-conf.c + +The CPU is not the only agent with the ability to issue requests to +the interconnect within the SoC. For example, SoC peripherals such as +the Ethernet driver use DMA to efficiently access memory buffers. +This could introduce a risk that DMA could be used to bypass the +memory protections enforced on the CPU by segmentation or paging. For +example, a device driver could instruct a device to access a memory +region to which the kernel has not granted the driver's protection +domain permission to access. + +The Isolated Memory Region (IMR) feature is configured to restrict the +memory that can be accessed by system agents other than the CPU [3]. +It only allows those system agents to access portions of the Contiki +memory space that are specifically intended to be used with DMA. The +source code for each protection domain specifies that its optional +metadata region needs to be accessible from other system agents +besides the CPU by using ATTR_BSS_DMA instead of ATTR_BSS_META when +allocating storage for the metadata. + +Extending the Framework +----------------------- + +### Adding a New Protection Domain + +The following steps are required. See the existing device drivers for +examples of various types of protection domains and how they are +initialized. + + - Allocate storage for the PDCS and the corresponding + client-accessible data structure using the PROT_DOMAINS_ALLOC + macro. + - Apply the ATTR_BSS_META attribute to the metadata structure, if + applicable. Apply the ATTR_BSS_DMA attribute instead if the + metadata structure needs to be DMA-accessible. Pad the metadata + structure to completely fill an integer multiple of the minimum + page size, 4096, when paging-based protection domains are in use. + See the definition of quarkX1000_eth_meta_t for an example. 
+ - Perform the following steps during boot stage 2: + - Initialize the protection domain ID in the client-accessible data + structure using the PROT_DOMAINS_INIT_ID macro. + - Register the domain. See prot-domains.c:prot_domains_init for an + example of registering a non-driver protection domain. See + cpu/x86/drivers/quarkX1000/eth.c:quarkX1000_eth_init for an + example of registering a PCI driver protection domain with an + MMIO region and a metadata region. + +### Adding a New System Call + +The following steps are required: + + - Define the system call procedure using the SYSCALLS_DEFINE or + SYSCALLS_DEFINE_SINGLETON macro. See + cpu/x86/drivers/legacy_pc/uart-16x50.c:uart_16x50_tx for an example + of a non-singleton system call. See + cpu/x86/drivers/quarkX1000/eth.c:quarkX1000_eth_send for an example + of a singleton system call. A singleton system call is one for + which at most one server protection domain will be associated with + it. + - During boot phase 2, associate the system call with one or more + server protection domains using the SYSCALLS_AUTHZ macro. + +Usage +----- + +To enable protection domain support, add +"X86_CONF_PROT_DOMAINS=paging" to the command line. + +The paging option accepts a sub-option to determine whether the TLB is +fully- or selectively-invalidated during protection domain switches. +By default, full invalidation is selected. Set the +X86_CONF_USE_INVLPG variable to 1 to override the default. + +References +---------- + +[1] J. H. Saltzer, "Protection and the Control of Information Sharing + in Multics," Commun. ACM, vol. 17, no. 7, pp. 388-402, Jul. 1974. 
+ +[2] https://github.com/contiki-os/contiki/wiki/Processes + +[3] "Intel(R) Quark(TM) SoC X1000 Secure Boot Programmer's Reference + Manual," + http://www.intel.com/support/processors/quark/sb/CS-035228.htm diff --git a/cpu/x86/mm/gdt-layout.h b/cpu/x86/mm/gdt-layout.h new file mode 100644 index 000000000..8a5af6cbf --- /dev/null +++ b/cpu/x86/mm/gdt-layout.h @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2015-2016, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef CPU_X86_MM_GDT_LAYOUT_H_ +#define CPU_X86_MM_GDT_LAYOUT_H_ + +#include "prot-domains.h" + +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__PAGING +/** + * Number of fixed GDT descriptors. Additional descriptors may be defined + * outside of gdt.c. + */ +#define GDT_NUM_FIXED_DESC 7 +#else +#define GDT_NUM_FIXED_DESC 3 +#endif + +#define GDT_IDX_NULL 0 +/** + * Flat code segment, used at boot and also for the rest of the system's + * runtime when protection domains are disabled + */ +#define GDT_IDX_CODE_FLAT 1 +/** + * Flat data segment, used at boot and also for the rest of the system's + * runtime when protection domains are disabled + */ +#define GDT_IDX_DATA_FLAT 2 + +#if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__NONE +/** Default (post-boot) code segment */ +#define GDT_IDX_CODE 3 +/** + * Same bounds and permissions as default code segment, but at the interrupt + * handler privilege level + */ +#define GDT_IDX_CODE_INT 4 +/** Stack segment for interrupt handlers */ +#define GDT_IDX_STK_INT 5 + +#define GDT_IDX_CODE_EXC GDT_IDX_CODE_FLAT +/** Default data segment used by code at all privilege levels */ +#define GDT_IDX_DATA 6 +#define GDT_IDX_STK GDT_IDX_DATA +#define GDT_IDX_STK_EXC GDT_IDX_DATA_FLAT +#else +#define GDT_IDX_CODE GDT_IDX_CODE_FLAT +#define GDT_IDX_CODE_INT GDT_IDX_CODE_FLAT +#define GDT_IDX_CODE_EXC GDT_IDX_CODE_FLAT +#define GDT_IDX_DATA GDT_IDX_DATA_FLAT +#define GDT_IDX_STK GDT_IDX_DATA_FLAT +#define GDT_IDX_STK_INT GDT_IDX_DATA_FLAT +#define GDT_IDX_STK_EXC GDT_IDX_DATA_FLAT +#endif + +#define GDT_SEL(idx, rpl) (((idx) << 3) | (rpl)) + +#define DT_SEL_GET_IDX(sel) ((sel) >> 3) + +#define DT_SEL_GET_RPL(sel) ((sel) & 3) + +#define GDT_SEL_NULL GDT_SEL(GDT_IDX_NULL, 0) +#define GDT_SEL_CODE_FLAT GDT_SEL(GDT_IDX_CODE_FLAT, PRIV_LVL_EXC) +#define GDT_SEL_DATA_FLAT GDT_SEL(GDT_IDX_DATA_FLAT, PRIV_LVL_EXC) + +#define GDT_SEL_CODE GDT_SEL(GDT_IDX_CODE, PRIV_LVL_USER) +#define GDT_SEL_CODE_INT GDT_SEL(GDT_IDX_CODE_INT, 
PRIV_LVL_INT) +#define GDT_SEL_CODE_EXC GDT_SEL(GDT_IDX_CODE_EXC, PRIV_LVL_EXC) + +#define GDT_SEL_DATA GDT_SEL(GDT_IDX_DATA, PRIV_LVL_EXC) + +#define GDT_SEL_STK GDT_SEL(GDT_IDX_STK, PRIV_LVL_USER) +#define GDT_SEL_STK_INT GDT_SEL(GDT_IDX_STK_INT, PRIV_LVL_INT) +#define GDT_SEL_STK_EXC GDT_SEL(GDT_IDX_STK_EXC, PRIV_LVL_EXC) + +#endif /* CPU_X86_MM_GDT_LAYOUT_H_ */ + diff --git a/cpu/x86/mm/paging-prot-domains.c b/cpu/x86/mm/paging-prot-domains.c new file mode 100644 index 000000000..6c28c03e2 --- /dev/null +++ b/cpu/x86/mm/paging-prot-domains.c @@ -0,0 +1,297 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include "dma.h" +#include "gdt.h" +#include "gdt-layout.h" +#include "helpers.h" +#include "idt.h" +#include "paging.h" +#include "prot-domains.h" +#include "segmentation.h" +#include "stacks.h" +#include "syscalls.h" +#include "tss.h" + +/*#define DBG_PAGE_ALLOC*/ + +/* Enable PAE-mode paging */ +#define CR4_PAE BIT(5) + +/* Extended Feature Enables MSR */ +#define MSR_EFER 0xC0000080 + +/* Enable Execute Disable bit support */ +#define EFER_NXE BIT(11) + +/* Root page-directory-pointer table */ +static pdpt_t root_pgtbl __attribute__((aligned(32))) ATTR_BSS_KERN; +/* Although the following page tables must be page-aligned, it is infeasible to + * apply the "aligned(4096)" attribute for the reasons described in the linker + * script. 
+ */ +/* Second-level page directory */ +static page_table_t + second_lvl_pgtbl ATTR_BSS_KERN_PAGE_ALIGNED; +/* Leaf-level page table */ +static page_table_t leaf_pgtbl ATTR_BSS_KERN_PAGE_ALIGNED; + +#define LINEAR_ADDR_BOUND (MIN_PAGE_SIZE * ENTRIES_PER_PAGE_TABLE) + +/*---------------------------------------------------------------------------*/ +void +prot_domains_reg(dom_client_data_t *dcd, + uintptr_t mmio, + size_t mmio_sz, + uintptr_t meta, + size_t meta_sz, + bool pio) +{ + dom_id_t dom_id = dcd->dom_id; + volatile struct dom_kern_data *dkd = + prot_domains_kern_data + dom_id; + + /* All addresses and sizes must be page-aligned */ + if((PROT_DOMAINS_ACTUAL_CNT <= dom_id) || + ((mmio & (MIN_PAGE_SIZE - 1)) != 0) || + ((mmio_sz & (MIN_PAGE_SIZE - 1)) != 0) || + ((meta & (MIN_PAGE_SIZE - 1)) != 0) || + ((meta_sz & (MIN_PAGE_SIZE - 1)) != 0) || + (PROT_DOMAINS_MAX_MMIO_SZ < mmio_sz) || + (LINEAR_ADDR_BOUND < (PROT_DOMAINS_META_LINEAR_BASE + meta_sz))) { + halt(); + } + + if((dkd->flags & PROT_DOMAINS_FLAG_INITED) == PROT_DOMAINS_FLAG_INITED) { + halt(); + } + + dkd->mmio = mmio; + dkd->mmio_sz = mmio_sz; + dkd->meta = meta; + dkd->meta_sz = meta_sz; + dkd->flags = PROT_DOMAINS_FLAG_INITED; + if(pio) { + dkd->flags |= PROT_DOMAINS_FLAG_PIO; + } +} +/*---------------------------------------------------------------------------*/ +static void __attribute__((regparm(3))) +set_ptes(uintptr_t start_la, uintptr_t start_pa, uintptr_t end_pa, + pte_t template) +{ +#ifdef DBG_PAGE_ALLOC +#warning Checking page allocations at runtime. 
+ + if(((start_la & (MIN_PAGE_SIZE - 1)) != 0) || + ((start_pa & (MIN_PAGE_SIZE - 1)) != 0) || + ((start_la & (MIN_PAGE_SIZE - 1)) != 0) || + ((end_pa & (MIN_PAGE_SIZE - 1)) != 0) || + (LINEAR_ADDR_BOUND <= (start_la + (end_pa - start_pa)))) { + halt(); + } +#endif + + while(start_pa < end_pa) { + template.addr = start_pa >> 12; + + leaf_pgtbl[start_la >> MIN_PAGE_SIZE_SHAMT] = template; + +#ifdef X86_CONF_USE_INVLPG + __asm__("invlpg %0" :: "m" (*(uint8_t *)start_la)); +#endif + + start_la += MIN_PAGE_SIZE; + start_pa += MIN_PAGE_SIZE; + } +} +/*---------------------------------------------------------------------------*/ +static void __attribute__((fastcall)) +set_ptes_identity_map(uintptr_t start_pa, uintptr_t end_pa, pte_t template) +{ + set_ptes(start_pa, start_pa, end_pa, template); +} +/*---------------------------------------------------------------------------*/ +static inline uint32_t __attribute__((always_inline)) +prot_domains_switch(dom_id_t from_id, dom_id_t to_id, + interrupt_stack_t *intr_stk) +{ + volatile dom_kern_data_t *from, *to; + + from = prot_domains_kern_data + from_id; + to = prot_domains_kern_data + to_id; + + if((from_id == DOM_ID_kern) || + (to_id == DOM_ID_kern)) { + pte_t to_kern_data_pte = { .raw = 0 }; + to_kern_data_pte.present = 1; + to_kern_data_pte.exec_disable = 1; + /* The kernel data region should always be accessible to supervisory code, + * but it is only accessible to user mode in the kernel protection domain. 
+ */ + to_kern_data_pte.user_accessible = 1; + if(to_id == DOM_ID_kern) { + to_kern_data_pte.writable = 1; + } + + set_ptes_identity_map((uintptr_t)&_sbss_kern_addr, + (uintptr_t)&_ebss_syscall_addr, + to_kern_data_pte); + + if(to_id != DOM_ID_kern) { + to_kern_data_pte.user_accessible = 0; + to_kern_data_pte.writable = 0; + } + + set_ptes_identity_map((uintptr_t)&_ebss_syscall_addr, + (uintptr_t)&_ebss_kern_addr, + to_kern_data_pte); + } + + if(to->mmio_sz != 0) { + pte_t pte = { .raw = 0 }; + pte.present = 1; + pte.exec_disable = 1; + pte.user_accessible = 1; + pte.writable = 1; + /* disable caching of MMIO accesses */ + pte.pcd = 1; + + set_ptes(PROT_DOMAINS_MMIO_LINEAR_BASE, + to->mmio, + to->mmio + to->mmio_sz, + pte); + } + if(to->mmio_sz < from->mmio_sz) { + pte_t pte = { .raw = 0 }; + + set_ptes_identity_map(PROT_DOMAINS_MMIO_LINEAR_BASE + to->mmio_sz, + PROT_DOMAINS_MMIO_LINEAR_BASE + from->mmio_sz, + pte); + } + + if(to->meta_sz != 0) { + pte_t pte = { .raw = 0 }; + pte.present = 1; + pte.exec_disable = 1; + pte.user_accessible = 1; + pte.writable = 1; + + set_ptes(PROT_DOMAINS_META_LINEAR_BASE, + to->meta, + to->meta + to->meta_sz, + pte); + } + if(to->meta_sz < from->meta_sz) { + pte_t pte = { .raw = 0 }; + + set_ptes_identity_map(PROT_DOMAINS_META_LINEAR_BASE + to->meta_sz, + PROT_DOMAINS_META_LINEAR_BASE + from->meta_sz, + pte); + } + +#ifndef X86_CONF_USE_INVLPG + __asm__ __volatile__ ("mov %%cr3, %%eax\n\t" + "mov %%eax, %%cr3\n\t" ::: "eax"); +#endif + + return 0; +} +/*---------------------------------------------------------------------------*/ +void +prot_domains_gdt_init(void) +{ + gdt_copy_desc_change_dpl(GDT_IDX_DATA, GDT_IDX_DATA_FLAT, PRIV_LVL_USER); + gdt_copy_desc_change_dpl(GDT_IDX_STK_INT, GDT_IDX_STK_EXC, PRIV_LVL_INT); +} +/*---------------------------------------------------------------------------*/ +void +prot_domains_impl_init(void) +{ + pte_t pte = { .raw = 0 }; + + syscalls_int_init(); + + /* Initialize page table: */ + + 
pte.present = 1; + pte.addr = ((uint32_t)second_lvl_pgtbl) >> MIN_PAGE_SIZE_SHAMT; + + root_pgtbl[0] = pte; + + pte.writable = 1; + pte.user_accessible = 1; + pte.addr = ((uint32_t)leaf_pgtbl) >> MIN_PAGE_SIZE_SHAMT; + + second_lvl_pgtbl[0] = pte; + + /* Map code sections: */ + + pte.writable = 0; + set_ptes_identity_map((uintptr_t)&_stext_addr, (uintptr_t)&_etext_addr, pte); + + /* Map data sections: */ + + pte.writable = 1; + pte.exec_disable = 1; + set_ptes_identity_map((uintptr_t)stacks_main, + (uintptr_t)stacks_main + + STACKS_SIZE_MAIN + + STACKS_SIZE_EXC + + STACKS_SIZE_INT, + pte); + set_ptes_identity_map((uintptr_t)&_sdata_addr, (uintptr_t)&_edata_addr, pte); + + /* Enable XD bit support */ + __asm__ __volatile__ ("wrmsr" :: "c" (MSR_EFER), "a" (EFER_NXE), "d" (0)); + + /* Enable PAE */ + __asm__ __volatile__ ("mov %%cr4, %%eax\n\t" + "or %0, %%eax\n\t" + "mov %%eax, %%cr4\n\t" + : + : "r" (CR4_PAE) + : "eax"); + + /* Load CR3 */ + __asm__ __volatile__ ("mov %0, %%cr3" :: "r" (root_pgtbl)); +} +/*---------------------------------------------------------------------------*/ +uintptr_t +prot_domains_lookup_meta_phys_base(dom_client_data_t *drv) +{ + return prot_domains_kern_data[drv->dom_id].meta; +} +/*---------------------------------------------------------------------------*/ + +/* Enable inter-procedural optimization with procedures in the following file: + */ +#include "syscalls-int.c" diff --git a/cpu/x86/mm/paging-prot-domains.h b/cpu/x86/mm/paging-prot-domains.h new file mode 100644 index 000000000..0f7f54ea3 --- /dev/null +++ b/cpu/x86/mm/paging-prot-domains.h @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CPU_X86_MM_PAGING_PROT_DOMAINS_H_ +#define CPU_X86_MM_PAGING_PROT_DOMAINS_H_ + +#include +#include +#include +#include "dma.h" +#include "helpers.h" +#include "paging.h" +#include "syscalls-int.h" + +struct dom_kern_data { + /** Base physical address of optional MMIO region */ + uintptr_t mmio; + /** Number of (contiguous) pages in MMIO region */ + size_t mmio_sz; + /** Base physical address of optional metadata region */ + uintptr_t meta; + /** Number of (contiguous) pages in metadata region */ + size_t meta_sz; + /** Flags are defined with the prefix PROT_DOMAINS_FLAG in prot-domains.h */ + uint32_t flags; + /** + * Original return address from call stack when this protection domain + * invoked some other protection domain. 
This serves to control the return + * entrypoint. The callee is not permitted to modify this value (unless the + * callee is the kernel protection domain). + */ + uintptr_t orig_ret_addr; + + /* align to next-larger power of 2 to enable usage of shifting instead of + * multiplication to index an array of these structures. + */ +} __attribute__((aligned(32))); + +/** Linear base address at which to map the MMIO region. */ +#define PROT_DOMAINS_MMIO_LINEAR_BASE (MIN_PAGE_SIZE + (uintptr_t)&_ebss_kern_addr) + +/** Maximum supported size of MMIO region */ +#define PROT_DOMAINS_MAX_MMIO_SZ 0x4000 + +/** Linear base address at which to map the metadata region */ +#define PROT_DOMAINS_META_LINEAR_BASE \ + (MIN_PAGE_SIZE + (PROT_DOMAINS_MMIO_LINEAR_BASE + PROT_DOMAINS_MAX_MMIO_SZ)) + +#define PROT_DOMAINS_META_OFF_TO_PHYS(off, meta_phys_base) \ + ((meta_phys_base) + ((off) - PROT_DOMAINS_META_LINEAR_BASE)) + +/** Any MMIO region mapping always starts at a particular linear address. */ +#define PROT_DOMAINS_MMIO(dcd) PROT_DOMAINS_MMIO_LINEAR_BASE +/** + * Any metadata region mapping always starts at a particular linear address. + */ +#define PROT_DOMAINS_META(dcd) PROT_DOMAINS_META_LINEAR_BASE + +#define PROT_DOMAINS_ENTER_ISR(exc) \ + PROT_DOMAINS_ENTER_ISR_COMMON(exc) +#define PROT_DOMAINS_LEAVE_ISR(exc) PROT_DOMAINS_LEAVE_ISR_COMMON(exc) + +/* Enable paging */ +#define CR0_PG BIT(31) +/* Enable write protection in supervisor mode */ +#define CR0_WP BIT(16) +/* Enable protected mode */ +#define CR0_PE BIT(0) + +/** + * \brief Enable or disable write protection enforcement in supervisor mode. + * When disabled, supervisory code (i.e. code running at ring levels + * 0-2) is permitted to write to pages that are marked read-only in + * page tables. + * + * \param en Set to true to enable write protection enforcement. 
+ */ +static inline void prot_domains_set_wp(bool en) +{ + uint32_t cr0_val = CR0_PG | CR0_PE; + if(en) { + cr0_val |= CR0_WP; + } + __asm__ __volatile__ ("mov %0, %%cr0" :: "r"(cr0_val)); +} + +#endif /* CPU_X86_MM_PAGING_PROT_DOMAINS_H_ */ diff --git a/cpu/x86/mm/paging.h b/cpu/x86/mm/paging.h new file mode 100644 index 000000000..7882ceab2 --- /dev/null +++ b/cpu/x86/mm/paging.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef CPU_X86_MM_PAGING_H_ +#define CPU_X86_MM_PAGING_H_ + +#include + +/** + * Page table entry format for PAE mode page table. See Intel Combined Manual, + * Vol. 3, Section 4.4 for more details. + */ +typedef union pte { + struct { + uint64_t present : 1; + uint64_t writable : 1; + uint64_t user_accessible : 1; + uint64_t pwt : 1; /**< Specify write-through cache policy */ + uint64_t pcd : 1; /**< Disable caching */ + uint64_t accessed : 1; + uint64_t dirty : 1; + uint64_t : 5; + uint64_t addr : 51; + uint64_t exec_disable : 1; + }; + uint64_t raw; +} pte_t; + +#define ENTRIES_PER_PDPT 4 +#define ENTRIES_PER_PAGE_TABLE 512 + +typedef pte_t pdpt_t[ENTRIES_PER_PDPT]; +typedef pte_t page_table_t[ENTRIES_PER_PAGE_TABLE]; + +#define MIN_PAGE_SIZE_SHAMT 12 +#define MIN_PAGE_SIZE (1 << MIN_PAGE_SIZE_SHAMT) + +#endif /* CPU_X86_MM_PAGING_H_ */ diff --git a/cpu/x86/mm/prot-domains.c b/cpu/x86/mm/prot-domains.c new file mode 100644 index 000000000..593da98e2 --- /dev/null +++ b/cpu/x86/mm/prot-domains.c @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "prot-domains.h" + +#include "gdt.h" +#include +#include "interrupt.h" +#include +#include +#include "syscalls.h" +#include "stacks.h" + +static dom_kern_data_t __attribute__((section(".kern_prot_dom_bss"))) + PROT_DOMAINS_PDCS_NM(kern_dcd); +static dom_client_data_t ATTR_BSS_KERN kern_dcd; +static dom_kern_data_t __attribute__((section(".app_prot_dom_bss"))) + PROT_DOMAINS_PDCS_NM(app_dcd); +static dom_client_data_t ATTR_BSS_KERN app_dcd; + +/*---------------------------------------------------------------------------*/ +void +prot_domains_init(void) +{ + segment_desc_t desc; + + gdt_lookup(GDT_IDX_CODE_EXC, &desc); + + SEG_SET_FLAG(desc, DPL, PRIV_LVL_INT); + gdt_insert(GDT_IDX_CODE_INT, desc); + + SEG_SET_FLAG(desc, DPL, PRIV_LVL_USER); + gdt_insert(GDT_IDX_CODE, desc); + + PROT_DOMAINS_INIT_ID(kern_dcd); + prot_domains_reg(&kern_dcd, 0, 0, 0, 0, true); + PROT_DOMAINS_INIT_ID(app_dcd); + prot_domains_reg(&app_dcd, 0, 0, 0, 0, false); + + prot_domains_impl_init(); +} +/*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/mm/prot-domains.h b/cpu/x86/mm/prot-domains.h new file mode 100644 index 
000000000..f7dc84e3c --- /dev/null +++ b/cpu/x86/mm/prot-domains.h @@ -0,0 +1,275 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef CPU_X86_MM_PROT_DOMAINS_H_ +#define CPU_X86_MM_PROT_DOMAINS_H_ + +#if !__ASSEMBLER__ +#include +#include +#include +#include "helpers.h" +#endif + +#define X86_CONF_PROT_DOMAINS__NONE 0 +#define X86_CONF_PROT_DOMAINS__PAGING 1 + +/** Privilege level (ring) for exception handlers and other supervisory code */ +#define PRIV_LVL_EXC 0 +#if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__NONE +/** Privilege level for interrupt handlers */ +#define PRIV_LVL_INT 2 +/** Default privilege level */ +#define PRIV_LVL_USER 3 +#else +#define PRIV_LVL_INT PRIV_LVL_EXC +#define PRIV_LVL_USER PRIV_LVL_EXC +#endif + +#define DOM_ID_kern 0 +#define DOM_ID_app 1 + +/** I/O Privilege Level */ +#define EFLAGS_IOPL(pl) ((pl) << 12) +/** Interrupt Enable Flag */ +#define EFLAGS_IF (1u << 9) + +#if !__ASSEMBLER__ + +/** Protection domain ID */ +typedef uint32_t dom_id_t; + +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__PAGING +#include "paging-prot-domains.h" +#endif + +/* The following symbols are defined in the linker script */ +/** Bounds for .text section */ +extern uint32_t _stext_addr, _etext_addr; + +#if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__NONE + +/** Metadata that should not be DMA-accessible */ +#define ATTR_BSS_META __attribute__((section(".meta_bss"))) +/** Kernel-owned data */ +#define ATTR_BSS_KERN __attribute__((section(".kern_bss"))) +/** Code that should only be executable during bootup */ +#define ATTR_CODE_BOOT __attribute__((section(".boot_text"))) + +/** + * Domain-defined metadata must be page-aligned, which is implemented by the + * linker script for variables with this attribute. 
+ */ +#define ATTR_BSS_KERN_PAGE_ALIGNED \ + __attribute__((section(".page_aligned_kern_bss"))) + +/** Bounds for .kern_data, .syscall_data, and .prot_dom_data sections */ +extern uint32_t _sbss_kern_addr, _ebss_kern_addr; +/** End of .syscall_data section */ +extern uint32_t _ebss_syscall_addr; +/** Bounds for other data sections */ +extern uint32_t _sdata_addr, _edata_addr; + +/** + * If set, this protection domain is already in the call stack and is not + * available for nested invocations. + */ +#define PROT_DOMAINS_FLAG_BUSY BIT(0) +/** If set, this protection domain requires port I/O access. */ +#define PROT_DOMAINS_FLAG_PIO BIT(1) +/** If set, this protection domain is initialized. */ +#define PROT_DOMAINS_FLAG_INITED BIT(2) + +/** + * Data associated with each protection domain that should be fully accessible + * only to the kernel, with limited accesses and modifications permitted from + * other domains. Includes storage for system data structures. + */ +typedef struct dom_kern_data dom_kern_data_t; + +extern volatile dom_kern_data_t prot_domains_kern_data[]; +extern volatile dom_kern_data_t prot_domains_kern_data_end[]; + +#define PROT_DOMAINS_ACTUAL_CNT \ + (prot_domains_kern_data_end - prot_domains_kern_data) + +#define PROT_DOMAINS_GET_DOM_ID(dkd) \ + ((dom_id_t)((dkd) - prot_domains_kern_data)) + +void prot_domains_syscall_dispatcher(void); + +/** + * Data associated with each protection domain that is owned by clients of that + * domain and used to identify the domain. + */ +struct dom_client_data { + dom_id_t dom_id; +} __attribute__((packed)); + +/** Allocate the client-owned protection domain data structure. 
*/ +#define PROT_DOMAINS_PDCS_NM(nm) _pdcs_##nm +#define PROT_DOMAINS_ALLOC(typ, nm) \ + static dom_kern_data_t __attribute__((section(".prot_dom_bss"))) \ + PROT_DOMAINS_PDCS_NM(nm); \ + static typ ATTR_BSS_KERN nm +#define PROT_DOMAINS_INIT_ID(nm) \ + (nm).dom_id = PROT_DOMAINS_GET_DOM_ID(&PROT_DOMAINS_PDCS_NM(nm)) + +/** + * Perform early initialization during boot stage 0 to prepare for boot stage 1 + */ +void prot_domains_gdt_init() ATTR_CODE_BOOT; +/** + * Perform initialization during boot stage 1 to prepare for kernel launch + */ +void prot_domains_init(); +void prot_domains_impl_init(); + +/* Return from cpu_boot_stage1 will invoke prot_domains_launch_kernel due to + * that return address being pushed on the stack by cpu_boot_stage0. + */ +#define prot_domains_leave_boot_stage1() + +/* Return from main will invoke prot_domains_launch_app due to that return + * address being pushed on the stack by cpu_boot_stage0. + */ +#define prot_domains_leave_main() + +void prot_domains_launch_kernel(void); + +/* Whenever changing this, update syscalls-int-asm.S:prot_domains_launch_kernel + * to match: + */ +#define PROT_DOMAINS_INIT_RET_ADDR_CNT 2 + +void app_main(void); +#define prot_domains_launch_app app_main + +#else + +#define ATTR_BSS_META +#define ATTR_BSS_KERN +#define ATTR_CODE_BOOT + +struct dom_client_data { + uintptr_t mmio; /**< MMIO range base address */ + uintptr_t meta; /**< Domain-defined metadata base address */ +}; + +/** Retrieve the MMIO base address for the specified protection domain. */ +#define PROT_DOMAINS_MMIO(dcd) ((dcd).mmio) + +/** Retrieve the metadata base address for the specified protection domain. 
*/ +#define PROT_DOMAINS_META(dcd) ((dcd).meta) + +#define PROT_DOMAINS_ALLOC(typ, nm) static typ nm +#define PROT_DOMAINS_INIT_ID(nm) + +#define prot_domains_gdt_init() + +#define prot_domains_init() + +int main(void); +#define prot_domains_leave_boot_stage1 main +#define prot_domains_leave_main ENABLE_IRQ(); app_main + +#define PROT_DOMAINS_INIT_RET_ADDR_CNT 0 + +#endif + +/** + * Protection domain data readable by the client. It is used to control + * execution, so it should be protected from modifications by clients. + * Otherwise, there is a risk that one client could modify one of these + * structures used by another client to issue a system call, which could then + * cause the latter client to perform an unintended system call. + */ +typedef struct dom_client_data dom_client_data_t; + +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__NONE +#define prot_domains_reg(dcd, mmio_, mmio_sz, meta_, meta_sz, pio) \ + (dcd)->mmio = (mmio_); \ + (dcd)->meta = (meta_) +#else +/** + * \brief Register a protection domain, which involves creating the + * necessary system data structures for it. 
+ * + * \param dcd Client-accessible domain information + * \param mmio Optional base address for per-domain memory-mapped IO region + * \param mmio_sz Size of MMIO region + * \param meta Optional base address for per-domain metadata + * \param meta_sz Size of metadata + * \param pio Set to true if protection domain requires port IO access + */ +void prot_domains_reg(dom_client_data_t *dcd, + uintptr_t mmio, + size_t mmio_sz, + uintptr_t meta, + size_t meta_sz, + bool pio); +#endif + +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__NONE +#define prot_domains_lookup_meta_phys_base(drv) 0 +#else +/** Lookup base physical address of metadata region for specified domain */ +uintptr_t prot_domains_lookup_meta_phys_base(dom_client_data_t *drv); +#endif + +#if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__PAGING +#define PROT_DOMAINS_META_OFF_TO_PHYS(off, meta_phys_base) \ + ((meta_phys_base) + (off)) +#endif + +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__NONE +#define PROT_DOMAINS_ENTER_ISR(...) +#define PROT_DOMAINS_LEAVE_ISR(...) +#else +#define PROT_DOMAINS_ENTER_ISR_COMMON(exc) \ + ".if !" #exc "\n\t" \ + /* Save the current stack pointer into a callee-saved register. */ \ + "mov %%esp, %%ebx\n\t" \ + /* Pivot to the main stack of the interrupted context. */ \ + /* Interrupts never have an error code, so the offset is always 44. */ \ + /* No interrupt handlers use anything from the original interrupt stack, */ \ + /* so there is no need to copy anything from it to the main stack. */ \ + "mov 44(%%esp), %%esp\n\t" \ + ".endif\n\t" +#define PROT_DOMAINS_LEAVE_ISR_COMMON(exc) \ + /* Restore the interrupt/exception stack pointer. */ \ + ".if !" 
#exc "\n\t" \ + "mov %%ebx, %%esp\n\t" \ + ".endif\n\t" +#endif + +#endif /* !__ASSEMBLER__ */ + +#endif /* CPU_X86_MM_PROT_DOMAINS_H_ */ diff --git a/cpu/x86/mm/segmentation.h b/cpu/x86/mm/segmentation.h new file mode 100644 index 000000000..57b1b8aea --- /dev/null +++ b/cpu/x86/mm/segmentation.h @@ -0,0 +1,131 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef CPU_X86_MM_SEGMENTATION_H_ +#define CPU_X86_MM_SEGMENTATION_H_ + +#include + +#define SEG_FLAG(lbl, val) \ + (((val) & (~0u >> (32 - SEG_WIDTH_##lbl))) << SEG_SHAMT_##lbl) + +#define SEG_SET_FLAG(desc, lbl, val) \ + (desc).flags = ((desc).flags & ~SEG_FLAG(lbl, ~0u)) | SEG_FLAG(lbl, val) + +#define SEG_WIDTH_TYPE 4 +#define SEG_SHAMT_TYPE 0 +#define SEG_WIDTH_DESCTYPE 1 +#define SEG_SHAMT_DESCTYPE 4 +#define SEG_WIDTH_DPL 2 +#define SEG_SHAMT_DPL 5 +#define SEG_WIDTH_PRESENT 1 +#define SEG_SHAMT_PRESENT 7 +#define SEG_WIDTH_LIMIT_HI 4 +#define SEG_SHAMT_LIMIT_HI 8 +#define SEG_WIDTH_AVL 1 +#define SEG_SHAMT_AVL 12 +#define SEG_WIDTH_LONG_MODE 1 +#define SEG_SHAMT_LONG_MODE 13 +/* also used to indicate default operand and address size */ +#define SEG_WIDTH_DIRECTION 1 +#define SEG_SHAMT_DIRECTION 14 +#define SEG_WIDTH_GRAN 1 +#define SEG_SHAMT_GRAN 15 + +#define SEG_TYPE_DATA_RDWR SEG_FLAG(TYPE, 0x02) /* Read/Write */ +#define SEG_TYPE_CODE_EXRD SEG_FLAG(TYPE, 0x0A) /* Execute/Read */ +#define SEG_TYPE_TSS32_AVAIL SEG_FLAG(TYPE, 0x09) + +#define SEG_DESCTYPE_SYS SEG_FLAG(DESCTYPE, 0) +#define SEG_DESCTYPE_NSYS SEG_FLAG(DESCTYPE, 1) + +#define SEG_PRESENT SEG_FLAG(PRESENT, 1) + +#define SEG_DEFL_OPSZ_32BIT SEG_FLAG(DIRECTION, 1) + +#define SEG_GRAN_BYTE SEG_FLAG(GRAN, 0) +#define SEG_GRAN_PAGE SEG_FLAG(GRAN, 1) + +/** + * Segment descriptor. See Intel Combined Manual, + * Vol. 3, Section 3.4.5 for more details. + */ +typedef union segment_desc { + struct { + uint32_t lim_lo : 16; + uint32_t base_lo : 16; + uint32_t base_mid : 8; + uint32_t flags : 16; + uint32_t base_hi : 8; + }; + struct { + uint32_t raw_lo, raw_hi; + }; + uint64_t raw; +} segment_desc_t; + +static inline void +segment_desc_set_limit(segment_desc_t *c_this, uint32_t len) +{ + uint32_t limit = len - 1; + + SEG_SET_FLAG(*c_this, LIMIT_HI, limit >> 16); /* set limit bits 19:16 */ + c_this->lim_lo = limit; /* set limit bits 15:0 */ +} +/** + * \brief Initialize a segment descriptor. 
+ * \param c_this Segment descriptor to be initialized. + * \param base Base address of region to be covered by segment descriptor. + * \param len Length to be specified by segment descriptor. The units may + * be bytes or pages, depending on the flags. + * \param flags Flags to be added to the default flags: present, default + * operand size of 32 bits, and high limit bits. + */ +static inline void +segment_desc_init(segment_desc_t *c_this, + uint32_t base, uint32_t len, uint16_t flags) +{ + c_this->raw = 0; + + /* Create the high 32 bit segment */ + c_this->base_mid = base >> 16; /* set base bits 23:16 */ + c_this->base_hi = base >> 24; /* set base bits 31:24 */ + + /* Create the low 32 bit segment */ + c_this->base_lo = base; /* set base bits 15:0 */ + + c_this->flags = SEG_FLAG(PRESENT, 1) | SEG_DEFL_OPSZ_32BIT | flags; + + /* This must be done after setting the other flags, or else it + * would be partially overridden. + */ + segment_desc_set_limit(c_this, len); +} +#endif /* CPU_X86_MM_SEGMENTATION_H_ */ diff --git a/cpu/x86/mm/stacks.c b/cpu/x86/mm/stacks.c new file mode 100644 index 000000000..60ccb0ebc --- /dev/null +++ b/cpu/x86/mm/stacks.c @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "stacks.h" + +uint8_t stacks_main[STACKS_SIZE_MAIN] + __attribute__((section(".main_stack"), aligned(4))); +#if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__NONE +uint8_t stacks_int[STACKS_SIZE_INT] + __attribute__((section(".int_stack"), aligned(4))); +uint8_t stacks_exc[STACKS_SIZE_EXC] + __attribute__((section(".exc_stack"), aligned(4))); +#endif diff --git a/cpu/x86/mm/stacks.h b/cpu/x86/mm/stacks.h new file mode 100644 index 000000000..a1005d8e0 --- /dev/null +++ b/cpu/x86/mm/stacks.h @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CPU_X86_MM_STACKS_H_ +#define CPU_X86_MM_STACKS_H_ + +#include "prot-domains.h" + +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__NONE +#define STACKS_SIZE_INT 0 +#else +/** + * The necessary amount of space for the interrupt and exception stacks is + * determined by the amount of data pushed on the stack by the CPU when + * delivering an interrupt or exception, and by the additional data pushed + * on the stack by the interrupt dispatcher. See interrupt.h for more details. + */ +#define STACKS_SIZE_INT (14 * 4) +#endif + +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__PAGING +/** + * The system call and return dispatchers use this stack, so its size was + * determined by observing their behavior. It is possible that the dispatchers + * could overflow the stack and overwrite data on the other stacks. 
An + * alternative design that would facilitate detection of such overflows would + * place the exception handler stack on a separate page surrounded by guard + * bands, but that would consume a substantial amount of additional memory. + * + * All stack sizes should be a multiple of 4 to accommodate a 4-byte alignment. + */ +#ifdef __clang__ +#define STACKS_SIZE_EXC 512 +#else +#define STACKS_SIZE_EXC 256 +#endif +#else +#define STACKS_SIZE_EXC STACKS_SIZE_INT +#endif +/** + * The combined size of the stacks should be an even multiple of the 4K page + * size so that they precisely fill some number of pages when paging-based + * protection domains are in use. The stacks are arranged contiguously by + * the linker scripts. See those and README.md for more details. + */ +#define STACKS_SIZE_MAIN (8192 - (STACKS_SIZE_INT + STACKS_SIZE_EXC)) + +#if !__ASSEMBLER__ +/** + * Stack for exception handlers. Also used for system call and return + * dispatchers when paging-based protection domains are enabled. + */ +extern uint8_t stacks_exc[STACKS_SIZE_EXC]; +/** Stack for interrupt handlers. */ +extern uint8_t stacks_int[STACKS_SIZE_INT]; +/** Main C stack. */ +extern uint8_t stacks_main[STACKS_SIZE_MAIN]; + +#define STACKS_INIT_TOP \ + ((uintptr_t)stacks_main + STACKS_SIZE_MAIN - \ + (PROT_DOMAINS_INIT_RET_ADDR_CNT * sizeof(uintptr_t))) + +#endif + +#endif /* CPU_X86_MM_STACKS_H_ */ diff --git a/cpu/x86/mm/syscalls-int-asm.S b/cpu/x86/mm/syscalls-int-asm.S new file mode 100644 index 000000000..1fe80310f --- /dev/null +++ b/cpu/x86/mm/syscalls-int-asm.S @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "syscalls-int.h" +#include "prot-domains.h" +#include "gdt-layout.h" +#include "stacks.h" + +.text + +/* Invoke the system call return dispatcher from the default privilege + * level + */ +.global prot_domains_sysret_stub +prot_domains_sysret_stub: + int $PROT_DOMAINS_SYSRET_DISPATCH_INT + +/* Invoke the system call dispatcher C routine */ +.global prot_domains_syscall_dispatcher +prot_domains_syscall_dispatcher: + mov %esp, %ecx /*< interrupt_stack_t *intr_stk */ + /* EDX already set to "dom_client_data_t to_dcd" by syscall stub */ + push %eax /*< syscalls_id_t syscall_id */ + call prot_domains_syscall_dispatcher_impl + /* fastcall convention, so callee pops arguments */ + iret + +/* Invoke the system call return dispatcher C routine */ +.global prot_domains_sysret_dispatcher +prot_domains_sysret_dispatcher: + mov %esp, %ecx /*< interrupt_stack_t *intr_stk */ + call prot_domains_sysret_dispatcher_impl + /* Zero caller-saved registers in case they contain secrets. The system call + * handlers and dispatchers need to preserve the callee-saved registers. + */ + xor %eax, %eax + xor %ecx, %ecx + xor %edx, %edx + iret + +.global prot_domains_launch_kernel +prot_domains_launch_kernel: + mov $GDT_SEL_DATA, %eax + mov %eax, %ds + mov %eax, %es + mov %eax, %fs + mov %eax, %gs + /* init interrupt return stack: */ + pushl $GDT_SEL_STK + lea stacks_main, %eax + /* matches STACKS_INIT_TOP, plus 4 since an address has been consumed: */ + add $(STACKS_SIZE_MAIN - 4), %eax + pushl %eax + pushl $EFLAGS_IOPL(PRIV_LVL_INT) + pushl $GDT_SEL_CODE + pushl $0 /* will be overwritten by syscall_dispatcher_impl */ + /* fastcall convention: */ + mov %esp, %ecx + call prot_domains_launch_kernel_impl + iretl diff --git a/cpu/x86/mm/syscalls-int.c b/cpu/x86/mm/syscalls-int.c new file mode 100644 index 000000000..1d1c77efb --- /dev/null +++ b/cpu/x86/mm/syscalls-int.c @@ -0,0 +1,298 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "prot-domains.h" +#include "tss.h" +#include "helpers.h" +#include "stacks.h" +#include "idt.h" +#include "syscalls.h" +#include "gdt.h" +#include "gdt-layout.h" +#include "interrupt.h" + +/** + * Current protection domain. Not protected, since it is just a convenience + * variable to avoid unneeded protection domain switches. 
+ */ +dom_id_t cur_dom = DOM_ID_app; + +/* defined in syscalls-int-asm.S */ +void prot_domains_sysret_dispatcher(void); + +/* Maximum depth of inter-domain call stack */ +#define MAX_INTER_DOM_CALL_STK_SZ 4 + +/* Protected call stack for inter-domain system calls. The stack grows up. */ +static volatile dom_id_t ATTR_BSS_KERN + inter_dom_call_stk[MAX_INTER_DOM_CALL_STK_SZ]; + +/* Pointer to the next (free) slot in the inter-domain call stack */ +static int ATTR_BSS_KERN inter_dom_call_stk_ptr; + +/*---------------------------------------------------------------------------*/ +static inline void __attribute__((always_inline)) +update_eflags(dom_id_t from_id, dom_id_t to_id, interrupt_stack_t *intr_stk) +{ + if((to_id == DOM_ID_app) && + (DT_SEL_GET_RPL(intr_stk->cs) == PRIV_LVL_USER)) { + /* Only enable interrupts in the application protection domain cooperative + * scheduling context. + */ + intr_stk->eflags |= EFLAGS_IF; + } else { + intr_stk->eflags &= ~EFLAGS_IF; + } +} +/*---------------------------------------------------------------------------*/ +static inline void __attribute__((always_inline)) +dispatcher_tail(dom_id_t from_id, dom_id_t to_id, interrupt_stack_t *intr_stk) +{ + cur_dom = to_id; + + prot_domains_switch(from_id, to_id, intr_stk); + + prot_domains_set_wp(true); + + update_eflags(from_id, to_id, intr_stk); +} +/*---------------------------------------------------------------------------*/ +int main(void); +static inline void __attribute__((always_inline)) +syscall_dispatcher_tail(interrupt_stack_t *intr_stk, + dom_id_t to_id, + uint32_t syscall_eip) +{ + dom_id_t from_id; + volatile dom_kern_data_t *from_dkd, *to_dkd; + + to_dkd = prot_domains_kern_data + to_id; + + /* This implementation of protection domains is non-reentrant. For example, + * it stores the return address taken from the stack of a caller domain + * while dispatching a system call and stores it in a single field in the + * kernel data associated with that protection domain. 
That model does not + * permit reentrancy. + */ + if((to_dkd->flags & PROT_DOMAINS_FLAG_BUSY) == PROT_DOMAINS_FLAG_BUSY) { + halt(); + } + to_dkd->flags |= PROT_DOMAINS_FLAG_BUSY; + + /* Update the interrupt stack so that the IRET instruction will return to the + * system call entrypoint. + */ + intr_stk->eip = syscall_eip; + + /* Lookup the information for the caller */ + from_id = inter_dom_call_stk[inter_dom_call_stk_ptr - 1]; + from_dkd = prot_domains_kern_data + from_id; + + /* Save the current return address from the unprivileged stack to a protected + * location in the kernel-owned data structure. This enforces return + * entrypoint control. + */ + from_dkd->orig_ret_addr = *(uintptr_t *)intr_stk->esp; + /* Update the unprivileged stack so that when the system call body is + * complete, it will invoke the system call return stub. + */ + *((uintptr_t *)intr_stk->esp) = (uintptr_t)prot_domains_sysret_stub; + + if(MAX_INTER_DOM_CALL_STK_SZ <= inter_dom_call_stk_ptr) { + halt(); + } + inter_dom_call_stk[inter_dom_call_stk_ptr] = to_id; + + inter_dom_call_stk_ptr++; + + dispatcher_tail(from_id, to_id, intr_stk); +} +/*---------------------------------------------------------------------------*/ +void __attribute__((fastcall)) +prot_domains_syscall_dispatcher_impl(interrupt_stack_t *intr_stk, + dom_id_t to_id, + syscalls_entrypoint_t *syscall) +{ + uint32_t syscall_eip; + + if(PROT_DOMAINS_ACTUAL_CNT <= to_id) { + halt(); + } + + /* Get the approved entrypoint for the system call being invoked */ + + if(!((((uintptr_t)syscalls_entrypoints) <= (uintptr_t)syscall) && + (((uintptr_t)syscall) < (uintptr_t)syscalls_entrypoints_end) && + (((((uintptr_t)syscall) - (uintptr_t)syscalls_entrypoints) + % sizeof(syscalls_entrypoint_t)) == 0))) { + /* Assert is not usable when switching protection domains */ + halt(); + } + + if((BIT(to_id) & syscall->doms) == 0) { + halt(); + } + + syscall_eip = syscall->entrypoint; + + prot_domains_set_wp(false); + + 
syscall_dispatcher_tail(intr_stk, to_id, syscall_eip); +} +/*---------------------------------------------------------------------------*/ +int main(void); +void __attribute__((fastcall)) +prot_domains_launch_kernel_impl(interrupt_stack_t *intr_stk) +{ + inter_dom_call_stk[0] = DOM_ID_app; + + inter_dom_call_stk_ptr = 1; + + syscall_dispatcher_tail(intr_stk, DOM_ID_kern, (uint32_t)main); +} +/*---------------------------------------------------------------------------*/ +void __attribute__((fastcall)) +prot_domains_sysret_dispatcher_impl(interrupt_stack_t *intr_stk) +{ + dom_id_t from_id, to_id; + if(inter_dom_call_stk_ptr <= 1) { + halt(); + } + + from_id = inter_dom_call_stk[inter_dom_call_stk_ptr - 1]; + to_id = inter_dom_call_stk[inter_dom_call_stk_ptr - 2]; + + intr_stk->eip = prot_domains_kern_data[to_id].orig_ret_addr; + + prot_domains_set_wp(false); + + prot_domains_kern_data[from_id].flags &= ~PROT_DOMAINS_FLAG_BUSY; + + inter_dom_call_stk_ptr--; + + dispatcher_tail(from_id, to_id, intr_stk); +} +/*---------------------------------------------------------------------------*/ +/** + * \brief Lookup the current protection domain. + * \return Kernel data structure for the current protection domain. + */ +static volatile dom_kern_data_t * +get_current_domain(void) +{ + dom_id_t id; + id = inter_dom_call_stk[inter_dom_call_stk_ptr - 1]; + return prot_domains_kern_data + id; +} +/*---------------------------------------------------------------------------*/ +/** + * \brief Check whether the protection domain is authorized to perform port + * I/O from the cooperative scheduling context. 
+ * \param dkd Protection domain to check + * \return Result of the check as a Boolean value + */ +static bool +needs_port_io(volatile dom_kern_data_t *dkd) +{ + return (dkd->flags & PROT_DOMAINS_FLAG_PIO) == PROT_DOMAINS_FLAG_PIO; +} +/*---------------------------------------------------------------------------*/ +/* Mark the context parameter as volatile so that writes to it will not get + * optimized out. This parameter is not handled like ordinary function + * parameters. It actually partially includes the contents of the exception + * stack, so updates to those locations can affect the operation of the + * subsequent interrupt return. + */ +static void +gp_fault_handler(volatile struct interrupt_context context) +{ + uint32_t cs_lim; + uint8_t opcode; + + volatile dom_kern_data_t *dkd = get_current_domain(); + if (needs_port_io(dkd)) { + __asm__ __volatile__ ( + "mov %%cs, %0\n\t" + "lsl %0, %0\n\t" + : "=r"(cs_lim)); + + if (cs_lim < context.eip) { + halt(); + } + + /* Load first byte of faulting instruction */ + __asm__ __volatile__ ( + "movb %%cs:%1, %0" + : "=q"(opcode) + : "m"(*(uint8_t *)context.eip)); + + switch (opcode) { + case 0xEC: /* inb */ + context.eax = (context.eax & ~0xFF) | inb((uint16_t)context.edx); + break; + case 0xED: /* inl */ + context.eax = inl((uint16_t)context.edx); + break; + case 0xEE: /* outb */ + outb((uint16_t)context.edx, (uint8_t)context.eax); + break; + case 0xEF: /* outl */ + outl((uint16_t)context.edx, context.eax); + break; + default: + halt(); + } + + /* Skip the faulting port I/O instruction that was emulated. 
*/ + context.eip++; + } else { + halt(); + } +} +/*---------------------------------------------------------------------------*/ +void +syscalls_int_init(void) +{ + tss_init(); + + SET_EXCEPTION_HANDLER(13, 1, gp_fault_handler); + + /* Register system call dispatchers: */ + + idt_set_intr_gate_desc(PROT_DOMAINS_SYSCALL_DISPATCH_INT, + (uint32_t)prot_domains_syscall_dispatcher, + GDT_SEL_CODE_EXC, + PRIV_LVL_USER); + idt_set_intr_gate_desc(PROT_DOMAINS_SYSRET_DISPATCH_INT, + (uint32_t)prot_domains_sysret_dispatcher, + GDT_SEL_CODE_EXC, + PRIV_LVL_USER); +} +/*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/mm/syscalls-int.h b/cpu/x86/mm/syscalls-int.h new file mode 100644 index 000000000..7ee4bcb36 --- /dev/null +++ b/cpu/x86/mm/syscalls-int.h @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CPU_X86_MM_SYSCALLS_INT_H_ +#define CPU_X86_MM_SYSCALLS_INT_H_ + +/** Software interrupt number for dispatching a system call */ +#define PROT_DOMAINS_SYSCALL_DISPATCH_INT 100 +/** Software interrupt number for returning from a system call */ +#define PROT_DOMAINS_SYSRET_DISPATCH_INT 101 + +#if !__ASSEMBLER__ + +#include <stdint.h> + +extern dom_id_t cur_dom; + +#define SYSCALLS_STUB_EPILOGUE(nm) \ + /* Load the system call identifier into EAX, as required by */ \ + /* prot_domains_syscall_dispatcher: */ \ + " mov $" EXP_STRINGIFY(_syscall_ent_##nm) ", %eax\n\t" \ + /* Check whether the server protection domain is already active: */ \ + " cmp %edx, cur_dom\n\t" \ + /* If so, skip the system call dispatcher and directly invoke the */ \ + /* system call body: */ \ + " je _syscall_" #nm "\n\t" \ + " int $" EXP_STRINGIFY(PROT_DOMAINS_SYSCALL_DISPATCH_INT) "\n\t" + +#define SYSCALLS_STUB(nm) \ + SYSCALLS_ALLOC_ENTRYPOINT(nm); \ + asm ( \ + ".text\n\t" \ + ".global " #nm "\n\t" \ + #nm ":\n\t" \ + /* First, load server protection domain ID into EDX, as required by */ \ + /* prot_domains_syscall_dispatcher: */ \ + /* Skip past return address on stack to obtain address of protection */ \ + /* domain ID parameter: */ \ + " mov 4(%esp), %edx\n\t" \ + SYSCALLS_STUB_EPILOGUE(nm)) + +#define SYSCALLS_STUB_SINGLETON(nm, dcd) \ + SYSCALLS_ALLOC_ENTRYPOINT(nm); \ + asm ( \ + ".text\n\t" \ + ".global " #nm "\n\t" \ + #nm ":\n\t" \ + /* 
First, load server protection domain ID into EDX, as required by */ \ + /* prot_domains_syscall_dispatcher: */ \ + " mov " #dcd ", %edx\n\t" \ + SYSCALLS_STUB_EPILOGUE(nm)) + +void syscalls_int_init(void); + +void prot_domains_sysret_stub(void); + +/* Inter-privilege level interrupt stack with no error code. */ +typedef struct interrupt_stack { + uint32_t eip; + uint32_t cs; + uint32_t eflags; + uint32_t esp; + uint32_t ss; +} interrupt_stack_t; + +#if 0 +/* Declaration only included for documentation purposes: */ +/** + * \brief Switch to a different protection domain. + * \param from_id Origin protection domain. + * \param to_id Destination protection domain. + * \return Segment selector for kernel data access (only used for + * multi-segment implementations). + */ +uint32_t prot_domains_switch(dom_id_t from_id, + dom_id_t to_id, + interrupt_stack_t *intr_stk); +#endif + +#endif + +#endif /* CPU_X86_MM_SYSCALLS_INT_H_ */ diff --git a/cpu/x86/mm/syscalls.h b/cpu/x86/mm/syscalls.h new file mode 100644 index 000000000..83be7a47e --- /dev/null +++ b/cpu/x86/mm/syscalls.h @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2015-2016, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CPU_X86_MM_SYSCALLS_H_ +#define CPU_X86_MM_SYSCALLS_H_ + +#include "helpers.h" +#include "prot-domains.h" + +typedef uint32_t dom_id_bitmap_t; + +typedef struct syscalls_entrypoint { + uintptr_t entrypoint; + dom_id_bitmap_t doms; +} syscalls_entrypoint_t; +extern syscalls_entrypoint_t syscalls_entrypoints[]; +extern syscalls_entrypoint_t syscalls_entrypoints_end[]; + +#define SYSCALLS_ACTUAL_CNT (syscalls_entrypoints_end - syscalls_entrypoints) + +#if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__NONE + +#define SYSCALLS_ALLOC_ENTRYPOINT(nm) \ + syscalls_entrypoint_t __attribute__((section(".syscall_bss"))) \ + _syscall_ent_##nm + +#define SYSCALLS_INIT(nm) \ + _syscall_ent_##nm.entrypoint = (uintptr_t)_syscall_##nm; \ + _syscall_ent_##nm.doms = 0 + +#define SYSCALLS_DEFINE(nm, ...) \ + void _syscall_##nm(__VA_ARGS__); \ + SYSCALLS_STUB(nm); \ + void _syscall_##nm(__VA_ARGS__) + +#define SYSCALLS_DEFINE_SINGLETON(nm, dcd, ...) 
\ + void _syscall_##nm(__VA_ARGS__); \ + SYSCALLS_STUB_SINGLETON(nm, dcd); \ + void _syscall_##nm(__VA_ARGS__) + +#define SYSCALLS_AUTHZ(nm, drv) _syscall_ent_##nm.doms |= BIT((drv).dom_id) +#define SYSCALLS_DEAUTHZ(nm, drv) _syscall_ent_##nm.doms &= ~BIT((drv).dom_id) + +/** + * Check that any untrusted pointer that could have been influenced by a caller + * (i.e. a stack parameter or global variable) refers to a location at or above + * a certain stack boundary and halt otherwise. This is used to prevent a + * protection domain from calling a different protection domain and passing a + * pointer that references a location in the callee's stack other than its + * parameters. + * + * This also checks that the pointer is either within the stack region or the + * shared data region, which is important for preventing redirection of data + * accesses to MMIO or metadata regions. + * + * The pointer is both validated and copied to a new storage location, which + * must be within the callee's local stack region (excluding the parameter + * region). This is to mitigate scenarios such as two pointers being validated + * and an adversary later inducing a write through one of the pointers to the + * other pointer to corrupt the latter pointer before it is used. + * + * The frame address is adjusted to account for the first word pushed on the + * local frame and the return address, since neither of those should ever be + * referenced by an incoming pointer. In particular, if an incoming pointer + * references the return address, it could potentially redirect execution with + * the privileges of the callee protection domain. 
+ */ +#define PROT_DOMAINS_VALIDATE_PTR(validated, untrusted, sz) \ + validated = untrusted; \ + if((((uintptr_t)(validated)) < \ + ((2 * sizeof(uintptr_t)) + (uintptr_t)__builtin_frame_address(0))) || \ + (((uintptr_t)&_edata_addr) <= (((uintptr_t)(validated)) + (sz)))) { \ + halt(); \ + } + +#else + +#define SYSCALLS_ALLOC_ENTRYPOINT(nm) +#define SYSCALLS_INIT(nm) +#define SYSCALLS_DEFINE(nm, ...) void nm(__VA_ARGS__) +#define SYSCALLS_DEFINE_SINGLETON(nm, dcd, ...) void nm(__VA_ARGS__) +#define SYSCALLS_AUTHZ(nm, drv) +#define SYSCALLS_DEAUTHZ(nm, drv) +#define PROT_DOMAINS_VALIDATE_PTR(validated, untrusted, sz) validated = untrusted + +#endif + +#endif /* CPU_X86_MM_SYSCALLS_H_ */ diff --git a/cpu/x86/mm/tss.c b/cpu/x86/mm/tss.c new file mode 100644 index 000000000..c3628fa8a --- /dev/null +++ b/cpu/x86/mm/tss.c @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2015-2016, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "gdt.h" +#include "gdt-layout.h" +#include "prot-domains.h" +#include "segmentation.h" +#include "stacks.h" +#include "tss.h" + +/* System-wide TSS */ +tss_t ATTR_BSS_KERN sys_tss; + +static segment_desc_t ATTR_BSS_GDT sys_tss_desc; + +/*---------------------------------------------------------------------------*/ +/** + * \brief Initialize system-wide TSS. + */ +void +tss_init(void) +{ + sys_tss.iomap_base = sizeof(sys_tss); + sys_tss.esp2 = ((uint32_t)stacks_int) + STACKS_SIZE_INT; + sys_tss.ss2 = GDT_SEL_STK_INT; + sys_tss.esp0 = ((uint32_t)stacks_exc) + STACKS_SIZE_EXC; + sys_tss.ss0 = GDT_SEL_STK_EXC; + + segment_desc_init(&sys_tss_desc, (uint32_t)&sys_tss, sizeof(sys_tss), + SEG_FLAG(DPL, PRIV_LVL_EXC) | + SEG_DESCTYPE_SYS | SEG_TYPE_TSS32_AVAIL); + + __asm__ __volatile__ ( + "ltr %0" + : + : "r" ((uint16_t)GDT_SEL_OF_DESC(&sys_tss_desc, 0))); +} +/*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/mm/tss.h b/cpu/x86/mm/tss.h new file mode 100644 index 000000000..e8431d388 --- /dev/null +++ b/cpu/x86/mm/tss.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
 Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CPU_X86_MM_TSS_H_ +#define CPU_X86_MM_TSS_H_ + +#include <stdint.h> + +/** + * Task State Segment. Used by the CPU to manage switching between + * different protection domains (tasks). The current task is referenced + * by the Task Register. When the CPU switches away from a task due to + * a far call, etc., it updates the associated in-memory TSS with the + * current state of the task. It then loads CPU state from the TSS for + * the new task. See Intel Combined Manual, Vol. 3, Chapter 7 for more + * details. 
+ */ +typedef struct tss { + uint32_t prev_tsk; /**< The selector of the task that called this one, if applicable */ + uint32_t esp0; /**< Stack pointer for ring 0 code in this task */ + uint32_t ss0; /**< Stack segment selector for ring 0 code in this task */ + uint32_t esp1; /**< Stack pointer for ring 1 code in this task */ + uint32_t ss1; /**< Stack segment selector for ring 1 code in this task */ + uint32_t esp2; /**< Stack pointer for ring 2 code in this task */ + uint32_t ss2; /**< Stack segment selector for ring 2 code in this task */ + uint32_t cr3; /**< CR3 for this task when paging is enabled */ + uint32_t eip; /**< Stored instruction pointer value */ + uint32_t eflags; /**< Settings for EFLAGS register */ + /** General purpose register values */ + uint32_t eax, ecx, edx, ebx, esp, ebp, esi, edi; + /** Segment register selector values */ + uint32_t es, cs, ss, ds, fs, gs; + /** Selector for Local Descriptor Table */ + uint32_t ldt; + /** Debug-related flag */ + uint16_t t; + /** Offset from base of TSS to base of IO permission bitmap, if one is installed */ + uint16_t iomap_base; +} tss_t; + +void tss_init(void); + +#endif /* CPU_X86_MM_TSS_H_ */ diff --git a/cpu/x86/quarkX1000.ld b/cpu/x86/quarkX1000.ld index a7f2c2555..2f90b7c70 100644 --- a/cpu/x86/quarkX1000.ld +++ b/cpu/x86/quarkX1000.ld @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015, Intel Corporation. All rights reserved. + * Copyright (C) 2015-2016, Intel Corporation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -55,6 +55,7 @@ SECTIONS { .text ALIGN (32) : { KEEP(*(.multiboot)) + *(.boot_text) *(.text*) } @@ -75,6 +76,15 @@ SECTIONS { .bss ALIGN (32) : { *(COMMON) + *(.main_stack) *(.bss*) + + *(.gdt_bss_start) + /* + The other GDT-related sections defined in gdt.h are only used when + protection domain support is enabled. Thus, they do not need to be + included here. 
+ */ + _ebss_gdt_addr = .; } } diff --git a/cpu/x86/quarkX1000_dma.ld b/cpu/x86/quarkX1000_dma.ld index 71ebd04b1..fe3b79861 100644 --- a/cpu/x86/quarkX1000_dma.ld +++ b/cpu/x86/quarkX1000_dma.ld @@ -37,8 +37,12 @@ SECTIONS { Using 1K-alignment perturbs the symbols, hindering debugging. Thus, this section is simply padded out to the desired alignment and declared to have a section alignment of only 32 bytes. + + The alignment directives used here suffice even when paging is in use, + because this is the last section and directly follows one (.bss.meta) + that is 4K-aligned. */ - .bss.dma ALIGN (32) (NOLOAD) : + .bss.dma (NOLOAD) : ALIGN (32) { /* The IMR feature operates at 1K granularity. */ . = ALIGN(1K); diff --git a/cpu/x86/quarkX1000_paging.ld b/cpu/x86/quarkX1000_paging.ld new file mode 100644 index 000000000..0352cbf64 --- /dev/null +++ b/cpu/x86/quarkX1000_paging.ld @@ -0,0 +1,204 @@ +/* + * Copyright (C) 2015-2016, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +OUTPUT_FORMAT("elf32-i386") + +ENTRY(start) + +SECTIONS { + /* + OS-Dev Wiki says it is common for kernels to start at 1M. Addresses before that + are used by BIOS/EFI, the bootloader and memory-mapped I/O. + + The UEFI GenFw program inserts a 0x220-byte offset between the image base and + the .text section. We add that same offset here to align the symbols in the + UEFI DLL with those in the final UEFI binary to make debugging easier. + */ + . = 1M + 0x220; + + .text.boot : ALIGN (32) + { + *(.multiboot) + *(.boot_text) + + /* + Fill out the section to the next 4K boundary so that the UEFI GenFw + program does not shift the following .text section forward into the + gap and perturb the symbols. This only works if the size of this + section is less than 4K - 0x220 bytes. + */ + . = 4K - 0x220; + } + + /* + It is actually desired that each of the following sections be page- + aligned. However, the UEFI GenFw program ratchets up its alignment + granularity to the maximum granularity discovered in its input file. + Using page-alignment perturbs the symbols, hindering debugging. Thus, + this file simply pads each section out to the desired page alignment and + declares a section alignment granularity of 32 bytes. + */ + + .text : ALIGN (32) + { + *(.text*) + + . 
= ALIGN(4K);
+  }
+
+  _stext_addr = ADDR(.text);
+  _etext_addr = ADDR(.text) + SIZEOF(.text);
+
+  .data.stack : ALIGN (32)
+  {
+    /*
+      Introduce a guard band page before the stacks to facilitate stack
+      overflow detection. This approach wastes a page of memory for each
+      guard band, but has the advantage of enabling an identity mapping
+      for all linear to physical addresses except those in the MMIO
+      regions. The guard bands are marked not-present in the page tables
+      to facilitate stack overflow detection.
+
+      This padding must be placed inside of the section, or else it will
+      get dropped when the UEFI GenFw program generates the UEFI binary.
+    */
+    . += 4K;
+
+    /*
+      Place the main stack first so that an overflow is detected and does
+      not overwrite the interrupt or supervisor stacks. Usage of the
+      interrupt and supervisor stacks is predictable, since they are only
+      used by short trampoline code sequences that quickly pivot to the
+      main stack.
+    */
+    *(.main_stack)
+    *(.int_stack)
+    *(.exc_stack)
+
+    /*
+      The combined sizes of the stacks is an even multiple of 4K, so there
+      is no need to align the location counter here.
+    */
+
+    /*
+      Introduce a guard band page after the stacks to detect stack underflow.
+      Note that an underflow that only affects the interrupt and supervisor
+      stacks will not generate a page fault. Detecting such conditions by
+      placing the interrupt and supervisor stacks on separate pages would
+      substantially increase memory usage.
+    */
+    . += 4K;
+  }
+
+  .data : ALIGN (32)
+  {
+    /*
+      The UEFI GenFw program treats all sections that are alloc and read-
+      only as code sections. By that criteria, .rodata would be a code
+      section, but making such data executable is undesirable. Thus, this
+      script lumps in .rodata with other data. It may be desirable in the
+      future to actually write-protect this data.
+    */
+    *(.rodata*)
+    *(.data*)
+
+    /*
+      These could also be treated as read-only data to prevent tampering
+      from the user privilege level.
+ */ + _sdata_shared_isr = .; + KEEP(*(.shared_isr_data*)) + _edata_shared_isr = .; + + . = ALIGN(4K); + } + + .bss : ALIGN (32) + { + *(COMMON) + *(.bss*) + + . = ALIGN(4K); + } + + _sdata_addr = ADDR(.data); + _edata_addr = ADDR(.bss) + SIZEOF(.bss); + + .bss.kern (NOLOAD) : ALIGN (32) + { + /* + Page-aligned data is output first. + It is infeasible to apply a page-alignment attribute to them in the + source code, because that increases the alignment of this section to + be page-aligned, which causes problems when generating a UEFI binary + as described above. + */ + *(.page_aligned_kern_bss) + *(.kern_bss) + + syscalls_entrypoints = .; + *(.syscall_bss) + syscalls_entrypoints_end = .; + + . = ALIGN(4K); + } + + _ebss_syscall_addr = ADDR(.bss.kern) + SIZEOF(.bss.kern); + + .bss.kern_priv (NOLOAD) : ALIGN (32) + { + prot_domains_kern_data = .; + /* + The kernel and app protection domain control structures must always + be placed in the first two slots in this order, so that they have + well-known protection domain IDs: + */ + *(.kern_prot_dom_bss) + *(.app_prot_dom_bss) + *(.prot_dom_bss) + prot_domains_kern_data_end = .; + + *(.gdt_bss_start) + *(.gdt_bss_mid) + *(.gdt_bss) + _ebss_gdt_addr = .; + + . = ALIGN(4K); + } + + _sbss_kern_addr = ADDR(.bss.kern); + _ebss_kern_addr = ADDR(.bss.kern_priv) + SIZEOF(.bss.kern_priv); + + .bss.meta (NOLOAD) : ALIGN (32) + { + *(.meta_bss) + + . = ALIGN(4K); + } +} diff --git a/cpu/x86/uefi/bootstrap_uefi.c b/cpu/x86/uefi/bootstrap_uefi.c index f6981eb96..5ef778cca 100644 --- a/cpu/x86/uefi/bootstrap_uefi.c +++ b/cpu/x86/uefi/bootstrap_uefi.c @@ -35,7 +35,12 @@ void start(void); -EFI_STATUS EFIAPI +/* The section attribute below is copied from ATTR_BOOT_CODE in prot-domains.h. + * prot-domains.h includes stdlib.h which defines NULL. The UEFI headers also + * define NULL, which induces a warning when the compiler detects the conflict. + * To avoid that, we avoid including prot-domains.h from this file. 
+ */ +EFI_STATUS EFIAPI __attribute__((section(".boot_text"))) uefi_start(IN EFI_HANDLE ImageHandle, IN EFI_SYSTEM_TABLE *SystemTable) { EFI_MEMORY_DESCRIPTOR mem_map[MAX_MEM_DESC]; diff --git a/examples/galileo/Makefile b/examples/galileo/Makefile index 94a327bfd..bc7b071ff 100644 --- a/examples/galileo/Makefile +++ b/examples/galileo/Makefile @@ -8,6 +8,10 @@ ifeq ($(filter $(EXAMPLE),$(KNOWN_EXAMPLES)),) $(error Unable to proceed) endif +ifeq ($(EXAMPLE),print-imr) + CFLAGS += -DDBG_IMRS +endif + CONTIKI_PROJECT = $(EXAMPLE) all: $(CONTIKI_PROJECT) diff --git a/platform/galileo/README.md b/platform/galileo/README.md index 89e6f5711..e441876db 100644 --- a/platform/galileo/README.md +++ b/platform/galileo/README.md @@ -44,6 +44,9 @@ Standard APIs: * Stdio library (stdout and stderr only). Console output through UART 1 device (connected to Galileo Gen2 FTDI header) +Optional support for protection domains is also implemented and is +described in cpu/x86/mm/README.md. + Building -------- diff --git a/platform/galileo/contiki-main.c b/platform/galileo/contiki-main.c index 42568d90a..7b31a9961 100644 --- a/platform/galileo/contiki-main.c +++ b/platform/galileo/contiki-main.c @@ -33,12 +33,17 @@ #include "contiki.h" #include "contiki-net.h" #include "cpu.h" +#include "eth.h" #include "eth-conf.h" #include "galileo-pinmux.h" #include "gpio.h" +#include "helpers.h" #include "i2c.h" #include "imr-conf.h" #include "interrupt.h" +#include "irq.h" +#include "pci.h" +#include "prot-domains.h" #include "shared-isr.h" #include "uart.h" @@ -49,31 +54,12 @@ PROCINIT( &etimer_process #endif ); -int -main(void) +/*---------------------------------------------------------------------------*/ +void +app_main(void) { - cpu_init(); -#ifdef X86_CONF_RESTRICT_DMA - quarkX1000_imr_conf(); -#endif - /* Initialize UART connected to Galileo Gen2 FTDI header */ - quarkX1000_uart_init(QUARK_X1000_UART_1); - clock_init(); - rtimer_init(); - printf("Starting Contiki\n"); - 
quarkX1000_i2c_init(); - quarkX1000_i2c_configure(QUARKX1000_I2C_SPEED_STANDARD, - QUARKX1000_I2C_ADDR_MODE_7BIT); - /* use default pinmux configuration */ - if(galileo_pinmux_initialize() < 0) { - fprintf(stderr, "Failed to initialize pinmux\n"); - } - quarkX1000_gpio_init(); - - ENABLE_IRQ(); - process_init(); procinit_init(); ctimer_init(); @@ -81,11 +67,45 @@ main(void) eth_init(); - shared_isr_init(); - while(1) { process_run(); } + halt(); +} +/*---------------------------------------------------------------------------*/ +/* Kernel entrypoint */ +int +main(void) +{ +#ifdef X86_CONF_RESTRICT_DMA + quarkX1000_imr_conf(); +#endif + irq_init(); + /* Initialize UART connected to Galileo Gen2 FTDI header */ + quarkX1000_uart_init(QUARK_X1000_UART_1); + clock_init(); + rtimer_init(); + + pci_root_complex_init(); + quarkX1000_eth_init(); + quarkX1000_i2c_init(); + quarkX1000_i2c_configure(QUARKX1000_I2C_SPEED_STANDARD, + QUARKX1000_I2C_ADDR_MODE_7BIT); + /* use default pinmux configuration */ + if(galileo_pinmux_initialize() < 0) { + fprintf(stderr, "Failed to initialize pinmux\n"); + } + quarkX1000_gpio_init(); + shared_isr_init(); + + /* The ability to remap interrupts is not needed after this point and should + * thus be disabled according to the principle of least privilege. + */ + pci_root_complex_lock(); + + prot_domains_leave_main(); + return 0; } +/*---------------------------------------------------------------------------*/ diff --git a/platform/galileo/net/eth-conf.c b/platform/galileo/net/eth-conf.c index ff3a771bf..061e0aae7 100644 --- a/platform/galileo/net/eth-conf.c +++ b/platform/galileo/net/eth-conf.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015, Intel Corporation. All rights reserved. + * Copyright (C) 2015-2016, Intel Corporation. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -29,7 +29,6 @@ */ #include "eth-conf.h" -#include "eth.h" #include "net/eth-proc.h" #include "contiki-net.h" #include "net/linkaddr.h" @@ -45,6 +44,7 @@ const linkaddr_t linkaddr_null = { { 0, 0, 0, 0, 0, 0 } }; #define NAMESERVER_IP GATEWAY_IP #endif +/*---------------------------------------------------------------------------*/ void eth_init(void) { @@ -69,7 +69,6 @@ eth_init(void) #endif #endif - quarkX1000_eth_init(); - process_start(ð_process, NULL); } +/*---------------------------------------------------------------------------*/ From 4cdb7ba9b606c4308b9ae5d5206f4c0b0e0ffd82 Mon Sep 17 00:00:00 2001 From: Michael LeMay Date: Fri, 7 Aug 2015 15:43:10 -0700 Subject: [PATCH 3/5] x86: Add TSS-based protection domain support This patch extends the protection domain framework with an additional plugin to use Task-State Segment (TSS) structures to offload much of the work of switching protection domains to the CPU. This can save space compared to paging, since paging requires two 4KiB page tables and one 32-byte page table plus one whole-system TSS and an additional 32-byte data structure for each protection domain, whereas the approach implemented by this patch just requires a 128-byte data structure for each protection domain. Only a small number of protection domains will typically be used, so n * 128 < 8328 + (n * 32). For additional information, please refer to cpu/x86/mm/README.md. GCC 6 is introducing named address spaces for the FS and GS segments [1]. LLVM Clang also provides address spaces for the FS and GS segments [2]. This patch also adds support to the multi-segment X86 memory management subsystem for using these features instead of inline assembly blocks, which enables type checking to detect some address space mismatches. 
[1] https://gcc.gnu.org/onlinedocs/gcc/Named-Address-Spaces.html [2] http://llvm.org/releases/3.3/tools/clang/docs/LanguageExtensions.html#target-specific-extensions --- cpu/x86/Makefile.x86_quarkX1000 | 19 + cpu/x86/bootstrap_quarkX1000.S | 12 + cpu/x86/dma.h | 2 +- cpu/x86/drivers/legacy_pc/pci.c | 10 +- cpu/x86/drivers/legacy_pc/pci.h | 12 +- cpu/x86/drivers/legacy_pc/uart-16x50.c | 45 ++- cpu/x86/drivers/legacy_pc/uart-16x50.h | 2 +- cpu/x86/drivers/quarkX1000/eth.c | 116 ++++-- cpu/x86/drivers/quarkX1000/gpio.c | 8 + cpu/x86/drivers/quarkX1000/i2c.c | 8 + cpu/x86/drivers/quarkX1000/uart.c | 8 +- cpu/x86/init/common/cpu.c | 10 +- cpu/x86/init/common/gdt.c | 26 +- cpu/x86/init/common/gdt.h | 24 +- cpu/x86/init/common/idt.c | 50 ++- cpu/x86/init/common/idt.h | 2 +- cpu/x86/mm/README.md | 363 ++++++++++++++++-- cpu/x86/mm/gdt-layout.h | 28 ++ cpu/x86/mm/ldt-layout.h | 59 +++ cpu/x86/mm/multi-segment.c | 239 ++++++++++++ cpu/x86/mm/multi-segment.h | 195 ++++++++++ cpu/x86/mm/prot-domains.c | 6 +- cpu/x86/mm/prot-domains.h | 113 +++++- cpu/x86/mm/segmentation.h | 19 +- cpu/x86/mm/stacks.h | 11 + cpu/x86/mm/syscalls.h | 47 ++- cpu/x86/mm/tss-prot-domains-asm.S | 88 +++++ cpu/x86/mm/tss-prot-domains.c | 161 ++++++++ cpu/x86/mm/tss-prot-domains.h | 130 +++++++ cpu/x86/quarkX1000.ld | 2 + cpu/x86/quarkX1000_dma.ld | 28 +- cpu/x86/quarkX1000_multi_seg.ld | 190 +++++++++ cpu/x86/quarkX1000_paging.ld | 4 +- platform/galileo/Makefile.customrules-galileo | 12 +- 34 files changed, 1883 insertions(+), 166 deletions(-) create mode 100644 cpu/x86/mm/ldt-layout.h create mode 100644 cpu/x86/mm/multi-segment.c create mode 100644 cpu/x86/mm/multi-segment.h create mode 100644 cpu/x86/mm/tss-prot-domains-asm.S create mode 100644 cpu/x86/mm/tss-prot-domains.c create mode 100644 cpu/x86/mm/tss-prot-domains.h create mode 100644 cpu/x86/quarkX1000_multi_seg.ld diff --git a/cpu/x86/Makefile.x86_quarkX1000 b/cpu/x86/Makefile.x86_quarkX1000 index 4a7668cc6..13a9c686f 100644 --- 
a/cpu/x86/Makefile.x86_quarkX1000 +++ b/cpu/x86/Makefile.x86_quarkX1000 @@ -20,6 +20,11 @@ CFLAGS += -DX86_CONF_USE_INVLPG endif # This matches the definition of X86_CONF_PROT_DOMAINS__PAGING in prot-domains.h: CFLAGS += -DX86_CONF_PROT_DOMAINS=1 +else ifeq ($(X86_CONF_PROT_DOMAINS),tss) +# This matches the definition of X86_CONF_PROT_DOMAINS__TSS in prot-domains.h: +CFLAGS += -DX86_CONF_PROT_DOMAINS=2 +X86_CONF_MULTI_SEG = 1 +CONTIKI_SOURCEFILES += tss-prot-domains-asm.S else $(error Unrecognized setting for X86_CONF_PROT_DOMAINS: \ $(X86_CONF_PROT_DOMAINS). See cpu/x86/mm/README.md for \ @@ -30,6 +35,20 @@ ifeq ($(X86_CONF_SYSCALLS_INT),1) CONTIKI_SOURCEFILES += syscalls-int-asm.S tss.c endif +ifeq ($(X86_CONF_MULTI_SEG),1) +LINKERSCRIPT_SFX = _multi_seg +CONTIKI_SOURCEFILES += multi-segment.c +# Due to the way the multi-segment implementation of protection domains define +# tightly-bounded stack segments, the base pointer register cannot be used as +# a general-purpose register in all circumstances. The stack segment is used +# by default for a data access that uses the base pointer as the base register +# to compute the address. If the data referenced by the base pointer is not +# on the stack, then the access will fail. Thus, it is necessary to disable +# the omit-frame-pointer optimization. See mm/README.md for more details of +# how multi-segment protection domains are implemented. +CFLAGS += -fno-omit-frame-pointer +endif + endif CFLAGS += -m32 -march=i586 -mtune=i586 diff --git a/cpu/x86/bootstrap_quarkX1000.S b/cpu/x86/bootstrap_quarkX1000.S index 4211e51a3..622c9dab8 100644 --- a/cpu/x86/bootstrap_quarkX1000.S +++ b/cpu/x86/bootstrap_quarkX1000.S @@ -45,5 +45,17 @@ .global start start: cli +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__TSS + /* TSS-based protection domains use a multi-segment model that defines + * tight bounds around stacks. 
That means that the bottom of the stack + * has an offset of 0, which is the address of the stacks_main symbol. + * The following code computes the physical load address of the top of + * the stack, which is what should be initially used as the stack + * pointer while the flat memory model is in use. + */ + lea _sdata_addr, %eax + lea (stacks_main + STACKS_SIZE_MAIN)(%eax), %esp +#else mov $(stacks_main + STACKS_SIZE_MAIN), %esp +#endif call cpu_boot_stage0 diff --git a/cpu/x86/dma.h b/cpu/x86/dma.h index b0122fcdb..7a8d991b1 100644 --- a/cpu/x86/dma.h +++ b/cpu/x86/dma.h @@ -43,6 +43,6 @@ #endif #endif -extern int _sbss_dma_addr, _ebss_dma_addr; +extern int _ebss_pre_dma_addr, _sbss_dma_addr, _ebss_dma_addr; #endif /* CPU_X86_DMA_H_ */ diff --git a/cpu/x86/drivers/legacy_pc/pci.c b/cpu/x86/drivers/legacy_pc/pci.c index e94c9ecbe..4584d454c 100644 --- a/cpu/x86/drivers/legacy_pc/pci.c +++ b/cpu/x86/drivers/legacy_pc/pci.c @@ -138,7 +138,9 @@ SYSCALLS_DEFINE_SINGLETON(pci_irq_agent_set_pirq, offset = 0x3146; } - value = *(uint16_t*)(rcba_addr + offset); + prot_domains_enable_mmio(); + + MMIO_READW(value, *(uint16_t ATTR_MMIO_ADDR_SPACE *)(rcba_addr + offset)); /* clear interrupt pin route and set corresponding pirq. */ switch(pin) { @@ -159,7 +161,9 @@ SYSCALLS_DEFINE_SINGLETON(pci_irq_agent_set_pirq, value |= (pirq << 12); } - *(uint16_t*)(rcba_addr + offset) = value; + MMIO_WRITEW(*(uint16_t ATTR_MMIO_ADDR_SPACE *)(rcba_addr + offset), value); + + prot_domains_disable_mmio(); } /*---------------------------------------------------------------------------*/ /** @@ -231,7 +235,7 @@ pci_pirq_set_irq(PIRQ pirq, uint8_t irq, uint8_t route_to_legacy) * \param meta_sz Size of optional driver-defined metadata. 
*/ void -pci_init(pci_driver_t *c_this, +pci_init(pci_driver_t ATTR_KERN_ADDR_SPACE *c_this, pci_config_addr_t pci_addr, size_t mmio_sz, uintptr_t meta, diff --git a/cpu/x86/drivers/legacy_pc/pci.h b/cpu/x86/drivers/legacy_pc/pci.h index fff53a048..666b3c29e 100644 --- a/cpu/x86/drivers/legacy_pc/pci.h +++ b/cpu/x86/drivers/legacy_pc/pci.h @@ -102,7 +102,7 @@ void pci_command_enable(pci_config_addr_t addr, uint32_t flags); typedef dom_client_data_t pci_driver_t; -void pci_init(pci_driver_t *c_this, +void pci_init(pci_driver_t ATTR_KERN_ADDR_SPACE *c_this, pci_config_addr_t pci_addr, size_t mmio_sz, uintptr_t meta, @@ -113,10 +113,12 @@ void pci_root_complex_init(void); void pci_root_complex_lock(void); #define PCI_MMIO_READL(c_this, dest, reg_addr) \ - dest = *((volatile uint32_t *) \ - (((uintptr_t)PROT_DOMAINS_MMIO(c_this)) + (reg_addr))) + MMIO_READL(dest, \ + *((volatile uint32_t ATTR_MMIO_ADDR_SPACE *) \ + (((uintptr_t)PROT_DOMAINS_MMIO(c_this)) + (reg_addr)))) #define PCI_MMIO_WRITEL(c_this, reg_addr, src) \ - *((volatile uint32_t *) \ - (((uintptr_t)PROT_DOMAINS_MMIO(c_this)) + (reg_addr))) = (src) + MMIO_WRITEL(*((volatile uint32_t ATTR_MMIO_ADDR_SPACE *) \ + (((uintptr_t)PROT_DOMAINS_MMIO(c_this)) + (reg_addr))), \ + src) #endif /* CPU_X86_DRIVERS_LEGACY_PC_PCI_H_ */ diff --git a/cpu/x86/drivers/legacy_pc/uart-16x50.c b/cpu/x86/drivers/legacy_pc/uart-16x50.c index d1f2c498d..d17e61498 100644 --- a/cpu/x86/drivers/legacy_pc/uart-16x50.c +++ b/cpu/x86/drivers/legacy_pc/uart-16x50.c @@ -74,6 +74,11 @@ typedef struct uart_16x50_regs { */ #define UART_MMIO_SZ MIN_PAGE_SIZE #else +/* Multi-segment protection domain implementations can control memory with + * byte granularity. 
Thus, only the registers defined in the uart_16x50_regs + * structure are included in the MMIO region allocated for this protection + * domain: + */ #define UART_MMIO_SZ sizeof(uart_16x50_regs_t) #endif @@ -82,24 +87,30 @@ void uart_16x50_setup(uart_16x50_driver_t c_this, uint16_t dl); /*---------------------------------------------------------------------------*/ SYSCALLS_DEFINE(uart_16x50_setup, uart_16x50_driver_t c_this, uint16_t dl) { - uart_16x50_regs_t *regs = (uart_16x50_regs_t *)PROT_DOMAINS_MMIO(c_this); + uart_16x50_regs_t ATTR_MMIO_ADDR_SPACE *regs = + (uart_16x50_regs_t ATTR_MMIO_ADDR_SPACE *)PROT_DOMAINS_MMIO(c_this); + + prot_domains_enable_mmio(); /* Set the DLAB bit to enable access to divisor settings. */ - regs->lcr = UART_LCR_7_DLAB; + MMIO_WRITEL(regs->lcr, UART_LCR_7_DLAB); /* The divisor settings configure the baud rate, and may need to be defined * on a per-device basis. */ - regs->rbr_thr_dll = dl & UINT8_MAX; - regs->ier_dlh = dl >> 8; + MMIO_WRITEL(regs->rbr_thr_dll, dl & UINT8_MAX); + MMIO_WRITEL(regs->ier_dlh, dl >> 8); /* Clear the DLAB bit to enable access to other settings and configure other * UART parameters. */ - regs->lcr = UART_LCR_8BITS; + MMIO_WRITEL(regs->lcr, UART_LCR_8BITS); /* Enable the FIFOs. */ - regs->iir_fcr = UART_FCR_0_FIFOE | UART_FCR_1_RFIFOR | UART_FCR_2_XFIFOR; + MMIO_WRITEL(regs->iir_fcr, + UART_FCR_0_FIFOE | UART_FCR_1_RFIFOR | UART_FCR_2_XFIFOR); + + prot_domains_disable_mmio(); } /*---------------------------------------------------------------------------*/ /** @@ -112,13 +123,21 @@ SYSCALLS_DEFINE(uart_16x50_setup, uart_16x50_driver_t c_this, uint16_t dl) */ SYSCALLS_DEFINE(uart_16x50_tx, uart_16x50_driver_t c_this, uint8_t c) { - uart_16x50_regs_t *regs = (uart_16x50_regs_t *)PROT_DOMAINS_MMIO(c_this); + uint32_t ready; + uart_16x50_regs_t ATTR_MMIO_ADDR_SPACE *regs = + (uart_16x50_regs_t ATTR_MMIO_ADDR_SPACE *)PROT_DOMAINS_MMIO(c_this); + + prot_domains_enable_mmio(); /* Wait for space in TX FIFO. 
*/ - while((regs->lsr & UART_LSR_5_THRE) == 0); + do { + MMIO_READL(ready, regs->lsr); + } while((ready & UART_LSR_5_THRE) == 0); /* Add character to TX FIFO. */ - regs->rbr_thr_dll = c; + MMIO_WRITEL(regs->rbr_thr_dll, c); + + prot_domains_disable_mmio(); } /*---------------------------------------------------------------------------*/ /** @@ -128,10 +147,12 @@ SYSCALLS_DEFINE(uart_16x50_tx, uart_16x50_driver_t c_this, uint8_t c) * \param dl Divisor setting to configure the baud rate. */ void -uart_16x50_init(uart_16x50_driver_t *c_this, +uart_16x50_init(uart_16x50_driver_t ATTR_KERN_ADDR_SPACE *c_this, pci_config_addr_t pci_addr, uint16_t dl) { + uart_16x50_driver_t loc_c_this; + /* This assumes that the UART had an MMIO range assigned to it by the * firmware during boot. */ @@ -141,6 +162,8 @@ uart_16x50_init(uart_16x50_driver_t *c_this, SYSCALLS_INIT(uart_16x50_tx); SYSCALLS_AUTHZ(uart_16x50_tx, *c_this); - uart_16x50_setup(*c_this, dl); + prot_domains_copy_dcd(&loc_c_this, c_this); + + uart_16x50_setup(loc_c_this, dl); } /*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/drivers/legacy_pc/uart-16x50.h b/cpu/x86/drivers/legacy_pc/uart-16x50.h index 615806518..4a038b948 100644 --- a/cpu/x86/drivers/legacy_pc/uart-16x50.h +++ b/cpu/x86/drivers/legacy_pc/uart-16x50.h @@ -35,7 +35,7 @@ typedef pci_driver_t uart_16x50_driver_t; -void uart_16x50_init(uart_16x50_driver_t *c_this, +void uart_16x50_init(uart_16x50_driver_t ATTR_KERN_ADDR_SPACE *c_this, pci_config_addr_t pci_addr, uint16_t dl); diff --git a/cpu/x86/drivers/quarkX1000/eth.c b/cpu/x86/drivers/quarkX1000/eth.c index 5c16b10a5..88782ebc2 100644 --- a/cpu/x86/drivers/quarkX1000/eth.c +++ b/cpu/x86/drivers/quarkX1000/eth.c @@ -216,13 +216,19 @@ SYSCALLS_DEFINE_SINGLETON(quarkX1000_eth_setup, drv, uintptr_t meta_phys_base) { uip_eth_addr mac_addr; uint32_t mac_tmp1, mac_tmp2; - quarkX1000_eth_meta_t *loc_meta = - (quarkX1000_eth_meta_t *)PROT_DOMAINS_META(drv); 
+ quarkX1000_eth_rx_desc_t rx_desc; + quarkX1000_eth_tx_desc_t tx_desc; + quarkX1000_eth_meta_t ATTR_META_ADDR_SPACE *loc_meta = + (quarkX1000_eth_meta_t ATTR_META_ADDR_SPACE *)PROT_DOMAINS_META(drv); + + prot_domains_enable_mmio(); /* Read the MAC address from the device. */ PCI_MMIO_READL(drv, mac_tmp1, REG_ADDR_MACADDR_HI); PCI_MMIO_READL(drv, mac_tmp2, REG_ADDR_MACADDR_LO); + prot_domains_disable_mmio(); + /* Convert the data read from the device into the format expected by * Contiki. */ @@ -245,29 +251,39 @@ SYSCALLS_DEFINE_SINGLETON(quarkX1000_eth_setup, drv, uintptr_t meta_phys_base) uip_setethaddr(mac_addr); /* Initialize transmit descriptor. */ - loc_meta->tx_desc.tdes0 = 0; - loc_meta->tx_desc.tdes1 = 0; + tx_desc.tdes0 = 0; + tx_desc.tdes1 = 0; - loc_meta->tx_desc.buf1_ptr = - (uint8_t *)PROT_DOMAINS_META_OFF_TO_PHYS( - (uintptr_t)&loc_meta->tx_buf, meta_phys_base); - loc_meta->tx_desc.tx_end_of_ring = 1; - loc_meta->tx_desc.first_seg_in_frm = 1; - loc_meta->tx_desc.last_seg_in_frm = 1; - loc_meta->tx_desc.tx_end_of_ring = 1; + tx_desc.tx_end_of_ring = 1; + tx_desc.first_seg_in_frm = 1; + tx_desc.last_seg_in_frm = 1; + tx_desc.tx_end_of_ring = 1; + + META_WRITEL(loc_meta->tx_desc.tdes0, tx_desc.tdes0); + META_WRITEL(loc_meta->tx_desc.tdes1, tx_desc.tdes1); + META_WRITEL(loc_meta->tx_desc.buf1_ptr, + (uint8_t *)PROT_DOMAINS_META_OFF_TO_PHYS( + (uintptr_t)&loc_meta->tx_buf, meta_phys_base)); + META_WRITEL(loc_meta->tx_desc.buf2_ptr, 0); /* Initialize receive descriptor. 
*/ - loc_meta->rx_desc.rdes0 = 0; - loc_meta->rx_desc.rdes1 = 0; + rx_desc.rdes0 = 0; + rx_desc.rdes1 = 0; - loc_meta->rx_desc.buf1_ptr = - (uint8_t *)PROT_DOMAINS_META_OFF_TO_PHYS( - (uintptr_t)&loc_meta->rx_buf, meta_phys_base); - loc_meta->rx_desc.own = 1; - loc_meta->rx_desc.first_desc = 1; - loc_meta->rx_desc.last_desc = 1; - loc_meta->rx_desc.rx_buf1_sz = UIP_BUFSIZE; - loc_meta->rx_desc.rx_end_of_ring = 1; + rx_desc.own = 1; + rx_desc.first_desc = 1; + rx_desc.last_desc = 1; + rx_desc.rx_buf1_sz = UIP_BUFSIZE; + rx_desc.rx_end_of_ring = 1; + + META_WRITEL(loc_meta->rx_desc.rdes0, rx_desc.rdes0); + META_WRITEL(loc_meta->rx_desc.rdes1, rx_desc.rdes1); + META_WRITEL(loc_meta->rx_desc.buf1_ptr, + (uint8_t *)PROT_DOMAINS_META_OFF_TO_PHYS( + (uintptr_t)&loc_meta->rx_buf, meta_phys_base)); + META_WRITEL(loc_meta->rx_desc.buf2_ptr, 0); + + prot_domains_enable_mmio(); /* Install transmit and receive descriptors. */ PCI_MMIO_WRITEL(drv, REG_ADDR_RX_DESC_LIST, @@ -298,8 +314,11 @@ SYSCALLS_DEFINE_SINGLETON(quarkX1000_eth_setup, drv, uintptr_t meta_phys_base) /* Place the receiver state machine in the Running state. */ OP_MODE_1_START_RX); + prot_domains_disable_mmio(); + printf(LOG_PFX "Enabled 100M full-duplex mode.\n"); } + /*---------------------------------------------------------------------------*/ /** * \brief Poll for a received Ethernet frame. @@ -313,33 +332,43 @@ SYSCALLS_DEFINE_SINGLETON(quarkX1000_eth_poll, drv, uint16_t * frame_len) { uint16_t *loc_frame_len; uint16_t frm_len = 0; - quarkX1000_eth_meta_t *loc_meta = - (quarkX1000_eth_meta_t *)PROT_DOMAINS_META(drv); + quarkX1000_eth_rx_desc_t tmp_desc; + quarkX1000_eth_meta_t ATTR_META_ADDR_SPACE *loc_meta = + (quarkX1000_eth_meta_t ATTR_META_ADDR_SPACE *)PROT_DOMAINS_META(drv); PROT_DOMAINS_VALIDATE_PTR(loc_frame_len, frame_len, sizeof(*frame_len)); + META_READL(tmp_desc.rdes0, loc_meta->rx_desc.rdes0); + /* Check whether the RX descriptor is still owned by the device. 
If not, * process the received frame or an error that may have occurred. */ - if(loc_meta->rx_desc.own == 0) { - if(loc_meta->rx_desc.err_summary) { + if(tmp_desc.own == 0) { + META_READL(tmp_desc.rdes1, loc_meta->rx_desc.rdes1); + if(tmp_desc.err_summary) { fprintf(stderr, LOG_PFX "Error receiving frame: RDES0 = %08x, RDES1 = %08x.\n", - loc_meta->rx_desc.rdes0, loc_meta->rx_desc.rdes1); + tmp_desc.rdes0, tmp_desc.rdes1); assert(0); } - frm_len = loc_meta->rx_desc.frm_len; + frm_len = tmp_desc.frm_len; assert(frm_len <= UIP_BUFSIZE); - memcpy(uip_buf, (void *)loc_meta->rx_buf, frm_len); + MEMCPY_FROM_META(uip_buf, loc_meta->rx_buf, frm_len); /* Return ownership of the RX descriptor to the device. */ - loc_meta->rx_desc.own = 1; + tmp_desc.own = 1; + + META_WRITEL(loc_meta->rx_desc.rdes0, tmp_desc.rdes0); + + prot_domains_enable_mmio(); /* Request that the device check for an available RX descriptor, since * ownership of the descriptor was just transferred to the device. */ PCI_MMIO_WRITEL(drv, REG_ADDR_RX_POLL_DEMAND, 1); + + prot_domains_disable_mmio(); } *loc_frame_len = frm_len; @@ -356,32 +385,45 @@ SYSCALLS_DEFINE_SINGLETON(quarkX1000_eth_poll, drv, uint16_t * frame_len) */ SYSCALLS_DEFINE_SINGLETON(quarkX1000_eth_send, drv) { - quarkX1000_eth_meta_t *loc_meta = - (quarkX1000_eth_meta_t *)PROT_DOMAINS_META(drv); + quarkX1000_eth_tx_desc_t tmp_desc; + quarkX1000_eth_meta_t ATTR_META_ADDR_SPACE *loc_meta = + (quarkX1000_eth_meta_t ATTR_META_ADDR_SPACE *)PROT_DOMAINS_META(drv); /* Wait until the TX descriptor is no longer owned by the device. */ - while(loc_meta->tx_desc.own == 1); + do { + META_READL(tmp_desc.tdes0, loc_meta->tx_desc.tdes0); + } while(tmp_desc.own == 1); + + META_READL(tmp_desc.tdes1, loc_meta->tx_desc.tdes1); /* Check whether an error occurred transmitting the previous frame. 
*/ - if(loc_meta->tx_desc.err_summary) { + if(tmp_desc.err_summary) { fprintf(stderr, LOG_PFX "Error transmitting frame: TDES0 = %08x, TDES1 = %08x.\n", - loc_meta->tx_desc.tdes0, loc_meta->tx_desc.tdes1); + tmp_desc.tdes0, tmp_desc.tdes1); assert(0); } /* Transmit the next frame. */ assert(uip_len <= UIP_BUFSIZE); - memcpy((void *)loc_meta->tx_buf, uip_buf, uip_len); + MEMCPY_TO_META(loc_meta->tx_buf, uip_buf, uip_len); - loc_meta->tx_desc.tx_buf1_sz = uip_len; + tmp_desc.tx_buf1_sz = uip_len; - loc_meta->tx_desc.own = 1; + META_WRITEL(loc_meta->tx_desc.tdes1, tmp_desc.tdes1); + + tmp_desc.own = 1; + + META_WRITEL(loc_meta->tx_desc.tdes0, tmp_desc.tdes0); + + prot_domains_enable_mmio(); /* Request that the device check for an available TX descriptor, since * ownership of the descriptor was just transferred to the device. */ PCI_MMIO_WRITEL(drv, REG_ADDR_TX_POLL_DEMAND, 1); + + prot_domains_disable_mmio(); } /*---------------------------------------------------------------------------*/ /** diff --git a/cpu/x86/drivers/quarkX1000/gpio.c b/cpu/x86/drivers/quarkX1000/gpio.c index 642cad310..ba825c090 100644 --- a/cpu/x86/drivers/quarkX1000/gpio.c +++ b/cpu/x86/drivers/quarkX1000/gpio.c @@ -56,7 +56,11 @@ #define HIGHEST_REG LS_SYNC +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__PAGING #define MMIO_SZ MIN_PAGE_SIZE +#else +#define MMIO_SZ (HIGHEST_REG + 4) +#endif PROT_DOMAINS_ALLOC(pci_driver_t, drv); @@ -77,7 +81,9 @@ SYSCALLS_DEFINE_SINGLETON(quarkX1000_gpio_mmin, drv, halt(); } + prot_domains_enable_mmio(); PCI_MMIO_READL(drv, *loc_res, offset); + prot_domains_disable_mmio(); } static inline uint32_t @@ -96,7 +102,9 @@ SYSCALLS_DEFINE_SINGLETON(quarkX1000_gpio_mmout, drv, halt(); } + prot_domains_enable_mmio(); PCI_MMIO_WRITEL(drv, offset, val); + prot_domains_disable_mmio(); } static inline void diff --git a/cpu/x86/drivers/quarkX1000/i2c.c b/cpu/x86/drivers/quarkX1000/i2c.c index 746e52b96..9e233e89c 100644 --- a/cpu/x86/drivers/quarkX1000/i2c.c +++ 
b/cpu/x86/drivers/quarkX1000/i2c.c @@ -51,7 +51,11 @@ #define I2C_IRQ 9 +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__PAGING #define MMIO_SZ MIN_PAGE_SIZE +#else +#define MMIO_SZ (QUARKX1000_IC_HIGHEST + 4) +#endif typedef enum { I2C_DIRECTION_READ, @@ -99,7 +103,9 @@ SYSCALLS_DEFINE_SINGLETON(quarkX1000_i2c_mmin, drv, halt(); } + prot_domains_enable_mmio(); PCI_MMIO_READL(drv, *loc_res, offset); + prot_domains_disable_mmio(); } static inline uint32_t @@ -119,7 +125,9 @@ SYSCALLS_DEFINE_SINGLETON(quarkX1000_i2c_mmout, drv, halt(); } + prot_domains_enable_mmio(); PCI_MMIO_WRITEL(drv, offset, val); + prot_domains_disable_mmio(); } static inline void diff --git a/cpu/x86/drivers/quarkX1000/uart.c b/cpu/x86/drivers/quarkX1000/uart.c index dcd0af8f2..341e31cf7 100644 --- a/cpu/x86/drivers/quarkX1000/uart.c +++ b/cpu/x86/drivers/quarkX1000/uart.c @@ -49,7 +49,7 @@ void quarkX1000_uart_init(quarkX1000_uart_dev_t dev) { pci_config_addr_t pci_addr; - uart_16x50_driver_t *drv; + uart_16x50_driver_t ATTR_KERN_ADDR_SPACE *drv; assert((dev == QUARK_X1000_UART_0) || (dev == QUARK_X1000_UART_1)); @@ -78,7 +78,11 @@ quarkX1000_uart_init(quarkX1000_uart_dev_t dev) void quarkX1000_uart_tx(quarkX1000_uart_dev_t dev, uint8_t c) { + uart_16x50_driver_t drv; assert((dev == QUARK_X1000_UART_0) || (dev == QUARK_X1000_UART_1)); - uart_16x50_tx((dev == QUARK_X1000_UART_0) ? quarkX1000_uart0 : quarkX1000_uart1, c); + prot_domains_copy_dcd(&drv, + (dev == QUARK_X1000_UART_0) ? 
+ &quarkX1000_uart0 : &quarkX1000_uart1); + uart_16x50_tx(drv, c); } /*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/init/common/cpu.c b/cpu/x86/init/common/cpu.c index 94ec2ddab..dd58b96d5 100644 --- a/cpu/x86/init/common/cpu.c +++ b/cpu/x86/init/common/cpu.c @@ -42,8 +42,11 @@ double_fault_handler(struct interrupt_context context) halt(); } /*---------------------------------------------------------------------------*/ -/* The OS has switched to its own segment descriptors. However, the protection - * domain support, if enabled, has not yet been fully activated. +/* The OS has switched to its own segment descriptors. When multi-segment + * protection domain support is enabled, this routine runs with the + * necessary address translations configured to invoke other routines that + * require those translations to be in place. However, the protection domain + * support, if enabled, has not yet been fully activated. */ static void boot_stage1(void) @@ -75,7 +78,8 @@ cpu_boot_stage0(void) uintptr_t top_of_stack = STACKS_INIT_TOP; #if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__NONE - uintptr_t *top_of_stack_ptr = (uintptr_t *)top_of_stack; + uintptr_t *top_of_stack_ptr = + (uintptr_t *)DATA_OFF_TO_PHYS_ADDR(top_of_stack); top_of_stack_ptr[0] = (uintptr_t)prot_domains_launch_kernel; top_of_stack_ptr[1] = (uintptr_t)prot_domains_launch_app; diff --git a/cpu/x86/init/common/gdt.c b/cpu/x86/init/common/gdt.c index f7fa10342..f63767850 100644 --- a/cpu/x86/init/common/gdt.c +++ b/cpu/x86/init/common/gdt.c @@ -72,7 +72,7 @@ set_descriptor(unsigned int index, segment_desc_init(&descriptor, base, len, flag); /* Save descriptor into gdt */ - gdt[index] = descriptor; + gdt_insert_boot(index, descriptor); } /*---------------------------------------------------------------------------*/ void @@ -86,15 +86,17 @@ gdt_copy_desc_change_dpl(unsigned int dest_idx, halt(); } - desc = gdt[src_idx]; + gdt_lookup(src_idx, &desc); 
SEG_SET_FLAG(desc, DPL, dpl); - gdt[dest_idx] = desc; + gdt_insert(dest_idx, desc); } /*---------------------------------------------------------------------------*/ /* This function initializes the Global Descriptor Table. For simplicity, the - * memory is organized following the flat model. Thus, memory appears to - * Contiki as a single continuous address space. Code, data, and stack + * memory is initially organized following the flat model. Thus, memory appears + * to Contiki as a single continuous address space. Code, data, and stack * are all contained in this address space (so called linear address space). + * Certain protection domain implementations switch to a multi-segment memory + * model later during boot. */ void gdt_init(void) @@ -103,7 +105,7 @@ gdt_init(void) /* Initialize gdtr structure */ gdtr.limit = sizeof(segment_desc_t) * GDT_LEN - 1; - gdtr.base = (uint32_t) &gdt; + gdtr.base = KERN_DATA_OFF_TO_PHYS_ADDR(gdt); /* Initialize descriptors */ set_descriptor(GDT_IDX_NULL, 0, 0, 0); @@ -115,13 +117,20 @@ gdt_init(void) } /*---------------------------------------------------------------------------*/ void +gdt_insert_boot(unsigned int idx, segment_desc_t desc) +{ + ((segment_desc_t *)KERN_DATA_OFF_TO_PHYS_ADDR(gdt))[idx] = desc; +} +/*---------------------------------------------------------------------------*/ +void gdt_insert(unsigned int idx, segment_desc_t desc) { if(GDT_LEN <= idx) { halt(); } - gdt[idx] = desc; + KERN_WRITEL(gdt[idx].raw_lo, desc.raw_lo); + KERN_WRITEL(gdt[idx].raw_hi, desc.raw_hi); } /*---------------------------------------------------------------------------*/ void @@ -131,6 +140,7 @@ gdt_lookup(unsigned int idx, segment_desc_t *desc) halt(); } - *desc = gdt[idx]; + KERN_READL(desc->raw_lo, gdt[idx].raw_lo); + KERN_READL(desc->raw_hi, gdt[idx].raw_hi); } /*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/init/common/gdt.h b/cpu/x86/init/common/gdt.h index 
37f1f4dbe..305e32716 100644 --- a/cpu/x86/init/common/gdt.h +++ b/cpu/x86/init/common/gdt.h @@ -35,13 +35,21 @@ #include "prot-domains.h" #include "segmentation.h" -extern segment_desc_t gdt[]; -extern int _ebss_gdt_addr; +extern segment_desc_t ATTR_KERN_ADDR_SPACE gdt[]; +extern int ATTR_KERN_ADDR_SPACE _ebss_gdt_addr; #define GDT_IDX_OF_DESC(ptr) \ ((((uintptr_t)(ptr)) - ((uintptr_t)&gdt))/ \ sizeof(segment_desc_t)) +typedef struct far_pointer { + /** Far pointer offset. */ + uint32_t offset; + /** Far pointer segment/gate selector. */ + uint16_t sel; + uint16_t pad; +} __attribute__((packed)) far_pointer_t; + /** * \brief Compute the selector for a GDT entry allocated somewhere besides gdt.c. * \param ptr Pointer to GDT descriptor. @@ -49,14 +57,22 @@ extern int _ebss_gdt_addr; */ #define GDT_SEL_OF_DESC(ptr, rpl) GDT_SEL(GDT_IDX_OF_DESC(ptr), rpl) -#define ATTR_BSS_GDT __attribute__((section(".gdt_bss"))) -#define ATTR_BSS_GDT_START __attribute__((section(".gdt_bss_start"))) +/* Section for fixed GDT entries */ +#define ATTR_BSS_GDT \ + __attribute__((section(".gdt_bss"))) ATTR_KERN_ADDR_SPACE +/* Section for TSS and LDT descriptors for protection domains */ +#define ATTR_BSS_GDT_MID \ + __attribute__((used, section(".gdt_bss_mid"))) ATTR_KERN_ADDR_SPACE +/* Section for other GDT entries */ +#define ATTR_BSS_GDT_START \ + __attribute__((section(".gdt_bss_start"))) ATTR_KERN_ADDR_SPACE void gdt_copy_desc_change_dpl(unsigned int dest_idx, unsigned int src_idx, unsigned dpl); void gdt_init(void) ATTR_CODE_BOOT; void gdt_insert(unsigned int idx, segment_desc_t desc); +void gdt_insert_boot(unsigned int idx, segment_desc_t desc) ATTR_CODE_BOOT; void gdt_lookup(unsigned int idx, segment_desc_t *desc); #endif /* GDT_H */ diff --git a/cpu/x86/init/common/idt.c b/cpu/x86/init/common/idt.c index 441668a75..c5de5ed25 100644 --- a/cpu/x86/init/common/idt.c +++ b/cpu/x86/init/common/idt.c @@ -43,17 +43,23 @@ typedef struct idtr { uint32_t base; } __attribute__((packed)) 
idtr_t; -typedef struct intr_gate_desc { - uint16_t offset_low; - uint16_t selector; /* Segment Selector for destination code segment */ - uint16_t fixed:11; - uint16_t d:1; /* Size of gate: 1 = 32 bits; 0 = 16 bits */ - uint16_t pad:1; - uint16_t dpl:2; /* Descriptor Privilege Level */ - uint16_t p:1; /* Segment Present flag */ - uint16_t offset_high; - -} __attribute__((packed)) intr_gate_desc_t; +typedef union intr_gate_desc { + struct __attribute__((packed)) { + uint16_t offset_low; + uint16_t selector; /* Segment Selector for destination code segment */ + uint16_t fixed:11; + uint16_t d:1; /* Size of gate: 1 = 32 bits; 0 = 16 bits */ + uint16_t pad:1; + uint16_t dpl:2; /* Descriptor Privilege Level */ + uint16_t p:1; /* Segment Present flag */ + uint16_t offset_high; + }; + uint64_t raw; + struct { + uint32_t raw_lo; + uint32_t raw_hi; + }; +} intr_gate_desc_t; /* According to Intel Combined Manual, Vol. 3, Section 6.10, the base addresses * of the IDT should be aligned on an 8-byte boundary to maximize performance @@ -73,15 +79,19 @@ idt_set_intr_gate_desc(int intr_num, uint16_t cs, uint16_t dpl) { - intr_gate_desc_t *desc = &idt[intr_num]; + intr_gate_desc_t desc; - desc->offset_low = offset & 0xFFFF; - desc->selector = cs; - desc->fixed = BIT(9) | BIT(10); - desc->d = 1; - desc->dpl = dpl; - desc->p = 1; - desc->offset_high = (offset >> 16) & 0xFFFF; + desc.offset_low = offset & 0xFFFF; + desc.selector = cs; + desc.fixed = BIT(9) | BIT(10); + desc.pad = 0; + desc.d = 1; + desc.dpl = dpl; + desc.p = 1; + desc.offset_high = (offset >> 16) & 0xFFFF; + + KERN_WRITEL(idt[intr_num].raw_hi, desc.raw_hi); + KERN_WRITEL(idt[intr_num].raw_lo, desc.raw_lo); } /*---------------------------------------------------------------------------*/ /* Initialize Interrupt Descriptor Table. 
The IDT is initialized with @@ -95,7 +105,7 @@ idt_init(void) /* Initialize idtr structure */ idtr.limit = (sizeof(intr_gate_desc_t) * NUM_DESC) - 1; - idtr.base = (uint32_t)&idt; + idtr.base = KERN_DATA_OFF_TO_PHYS_ADDR((uint32_t)idt); /* Load IDTR register */ __asm__("lidt %0\n\t" :: "m" (idtr)); diff --git a/cpu/x86/init/common/idt.h b/cpu/x86/init/common/idt.h index 18f168ad8..059e81705 100644 --- a/cpu/x86/init/common/idt.h +++ b/cpu/x86/init/common/idt.h @@ -34,7 +34,7 @@ #include <stdint.h> #include "prot-domains.h" -void idt_init(void) ATTR_CODE_BOOT; +void idt_init(void); void idt_set_intr_gate_desc(int intr_num, uint32_t offset, uint16_t cs, diff --git a/cpu/x86/mm/README.md b/cpu/x86/mm/README.md index 8990beec9..dcd6370b4 100644 --- a/cpu/x86/mm/README.md +++ b/cpu/x86/mm/README.md @@ -5,13 +5,15 @@ Introduction ------------ The X86 port of Contiki implements a simple, lightweight form of -protection domains using a pluggable framework. Currently, the -following plugin is available: +protection domains using a pluggable framework. Currently, there are +two plugins available: - Flat memory model with paging. + - Multi-segment memory model with hardware-switched segments based on + Task-State Segment (TSS) structures. -For an introduction to paging and possible ways in which it can be -used, refer to the following resources: +For an introduction to paging and TSS and possible ways in which they +can be used, refer to the following resources: - Intel Combined Manual (Intel 64 and IA-32 Architectures Software Developer's Manual), Vol. 3, Chapter 4 @@ -28,7 +30,7 @@ idealized principle is balanced against the practical objectives of limiting the number of relatively time-consuming context switches and minimizing changes to existing code. In fact, no changes were made to code outside of the CPU- and platform-specific code directories for -the initial plugin. +the initial plugins. Each protection domain can optionally be associated with a metadata and/or MMIO region. 
The hardware can support additional regions per @@ -139,7 +141,11 @@ the one that was interrupted. However, interrupts are only actually enabled in the application protection domain. Similarly, register contents may be accessed and modified across -protection domain boundaries. +protection domain boundaries in some protection domain +implementations. The TSS task switching mechanism automatically saves +and restores many registers to and from TSS data structures when +switching tasks, but the paging-based protection domain implementation +does not perform analogous operations. For the reasons described above, each protection domain should only invoke other protection domains that it trusts to properly handle data @@ -186,7 +192,9 @@ disabled. Flat segments each map the whole 4GiB physical memory space. This is the state of the system when the OS enters boot stage 0. This stage is responsible for setting up a new GDT and loading the segment registers with the appropriate descriptors from the new GDT to -enable boot stage 1 to run. +enable boot stage 1 to run. Code in stage 1 for multi-segment +protection domain implementations requires that the appropriate +segment-based address translations be configured. #### Boot Stage 1 @@ -258,17 +266,18 @@ Ring level 1 is unused. ### IO and Interrupt Privileges The kernel protection domain cooperative scheduling context needs -access to IO ports, for device initialization. Other protection -domains may also require such access. The IO Privilege Level (IOPL) -that is assigned to a protection domain using the relevant bits in the +access to IO ports, for device initialization. Some other protection +domains also require such access. The IO Privilege Level (IOPL) that +is assigned to a protection domain using the relevant bits in the EFLAGS field could be set according to whether IO port access is -required in that protection domain. 
However, this would introduce -additional complexity and overhead in the critical system call and -return dispatchers. Instead, the IOPL is always set to block IO -access from the cooperative scheduling context. Port IO instructions -in that context will then generate general protection faults, and the -exception handler decodes and emulates authorized port IO -instructions. +required in that protection domain. This is straightforward for TSS, +which includes separate flags settings for each protection domain. +However, this would introduce additional complexity and overhead in +the critical system call and return dispatchers for other plugins. +Instead, the IOPL is always set to block IO access from the +cooperative scheduling context. Port IO instructions in that context +will then generate general protection faults, and the exception +handler decodes and emulates authorized port IO instructions. Interrupts are handled at ring level 2, since they do not use any privileged instructions. They do cause the interrupt flag to be @@ -307,11 +316,15 @@ pivoting to the main stack and executing the handler. ### Protection Domain Control Structures (PDCSes) Each protection domain is managed by the kernel and privileged -functions using a PDCS. The PDCS structure is entirely -software-defined. The initial protection domain plugin does not -support re-entrant protection domains to simplify the implementation -of the plugin by enabling domain-specific information (e.g. system -call return address) to be trivially stored in each PDCS. +functions using a PDCS. The structure of the PDCS is partially +hardware-imposed in the cases of the two segment-based plugins, since +the PDCS contains the Local Descriptor Table (LDT) and the TSS, if +applicable. In the paging plugin, the PDCS structure is entirely +software-defined. 
None of the initial protection domain plugins +support re-entrant protection domains due to hardware-imposed +limitations of TSS and to simplify the implementation of the other +plugins by enabling domain-specific information (e.g. system call +return address) to be trivially stored in each PDCS. ### Paging-Based Protection Domains @@ -547,6 +560,293 @@ be possible to improve the robustness of the system by marking that data as read-only. Doing so would introduce additional complexity into the system. +### Hardware-Switched Segment-Based Protection Domains + +Primary implementation sources: + + - cpu/x86/mm/tss-prot-domains.c + - cpu/x86/mm/tss-prot-domains-asm.S + +#### Introduction + +One TSS is allocated for each protection domain. Each one is +associated with its own dedicated LDT. The memory resources assigned +to each protection domain are represented as segment descriptors in +the LDT for the protection domain. Additional shared memory resources +are represented as segment descriptors in the GDT. + +#### System Call and Return Dispatching + +The system call dispatcher runs in the context of the server +protection domain. It is a common piece of code that is shared among +all protection domains. Thus, each TSS, except the application TSS, +has its EIP field initialized to the entrypoint for the system call +dispatcher so that will be the first code to run when the first switch +to that task is performed. + +The overall process of handling a system call can be illustrated at a +high level as follows. Some minor steps are omitted from this +illustration in the interest of clarity and brevity. + +``` + == BEGIN Client protection domain ========================================== + -- BEGIN Caller ------------------------------------------------------------ + 1. Call system call stub. + -- + 13. Continue execution... 
+ -- END Caller -------------------------------------------------------------- + -- BEGIN System call stub -------------------------------------------------- + 2. Already in desired (server) protection domain? + - No: Request task switch to server protection domain. + - Yes: Jump to system call body. + -- + 12. Return to caller. + -- END System call stub ---------------------------------------------------- + == END Client protection domain ============================================ + == BEGIN Server protection domain ========================================== + -- BEGIN System call dispatcher--------------------------------------------- + 3. Check that the requested system call is allowed. Get entrypoint. + 4. Switch to the main stack. + 5. Pop the client return address off the stack to a callee-saved register. + 6. Push the address of the system call return dispatcher onto the stack. + 7. Jump to system call body. + -- + 10. Restore the client return address to the stack. + 11. Request task switch to client protection domain. + -- END System call dispatcher ---------------------------------------------- + -- BEGIN System call body -------------------------------------------------- + 8. Execute the work for the requested system call. + 9. Return (to system call return stub, unless invoked from server + protection domain, in which case return is to caller). + -- END System call body ---------------------------------------------------- + == END Server protection domain ============================================ +``` + +An additional exception handler is needed, for the "Device Not +Available" exception. The handler comprises just a CLTS and an IRET +instruction. The CLTS instruction is privileged, which is why it must +be run at ring level 0. This exception handler is invoked when a +floating point instruction is used following a task switch, and its +sole purpose is to enable the floating point instruction to execute +after the exception handler returns. 
See the TSS resources listed +above for more details regarding interactions between task switching +and floating point instructions. + +Each segment register may represent a different data region within +each protection domain, although the FS register is used for two +separate purposes at different times. The segments are defined as +follows: + + - CS (code segment) maps all non-startup code with execute-only + permissions in all protection domains. Limiting the code that is + executable within each protection domain to just the code that is + actually needed within that protection domain could improve the + robustness of the system, but it is challenging to determine all + code that may be needed in a given protection domain (e.g. all + needed library routines). Furthermore, that code may not all be + contiguous, and each segment descriptor can only map a contiguous + memory region. Finally, segment-based memory addressing is + relative to an offset of zero from the beginning of each segment, + introducing additional complexity if such fine-grained memory + management were to be used. + - DS (default data segment) typically maps the main stack and all + non-stack data memory that is accessible from all protection + domains. Limiting the data that is accessible via DS within each + protection domain to just the subset of the data that is actually + needed within that protection domain could improve the robustness + of the system, but it is challenging for similar reasons to those + that apply to CS. Access to the main stack via DS is supported so + that code that copies the stack pointer to a register and attempts + to access stack entries via DS works correctly. Disallowing access + to the main stack via DS could improve the robustness of the + system, but that may require modifying code that expects to be able + to access the stack via DS. + - ES is loaded with the same segment descriptor as DS so that string + operations (e.g. 
the MOVS instruction) work correctly. + - FS usually maps the kernel-owned data region. That region can only + be written via FS in the kernel protection domain. FS contains a + descriptor specifying a read-only mapping in all other protection + domains except the application protection domain, in which FS is + nullified. Requiring that code specifically request access to the + kernel-owned data region by using the FS segment may improve the + robustness of the system by blocking undesired accesses to the + kernel-owned data region via memory access instructions within the + kernel protection domain that implicitly access DS. The reason for + granting read-only access to the kernel-owned data region from most + protection domains is that the system call dispatcher runs in the + context of the server protection domain to minimize overhead, and + it requires access to the kernel-owned data region. It may improve + the robustness of the system to avoid this by running the system + call dispatcher in a more-privileged ring level (e.g. ring 1) + within the protection domain and just granting access to the + kernel-owned data region from that ring. However, that would + necessitate a ring level transition to ring 3 when dispatching the + system call, which would increase overhead. The application + protection domain does not export any system calls, so it does not + require access to the kernel-owned data region. + - FS is temporarily loaded with a segment descriptor that maps just + an MMIO region used by a driver protection domain when such a + driver needs to perform MMIO accesses. + - GS maps an optional region of readable and writable metadata that + can be associated with a protection domain. In protection domains + that are not associated with metadata, GS is nullified. + - SS usually maps just the main stack. 
This may improve the + robustness of the system by enabling immediate detection of stack + underflows and overflows rather than allowing such a condition to + result in silent data corruption. Interrupt handlers use a stack + segment that covers the main stack and also includes a region above + the main stack that is specifically for use by interrupt handlers. + In like manner, exception handlers use a stack segment that covers + both of the other stacks and includes an additional region. This + is to support the interrupt dispatchers that copy parameters from + the interrupt-specific stack region to the main stack prior to + pivoting to the main stack to execute an interrupt handler body. + +The approximate memory layout of the system is depicted below, +starting with the highest physical addresses and proceeding to lower +physical addresses. The memory ranges that are mapped at various +times by each of the segment registers are also depicted. Read the +descriptions of each segment above for more information about what +memory range may be mapped by each segment register at various times +with various protection domain configurations. Parenthetical notes +indicate the protection domains that can use each mapping. The suffix +[L] indicates that the descriptor is loaded from LDT. Optional +mappings are denoted by a '?' after the protection domain label. The +'other' protection domain label refers to protection domains other +than the application and kernel domains. + +``` + ... + +------------------------------------------+ \ + | Domain X MMIO | +- FS[L] + +------------------------------------------+ / (other?) + ... + +------------------------------------------+ \ + | Domain X DMA-accessible metadata | +- GS[L] (other?) + | (section .dma_bss) | | + +------------------------------------------+ / + +------------------------------------------+ \ + | Domain X metadata (section .meta_bss) | +- GS[L] (other?) + +------------------------------------------+ / + ... 
+ +------------------------------------------+ \ + | Kernel-private data | | + | (sections .prot_dom_bss, .gdt_bss, etc.) | +- FS[L] (kern) + +------------------------------------------+ | + +------------------------------------------+ \ + | System call data (section .syscall_bss) | | + +------------------------------------------+ +- FS[L] (all) + +------------------------------------------+ | + | Kernel-owned data (section .kern_bss) | | + +------------------------------------------+ / + +------------------------------------------+ \ + | Common data | | + | (sections .data, .rodata*, .bss, etc.) | | + +------------------------------------------+ +- DS, ES + +------------------------------------------+ \ | (all) + | Exception stack (section .exc_stack) | | | + |+----------------------------------------+| \ | + || Interrupt stack (section .int_stack) || | | + ||+--------------------------------------+|| \ | + ||| Main stack (section .main_stack) ||| +- SS (all) | + +++--------------------------------------+++ / / + +------------------------------------------+ \ + | Main code (.text) | +- CS (all) + +------------------------------------------+ / + +------------------------------------------+ + | Bootstrap code (section .boot_text) | + +------------------------------------------+ + +------------------------------------------+ + | Multiboot header | + +------------------------------------------+ + ... +``` + +This memory layout is more efficient than the layout that is possible +with paging-based protection domains, since segments have byte +granularity, whereas the minimum unit of control supported by paging +is a 4KiB page. For example, this means that metadata may need to be +padded to be a multiple of the page size. This may also permit +potentially-undesirable accesses to padded areas of code and data +regions that do not entirely fill the pages that they occupy. 
+ +Kernel data structure access, including to the descriptor tables +themselves, is normally restricted to the code running at ring level +0, specifically the exception handlers and the system call and return +dispatchers. It is also accessible from the cooperative scheduling +context in the kernel protection domain. Interrupt delivery is +disabled in the kernel protection domain, so the preemptive scheduling +context is not used. + +SS, DS, and ES all have the same base address, since the compiler may +assume that a flat memory model is in use. Memory accesses that use a +base register of SP/ESP or BP/EBP or that are generated by certain +other instructions (e.g. PUSH, RET, etc.) are directed to SS by +default, whereas other accesses are directed to DS or ES by default. +The compiler may use an instruction that directs an access to DS or ES +even if the data being accessed is on the stack, which is why these +three segments must use the same base address. However, it is +possible to use a lower limit for SS than for DS and ES for the +following reasons. Compilers commonly provide an option for +preventing the frame pointer, EBP, from being omitted and possibly +used to point to non-stack data. In our tests, compilers never used +ESP to point to non-stack data. + +Each task switch ends up saving and restoring more state than is +actually useful to us, but the implementation attempts to minimize +overhead by configuring the register values in each TSS to reduce the +number of register loads that are needed in the system call +dispatcher. Specifically, two callee-saved registers are populated +with base addresses used when computing addresses in the entrypoint +information table as well as a mask corresponding to the ID of the +server protection domain that is used to check whether the requested +system call is exported by the server protection domain. Callee-saved +registers are used, since the task return will update the saved +register values. 
+ +Note that this implies that the intervening code run between the task +call and return can modify critical data used by the system call +dispatcher. However, this is analogous to the considerations +associated with sharing a single stack amongst all protection domains +and should be addressed similarly, by only invoking protection domains +that are trusted by the caller to not modify the saved critical +values. This consideration is specific to the TSS-based dispatcher +and is not shared by the ring 0 dispatcher used in the other +plugins. + +Data in the .rodata sections is marked read/write, even though it may +be possible to improve the robustness of the system by marking that +data as read-only. Doing so would introduce even more complexity into +the system than would be the case with paging-based protection +domains, since it would require allocating different segment +descriptors for the read-only vs. the read/write data. + +#### Supporting Null-Pointer Checks + +A lot of code considers a pointer value of 0 to be invalid. However, +segment offsets always start at 0. To accommodate the common software +behavior, at least the first byte of each segment is marked as +unusable. An exception to this is that the first byte of the stack +segments is usable. + +#### Interrupt and Exception Dispatching + +A distinctive challenge that occurs during interrupt and exception +dispatching is that the state of the segment registers when an +interrupt or exception occurs is somewhat unpredictable. For example, +an exception may occur while MMIO is being performed, meaning that FS +is loaded with the MMIO descriptor instead of the kernel descriptor. +Leaving the segment registers configured in that way could cause +incorrect interrupt or exception handler behavior. 
Thus, the +interrupt or exception dispatcher must save the current segment +configuration, switch to a configuration that is suitable for the +handler body, and then restore the saved segment configuration after +the handler body returns. Another motivation for this is that the +interrupted code may have corrupted the segment register configuration +in an unexpected manner, since segment register load instructions are +unprivileged. Similar segment register updates must be performed for +similar reasons when dispatching system calls. + ### Pointer Validation Primary implementation sources: @@ -563,10 +863,14 @@ an unintended manner. For example, if an incoming pointer referenced the return address, it could potentially redirect execution with the privileges of the callee protection domain. -It is also necessary to check that the pointer is either within the -stack region or the shared data region (or a guard band region, since -that will generate a fault) to prevent redirection of data accesses to -MMIO or metadata regions. +When the paging-based plugin is in use, it is also necessary to check +that the pointer is either within the stack region or the shared data +region (or a guard band region, since that will generate a fault) to +prevent redirection of data accesses to MMIO or metadata regions. The +other plugins already configure segments to restrict accesses to DS to +just those regions. Pointers provided as inputs to system calls as +defined above should never be dereferenced in any segment other than +DS. The pointer is both validated and copied to a new storage location, which must be within the callee's local stack region (excluding the @@ -648,8 +952,11 @@ The following steps are required: Usage ----- -To enable protection domain support, add -"X86_CONF_PROT_DOMAINS=paging" to the command line. 
+To enable protection domain support, add "X86_CONF_PROT_DOMAINS=" to +the command line and specify one of the following options: + + - paging + - tss The paging option accepts a sub-option to determine whether the TLB is fully- or selectively-invalidated during protection domain switches. diff --git a/cpu/x86/mm/gdt-layout.h b/cpu/x86/mm/gdt-layout.h index 8a5af6cbf..5dddd3a4d 100644 --- a/cpu/x86/mm/gdt-layout.h +++ b/cpu/x86/mm/gdt-layout.h @@ -39,6 +39,8 @@ * outside of gdt.c. */ #define GDT_NUM_FIXED_DESC 7 +#elif X86_CONF_PROT_DOMAINS_MULTI_SEG +#define GDT_NUM_FIXED_DESC 11 #else #define GDT_NUM_FIXED_DESC 3 #endif @@ -66,12 +68,34 @@ /** Stack segment for interrupt handlers */ #define GDT_IDX_STK_INT 5 +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__PAGING #define GDT_IDX_CODE_EXC GDT_IDX_CODE_FLAT /** Default data segment used by code at all privilege levels */ #define GDT_IDX_DATA 6 #define GDT_IDX_STK GDT_IDX_DATA #define GDT_IDX_STK_EXC GDT_IDX_DATA_FLAT #else +/** + * Same bounds and permissions as default code segment, but at the exception + * handler privilege level + */ +#define GDT_IDX_CODE_EXC 6 +/** R/W kernel data descriptor used during boot stage 1 */ +#define GDT_IDX_DATA_KERN_EXC 7 +/** Default data segment used by code at all privilege levels */ +#define GDT_IDX_DATA 8 +/** + * Default stack segment, which overlaps with the beginning of the default data + * segment + */ +#define GDT_IDX_STK 9 +/** Stack segment for exception handlers */ +#define GDT_IDX_STK_EXC 10 + +#define GDT_IDX_TSS(dom_id) (GDT_NUM_FIXED_DESC + (2 * (dom_id))) +#define GDT_IDX_LDT(dom_id) (GDT_NUM_FIXED_DESC + (2 * (dom_id)) + 1) +#endif +#else #define GDT_IDX_CODE GDT_IDX_CODE_FLAT #define GDT_IDX_CODE_INT GDT_IDX_CODE_FLAT #define GDT_IDX_CODE_EXC GDT_IDX_CODE_FLAT @@ -96,10 +120,14 @@ #define GDT_SEL_CODE_EXC GDT_SEL(GDT_IDX_CODE_EXC, PRIV_LVL_EXC) #define GDT_SEL_DATA GDT_SEL(GDT_IDX_DATA, PRIV_LVL_EXC) +#define GDT_SEL_DATA_KERN_EXC 
GDT_SEL(GDT_IDX_DATA_KERN_EXC, PRIV_LVL_EXC) #define GDT_SEL_STK GDT_SEL(GDT_IDX_STK, PRIV_LVL_USER) #define GDT_SEL_STK_INT GDT_SEL(GDT_IDX_STK_INT, PRIV_LVL_INT) #define GDT_SEL_STK_EXC GDT_SEL(GDT_IDX_STK_EXC, PRIV_LVL_EXC) +#define GDT_SEL_TSS(dom_id) GDT_SEL(GDT_IDX_TSS(dom_id), PRIV_LVL_USER) +#define GDT_SEL_LDT(dom_id) GDT_SEL(GDT_IDX_LDT(dom_id), PRIV_LVL_USER) + #endif /* CPU_X86_MM_GDT_LAYOUT_H_ */ diff --git a/cpu/x86/mm/ldt-layout.h b/cpu/x86/mm/ldt-layout.h new file mode 100644 index 000000000..7c61054a5 --- /dev/null +++ b/cpu/x86/mm/ldt-layout.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CPU_X86_MM_LDT_LAYOUT_H_ +#define CPU_X86_MM_LDT_LAYOUT_H_ + +#include "gdt-layout.h" + +/* Each LDT can contain up to this many descriptors, but some protection + * domains may not use all of the slots. + */ +#define LDT_NUM_DESC 3 + +/** + * Provides access to kernel data. Most protection domains are granted at most + * read-only access, but the kernel protection domain is granted read/write + * access. + */ +#define LDT_IDX_KERN 0 +/** Maps a device MMIO range */ +#define LDT_IDX_MMIO 1 +/** Maps domain-defined metadata */ +#define LDT_IDX_META 2 + +#define LDT_SEL(idx, rpl) (GDT_SEL(idx, rpl) | (1 << 2)) + +#define LDT_SEL_KERN LDT_SEL(LDT_IDX_KERN, PRIV_LVL_USER) +#define LDT_SEL_MMIO LDT_SEL(LDT_IDX_MMIO, PRIV_LVL_USER) +#define LDT_SEL_META LDT_SEL(LDT_IDX_META, PRIV_LVL_USER) +#define LDT_SEL_STK LDT_SEL(LDT_IDX_STK, PRIV_LVL_USER) + +#endif /* CPU_X86_MM_LDT_LAYOUT_H_ */ diff --git a/cpu/x86/mm/multi-segment.c b/cpu/x86/mm/multi-segment.c new file mode 100644 index 000000000..f60a2c8bb --- /dev/null +++ b/cpu/x86/mm/multi-segment.c @@ -0,0 +1,239 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "gdt.h" +#include "helpers.h" +#include "prot-domains.h" +#include "segmentation.h" +#include "stacks.h" + +/*---------------------------------------------------------------------------*/ +static uint32_t +segment_desc_compute_base(segment_desc_t desc) +{ + return (desc.base_hi << 24) | (desc.base_mid << 16) | desc.base_lo; +} +/*---------------------------------------------------------------------------*/ +void +prot_domains_reg_multi_seg(volatile struct dom_kern_data ATTR_KERN_ADDR_SPACE *dkd, + uintptr_t mmio, size_t mmio_sz, + uintptr_t meta, size_t meta_sz) +{ + segment_desc_t desc; + dom_id_t dom_id = PROT_DOMAINS_GET_DOM_ID(dkd); + uint32_t kern_data_len; + uint32_t tmp; + + if((dkd < prot_domains_kern_data) || + (prot_domains_kern_data_end <= dkd) || + (((((uintptr_t)dkd) - (uintptr_t)prot_domains_kern_data) % + sizeof(dom_kern_data_t)) != 0)) { + halt(); + } + + KERN_READL(tmp, dkd->ldt[DT_SEL_GET_IDX(LDT_SEL_KERN)].raw_hi); + if(tmp != 0) { + /* This PDCS was previously initialized, which is disallowed. */ + halt(); + } + + /* Initialize descriptors */ + + if(dom_id == DOM_ID_kern) { + kern_data_len = (uint32_t)&_ebss_kern_addr; + } else { + /* Non-kernel protection domains do not need to access the protection + * domain control structures, and they may contain saved register values + * that are private to each domain. + */ + kern_data_len = (uint32_t)&_ebss_syscall_addr; + } + kern_data_len -= (uint32_t)&_sbss_kern_addr; + + segment_desc_init(&desc, (uint32_t)&_sbss_kern_addr, kern_data_len, + /* Every protection domain requires at least read-only access to kernel + data to read dom_client_data structures and to support the system call + dispatcher, if applicable. Only the kernel protection domain is granted + read/write access to the kernel data. */ + ((dom_id == DOM_ID_kern) ? 
+ SEG_TYPE_DATA_RDWR : + SEG_TYPE_DATA_RDONLY) | + SEG_FLAG(DPL, PRIV_LVL_USER) | + SEG_GRAN_BYTE | SEG_DESCTYPE_NSYS); + + KERN_WRITEL(dkd->ldt[LDT_IDX_KERN].raw_lo, desc.raw_lo); + KERN_WRITEL(dkd->ldt[LDT_IDX_KERN].raw_hi, desc.raw_hi); + + if(mmio_sz != 0) { + if(SEG_MAX_BYTE_GRAN_LEN < mmio_sz) { + halt(); + } + + segment_desc_init(&desc, mmio, mmio_sz, + SEG_FLAG(DPL, PRIV_LVL_USER) | SEG_GRAN_BYTE | + SEG_DESCTYPE_NSYS | SEG_TYPE_DATA_RDWR); + } else { + desc.raw = SEG_DESC_NOT_PRESENT; + } + + KERN_WRITEL(dkd->ldt[LDT_IDX_MMIO].raw_lo, desc.raw_lo); + KERN_WRITEL(dkd->ldt[LDT_IDX_MMIO].raw_hi, desc.raw_hi); + + if(meta_sz != 0) { + if(SEG_MAX_BYTE_GRAN_LEN < meta_sz) { + halt(); + } + + segment_desc_init(&desc, meta, meta_sz, + SEG_FLAG(DPL, PRIV_LVL_USER) | SEG_GRAN_BYTE | + SEG_DESCTYPE_NSYS | SEG_TYPE_DATA_RDWR); + } else { + desc.raw = SEG_DESC_NOT_PRESENT; + } + + KERN_WRITEL(dkd->ldt[LDT_IDX_META].raw_lo, desc.raw_lo); + KERN_WRITEL(dkd->ldt[LDT_IDX_META].raw_hi, desc.raw_hi); + + segment_desc_init(&desc, + KERN_DATA_OFF_TO_PHYS_ADDR(dkd->ldt), + sizeof(dkd->ldt), + SEG_FLAG(DPL, PRIV_LVL_USER) | SEG_GRAN_BYTE | + SEG_DESCTYPE_SYS | SEG_TYPE_LDT); + gdt_insert(GDT_IDX_LDT(dom_id), desc); +} +/*---------------------------------------------------------------------------*/ +void +prot_domains_gdt_init() +{ + int i; + segment_desc_t desc; + + segment_desc_init(&desc, + (uint32_t)&_stext_addr, + ((uint32_t)&_etext_addr) - (uint32_t)&_stext_addr, + SEG_FLAG(DPL, PRIV_LVL_EXC) | SEG_GRAN_BYTE | + SEG_DESCTYPE_NSYS | SEG_TYPE_CODE_EX); + gdt_insert_boot(GDT_IDX_CODE_EXC, desc); + + segment_desc_init(&desc, + (uint32_t)&_sdata_addr, + ((uint32_t)&_edata_addr) - (uint32_t)&_sdata_addr, + SEG_FLAG(DPL, PRIV_LVL_USER) | SEG_GRAN_BYTE | + SEG_DESCTYPE_NSYS | SEG_TYPE_DATA_RDWR); + gdt_insert_boot(GDT_IDX_DATA, desc); + + segment_desc_init(&desc, + (uint32_t)&_sbss_kern_addr, + ((uint32_t)&_ebss_kern_addr) - + (uint32_t)&_sbss_kern_addr, + SEG_FLAG(DPL, 
PRIV_LVL_EXC) | SEG_GRAN_BYTE | + SEG_DESCTYPE_NSYS | SEG_TYPE_DATA_RDWR); + gdt_insert_boot(GDT_IDX_DATA_KERN_EXC, desc); + + segment_desc_init(&desc, + (uint32_t)DATA_OFF_TO_PHYS_ADDR(stacks_main), + STACKS_SIZE_MAIN, + SEG_FLAG(DPL, PRIV_LVL_USER) | SEG_GRAN_BYTE | + SEG_DESCTYPE_NSYS | SEG_TYPE_DATA_RDWR); + gdt_insert_boot(GDT_IDX_STK, desc); + + segment_desc_set_limit(&desc, STACKS_SIZE_MAIN + STACKS_SIZE_INT); + SEG_SET_FLAG(desc, DPL, PRIV_LVL_INT); + gdt_insert_boot(GDT_IDX_STK_INT, desc); + + segment_desc_set_limit(&desc, + STACKS_SIZE_MAIN + + STACKS_SIZE_INT + + STACKS_SIZE_EXC); + SEG_SET_FLAG(desc, DPL, PRIV_LVL_EXC); + gdt_insert_boot(GDT_IDX_STK_EXC, desc); + + /* Not all domains will necessarily be initialized, so this initially marks + * all per-domain descriptors not-present. + */ + desc.raw = SEG_DESC_NOT_PRESENT; + for(i = 0; i < PROT_DOMAINS_ACTUAL_CNT; i++) { + gdt_insert_boot(GDT_IDX_TSS(i), desc); + gdt_insert_boot(GDT_IDX_LDT(i), desc); + } + + __asm__ __volatile__ ( + "mov %[_default_data_], %%ds\n\t" + "mov %[_default_data_], %%es\n\t" + "mov %[_kern_data_], %%" SEG_KERN "s\n\t" + : + : [_default_data_] "r"(GDT_SEL_DATA), + [_kern_data_] "r"(GDT_SEL_DATA_KERN_EXC)); +} +/*---------------------------------------------------------------------------*/ +void +multi_segment_launch_kernel(void) +{ + /* Update segment registers. 
*/ + __asm__ __volatile__ ( + "mov %[_data_seg_], %%ds\n\t" + "mov %[_data_seg_], %%es\n\t" + "mov %[_kern_seg_], %%" SEG_KERN "s\n\t" + "mov %[_data_seg_], %%" SEG_META "s\n\t" + : + : [_data_seg_] "r" (GDT_SEL_DATA), + [_kern_seg_] "r" (LDT_SEL_KERN) + ); +} +/*---------------------------------------------------------------------------*/ +void +prot_domains_enable_mmio(void) +{ + __asm__ __volatile__ ("mov %0, %%" SEG_MMIO "s" :: "r" (LDT_SEL_MMIO)); +} +/*---------------------------------------------------------------------------*/ +void +prot_domains_disable_mmio(void) +{ + __asm__ __volatile__ ("mov %0, %%" SEG_KERN "s" :: "r" (LDT_SEL_KERN)); +} +/*---------------------------------------------------------------------------*/ +uintptr_t +prot_domains_lookup_meta_phys_base(dom_client_data_t ATTR_KERN_ADDR_SPACE *drv) +{ + dom_id_t dom_id; + segment_desc_t desc; + volatile dom_kern_data_t ATTR_KERN_ADDR_SPACE *dkd; + + KERN_READL(dom_id, drv->dom_id); + + dkd = prot_domains_kern_data + dom_id; + + KERN_READL(desc.raw_lo, dkd->ldt[DT_SEL_GET_IDX(LDT_SEL_META)].raw_lo); + KERN_READL(desc.raw_hi, dkd->ldt[DT_SEL_GET_IDX(LDT_SEL_META)].raw_hi); + + return segment_desc_compute_base(desc); +} +/*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/mm/multi-segment.h b/cpu/x86/mm/multi-segment.h new file mode 100644 index 000000000..baa28002b --- /dev/null +++ b/cpu/x86/mm/multi-segment.h @@ -0,0 +1,195 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef CPU_X86_MM_MULTI_SEGMENT_H_
+#define CPU_X86_MM_MULTI_SEGMENT_H_
+
+#include <stdint.h>
+#include <stddef.h>
+#include "helpers.h"
+#include "ldt-layout.h"
+
+#ifdef __clang__
+#define __SEG_FS
+#define __seg_fs __attribute__((address_space(257)))
+#define __SEG_GS
+#define __seg_gs __attribute__((address_space(256)))
+#endif
+
+#ifdef __SEG_FS
+#define ATTR_MMIO_ADDR_SPACE __seg_fs
+#define ATTR_KERN_ADDR_SPACE __seg_fs
+#else
+#define ATTR_KERN_ADDR_SPACE
+#endif
+#ifdef __SEG_GS
+#define ATTR_META_ADDR_SPACE __seg_gs
+#endif
+
+void prot_domains_reg_multi_seg(volatile struct dom_kern_data ATTR_KERN_ADDR_SPACE *dkd,
+                                uintptr_t mmio, size_t mmio_sz,
+                                uintptr_t meta, size_t meta_sz);
+void multi_segment_launch_kernel(void);
+
+#define MULTI_SEGMENT_ENTER_ISR(exc) \
+  "mov $" EXP_STRINGIFY(GDT_SEL_DATA) ", %%eax\n\t" \
+  /* Refresh DS and ES in case the userspace code corrupted them. */ \
+  "mov %%eax, %%ds\n\t" \
+  "mov %%eax, %%es\n\t" \
+  /* Refresh SEG_KERN. */ \
+  "mov $" EXP_STRINGIFY(LDT_SEL_KERN) ", %%eax\n\t" \
+  "mov %%eax, %%" SEG_KERN "s\n\t" \
+  ".if " #exc "\n\t" \
+  /* It is possible that a routine performing MMIO is being interrupted. */ \
+  /* Thus, it is necessary to save and restore the MMIO segment register */ \
+  /* (in a callee-saved register). */ \
+  "mov %%" SEG_MMIO "s, %%ebp\n\t" \
+  "mov $" EXP_STRINGIFY(GDT_SEL_DATA_KERN_EXC) ", %%eax\n\t" \
+  "mov %%eax, %%" SEG_KERN "s\n\t" \
+  ".endif\n\t"
+#define MULTI_SEGMENT_LEAVE_ISR(exc) \
+  ".if " #exc "\n\t" \
+  "mov %%ebp, %%" SEG_MMIO "s\n\t" \
+  ".endif\n\t"
+
+/**
+ * The MMIO region is tightly bounded within a segment, so its base offset is
+ * always 0.
+ */
+#define PROT_DOMAINS_MMIO(dcd) 0
+/**
+ * The metadata region is tightly bounded within a segment, so its base offset
+ * is always 0.
+ */
+#define PROT_DOMAINS_META(dcd) 0
+
+#define SEG_MMIO "f" /**< For MMIO accesses, when enabled.
*/ +#define SEG_KERN "f" /**< For kernel data accesses */ +#define SEG_META "g" /**< For metadata accesses */ + +#define _SEG_READL(seg, dst, src) \ + __asm__ __volatile__ ( \ + "movl %%" seg "s:%[src_], %[dst_]" : [dst_]"=r"(dst) : [src_]"m"(src)) + +#define _SEG_READW(seg, dst, src) \ + __asm__ __volatile__ ( \ + "movw %%" seg "s:%[src_], %[dst_]" : [dst_]"=r"(dst) : [src_]"m"(src)) + +#define _SEG_READB(seg, dst, src) \ + __asm__ __volatile__ ( \ + "movb %%" seg "s:%[src_], %[dst_]" : [dst_]"=q"(dst) : [src_]"m"(src)) + +#define _SEG_WRITEL(seg, dst, src) \ + __asm__ __volatile__ ( \ + "movl %[src_], %%" seg "s:%[dst_]" \ + : [dst_]"=m"(dst) : [src_]"r"((uint32_t)(src))) + +#define _SEG_WRITEW(seg, dst, src) \ + __asm__ __volatile__ ( \ + "movw %[src_], %%" seg "s:%[dst_]" \ + : [dst_]"=m"(dst) : [src_]"r"((uint16_t)(src))) + +#define _SEG_WRITEB(seg, dst, src) \ + __asm__ __volatile__ ( \ + "movb %[src_], %%" seg "s:%[dst_]" \ + : [dst_]"=m"(dst) : [src_]"q"((uint8_t)(src))) + +#ifndef __SEG_FS +#define MMIO_READL(dst, src) _SEG_READL(SEG_MMIO, dst, src) +#define MMIO_READW(dst, src) _SEG_READW(SEG_MMIO, dst, src) +#define MMIO_READB(dst, src) _SEG_READB(SEG_MMIO, dst, src) +#define MMIO_WRITEL(dst, src) _SEG_WRITEL(SEG_MMIO, dst, src) +#define MMIO_WRITEW(dst, src) _SEG_WRITEW(SEG_MMIO, dst, src) +#define MMIO_WRITEB(dst, src) _SEG_WRITEB(SEG_MMIO, dst, src) + +#define KERN_READL(dst, src) _SEG_READL(SEG_KERN, dst, src) +#define KERN_READW(dst, src) _SEG_READW(SEG_KERN, dst, src) +#define KERN_READB(dst, src) _SEG_READB(SEG_KERN, dst, src) +#define KERN_WRITEL(dst, src) _SEG_WRITEL(SEG_KERN, dst, src) +#define KERN_WRITEW(dst, src) _SEG_WRITEW(SEG_KERN, dst, src) +#define KERN_WRITEB(dst, src) _SEG_WRITEB(SEG_KERN, dst, src) +#endif + +#ifndef __SEG_GS +#define META_READL(dst, src) _SEG_READL(SEG_META, dst, src) +#define META_READW(dst, src) _SEG_READW(SEG_META, dst, src) +#define META_READB(dst, src) _SEG_READB(SEG_META, dst, src) +#define META_WRITEL(dst, 
src) _SEG_WRITEL(SEG_META, dst, src) +#define META_WRITEW(dst, src) _SEG_WRITEW(SEG_META, dst, src) +#define META_WRITEB(dst, src) _SEG_WRITEB(SEG_META, dst, src) +#endif + +#define MEMCPY_FROM_META(dst, src, sz) \ + { \ + uintptr_t __dst = (uintptr_t)(dst); \ + uintptr_t __src = (uintptr_t)(src); \ + size_t __sz = (size_t)(sz); \ + __asm__ __volatile__ ( \ + "rep movsb %%" SEG_META "s:(%%esi), %%es:(%%edi)\n\t" \ + : "+D"(__dst), "+S"(__src), "+c"(__sz)); \ + } + +#define MEMCPY_TO_META(dst, src, sz) \ + { \ + uintptr_t __dst = (uintptr_t)(dst); \ + uintptr_t __src = (uintptr_t)(src); \ + size_t __sz = (size_t)(sz); \ + __asm__ __volatile__ ( \ + "push %%es\n\t" \ + "push %%" SEG_META "s\n\t" \ + "pop %%es\n\t" \ + "rep movsb\n\t" \ + "pop %%es\n\t" \ + : "+D"(__dst), "+S"(__src), "+c"(__sz)); \ + } + +/** Compute physical address from offset into kernel data space */ +#define KERN_DATA_OFF_TO_PHYS_ADDR(x) \ + (((uintptr_t)&_sbss_kern_addr) + (uintptr_t)(x)) +/** Compute physical address from offset into default data space */ +#define DATA_OFF_TO_PHYS_ADDR(x) \ + (((uintptr_t)&_sdata_addr) + (uintptr_t)(x)) +/** Compute kernel data offset from physical address in kernel data space */ +#define PHYS_ADDR_TO_KERN_DATA_OFF(x) \ + (((uintptr_t)(x)) - (uintptr_t)&_sbss_kern_addr) + +/** + * In multi-segment protection domain implementations, it is sufficient to just + * compare incoming pointers against the frame pointer. All incoming pointers + * are dereferenced in the main data segment, which only maps the stacks and + * the shared data section. Since the shared data section is at a higher + * address range than the stacks, the frame pointer check is sufficient. 
+ */ +#define PROT_DOMAINS_CHECK_INCOMING_PTR PROT_DOMAINS_CHECK_INCOMING_PTR_EBP + +void prot_domains_enable_mmio(void); +void prot_domains_disable_mmio(void); + +#endif /* CPU_X86_MM_MULTI_SEGMENT_H_ */ diff --git a/cpu/x86/mm/prot-domains.c b/cpu/x86/mm/prot-domains.c index 593da98e2..8bbeb4d83 100644 --- a/cpu/x86/mm/prot-domains.c +++ b/cpu/x86/mm/prot-domains.c @@ -39,10 +39,12 @@ #include "stacks.h" static dom_kern_data_t __attribute__((section(".kern_prot_dom_bss"))) - PROT_DOMAINS_PDCS_NM(kern_dcd); + ATTR_KERN_ADDR_SPACE PROT_DOMAINS_PDCS_NM(kern_dcd); +PROT_DOMAINS_ALLOC_IMPL(kern_dcd); static dom_client_data_t ATTR_BSS_KERN kern_dcd; static dom_kern_data_t __attribute__((section(".app_prot_dom_bss"))) - PROT_DOMAINS_PDCS_NM(app_dcd); + ATTR_KERN_ADDR_SPACE PROT_DOMAINS_PDCS_NM(app_dcd); +PROT_DOMAINS_ALLOC_IMPL(app_dcd); static dom_client_data_t ATTR_BSS_KERN app_dcd; /*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/mm/prot-domains.h b/cpu/x86/mm/prot-domains.h index f7dc84e3c..a1fbca130 100644 --- a/cpu/x86/mm/prot-domains.h +++ b/cpu/x86/mm/prot-domains.h @@ -40,6 +40,10 @@ #define X86_CONF_PROT_DOMAINS__NONE 0 #define X86_CONF_PROT_DOMAINS__PAGING 1 +#define X86_CONF_PROT_DOMAINS__TSS 2 + +#define X86_CONF_PROT_DOMAINS_MULTI_SEG \ + (X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__TSS) /** Privilege level (ring) for exception handlers and other supervisory code */ #define PRIV_LVL_EXC 0 @@ -68,6 +72,49 @@ typedef uint32_t dom_id_t; #if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__PAGING #include "paging-prot-domains.h" +#elif X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__TSS +#include "tss-prot-domains.h" +#endif + +#ifndef ATTR_META_ADDR_SPACE +#define ATTR_META_ADDR_SPACE +#endif +#ifndef ATTR_MMIO_ADDR_SPACE +#define ATTR_MMIO_ADDR_SPACE +#endif +#ifndef ATTR_KERN_ADDR_SPACE +#define ATTR_KERN_ADDR_SPACE +#endif + +#ifndef MMIO_READL +#define MMIO_READL(dst, src) dst = (src) +#define 
MMIO_READW(dst, src) dst = (src)
+#define MMIO_READB(dst, src) dst = (src)
+#define MMIO_WRITEL(dst, src) MMIO_READL(dst, src)
+#define MMIO_WRITEW(dst, src) MMIO_READW(dst, src)
+#define MMIO_WRITEB(dst, src) MMIO_READB(dst, src)
+#endif
+#ifndef KERN_READL
+#define KERN_READL(dst, src) dst = (src)
+#define KERN_READW(dst, src) dst = (src)
+#define KERN_READB(dst, src) dst = (src)
+#define KERN_WRITEL(dst, src) KERN_READL(dst, src)
+#define KERN_WRITEW(dst, src) KERN_READW(dst, src)
+#define KERN_WRITEB(dst, src) KERN_READB(dst, src)
+#endif
+#ifndef META_READL
+#define META_READL(dst, src) dst = (src)
+#define META_READW(dst, src) dst = (src)
+#define META_READB(dst, src) dst = (src)
+#define META_WRITEL(dst, src) META_READL(dst, src)
+#define META_WRITEW(dst, src) META_READW(dst, src)
+#define META_WRITEB(dst, src) META_READB(dst, src)
+#endif
+
+#ifndef MEMCPY_FROM_META
+#define MEMCPY_FROM_META(dst, src, sz) \
+  memcpy((void *)(dst), (const void *)(src), (sz))
+#define MEMCPY_TO_META(dst, src, sz) MEMCPY_FROM_META(dst, src, sz)
 #endif
 
 /* The following symbols are defined in the linker script */
@@ -77,9 +124,9 @@ extern uint32_t _stext_addr, _etext_addr;
 
 #if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__NONE
 /** Metadata that should not be DMA-accessible */
-#define ATTR_BSS_META __attribute__((section(".meta_bss")))
+#define ATTR_BSS_META __attribute__((section(".meta_bss"))) ATTR_META_ADDR_SPACE
 /** Kernel-owned data */
-#define ATTR_BSS_KERN __attribute__((section(".kern_bss")))
+#define ATTR_BSS_KERN __attribute__((section(".kern_bss"))) ATTR_KERN_ADDR_SPACE
 /** Code that should only be executable during bootup */
 #define ATTR_CODE_BOOT __attribute__((section(".boot_text")))
@@ -97,6 +144,10 @@ extern uint32_t _ebss_syscall_addr;
 /** Bounds for other data sections */
 extern uint32_t _sdata_addr, _edata_addr;
 
+#ifndef SEG_KERN
+#define SEG_KERN "d"
+#endif
+
 /**
  * If set, this protection domain is already in the call stack and is not
  * available for
nested invocations. @@ -114,8 +165,8 @@ extern uint32_t _sdata_addr, _edata_addr; */ typedef struct dom_kern_data dom_kern_data_t; -extern volatile dom_kern_data_t prot_domains_kern_data[]; -extern volatile dom_kern_data_t prot_domains_kern_data_end[]; +extern volatile dom_kern_data_t ATTR_KERN_ADDR_SPACE prot_domains_kern_data[]; +extern volatile dom_kern_data_t ATTR_KERN_ADDR_SPACE prot_domains_kern_data_end[]; #define PROT_DOMAINS_ACTUAL_CNT \ (prot_domains_kern_data_end - prot_domains_kern_data) @@ -125,6 +176,7 @@ extern volatile dom_kern_data_t prot_domains_kern_data_end[]; void prot_domains_syscall_dispatcher(void); +#if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__TSS /** * Data associated with each protection domain that is owned by clients of that * domain and used to identify the domain. @@ -132,15 +184,21 @@ void prot_domains_syscall_dispatcher(void); struct dom_client_data { dom_id_t dom_id; } __attribute__((packed)); +#endif + +#ifndef PROT_DOMAINS_ALLOC_IMPL +#define PROT_DOMAINS_ALLOC_IMPL(nm) +#endif /** Allocate the client-owned protection domain data structure. 
*/ #define PROT_DOMAINS_PDCS_NM(nm) _pdcs_##nm #define PROT_DOMAINS_ALLOC(typ, nm) \ static dom_kern_data_t __attribute__((section(".prot_dom_bss"))) \ - PROT_DOMAINS_PDCS_NM(nm); \ + ATTR_KERN_ADDR_SPACE PROT_DOMAINS_PDCS_NM(nm); \ + PROT_DOMAINS_ALLOC_IMPL(nm); \ static typ ATTR_BSS_KERN nm #define PROT_DOMAINS_INIT_ID(nm) \ - (nm).dom_id = PROT_DOMAINS_GET_DOM_ID(&PROT_DOMAINS_PDCS_NM(nm)) + KERN_WRITEL((nm).dom_id, PROT_DOMAINS_GET_DOM_ID(&PROT_DOMAINS_PDCS_NM(nm))) /** * Perform early initialization during boot stage 0 to prepare for boot stage 1 @@ -169,8 +227,12 @@ void prot_domains_launch_kernel(void); */ #define PROT_DOMAINS_INIT_RET_ADDR_CNT 2 +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__TSS +void prot_domains_launch_app(void); +#else void app_main(void); #define prot_domains_launch_app app_main +#endif #else @@ -229,7 +291,7 @@ typedef struct dom_client_data dom_client_data_t; * \param meta_sz Size of metadata * \param pio Set to true if protection domain requires port IO access */ -void prot_domains_reg(dom_client_data_t *dcd, +void prot_domains_reg(dom_client_data_t ATTR_KERN_ADDR_SPACE *dcd, uintptr_t mmio, size_t mmio_sz, uintptr_t meta, @@ -237,11 +299,41 @@ void prot_domains_reg(dom_client_data_t *dcd, bool pio); #endif +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__NONE +#define prot_domains_copy_dcd(dst, src) *(dst) = *(src) +#else +static inline void +/** + * It is necessary to make a local copy of a dom_client_data structure when a + * multi-segment protection domain implementation is in use, segment attributes + * are not supported by the compiler, and a dom_client_data structure needs to + * be passed by value into some function. Otherwise, the compiler will not know + * to access the non-default segment in which *src is stored and will attempt + * to copy it out of the default data segment. 
+ */ +prot_domains_copy_dcd(struct dom_client_data *dst, + struct dom_client_data ATTR_KERN_ADDR_SPACE *src) +{ + KERN_READL(dst->dom_id, src->dom_id); +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__TSS + KERN_READL(dst->tss_sel, src->tss_sel); +#endif +} +#endif + +#if !X86_CONF_PROT_DOMAINS_MULTI_SEG +#define prot_domains_enable_mmio() +#define prot_domains_disable_mmio() + +#define KERN_DATA_OFF_TO_PHYS_ADDR(x) ((uintptr_t)(x)) +#define DATA_OFF_TO_PHYS_ADDR(x) ((uintptr_t)(x)) +#endif + #if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__NONE #define prot_domains_lookup_meta_phys_base(drv) 0 #else /** Lookup base physical address of metadata region for specified domain */ -uintptr_t prot_domains_lookup_meta_phys_base(dom_client_data_t *drv); +uintptr_t prot_domains_lookup_meta_phys_base(dom_client_data_t ATTR_KERN_ADDR_SPACE *drv); #endif #if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__PAGING @@ -270,6 +362,11 @@ uintptr_t prot_domains_lookup_meta_phys_base(dom_client_data_t *drv); ".endif\n\t" #endif +#ifdef X86_CONF_PROT_DOMAINS_MULTI_SEG +/* include GDT section definitions used when allocating protection domains: */ +#include "gdt.h" +#endif + #endif /* !__ASSEMBLER__ */ #endif /* CPU_X86_MM_PROT_DOMAINS_H_ */ diff --git a/cpu/x86/mm/segmentation.h b/cpu/x86/mm/segmentation.h index 57b1b8aea..71cd6beb6 100644 --- a/cpu/x86/mm/segmentation.h +++ b/cpu/x86/mm/segmentation.h @@ -59,8 +59,11 @@ #define SEG_WIDTH_GRAN 1 #define SEG_SHAMT_GRAN 15 +#define SEG_TYPE_DATA_RDONLY SEG_FLAG(TYPE, 0x00) /* Read only */ #define SEG_TYPE_DATA_RDWR SEG_FLAG(TYPE, 0x02) /* Read/Write */ #define SEG_TYPE_CODE_EXRD SEG_FLAG(TYPE, 0x0A) /* Execute/Read */ +#define SEG_TYPE_CODE_EX SEG_FLAG(TYPE, 0x08) /* Execute only */ +#define SEG_TYPE_LDT SEG_FLAG(TYPE, 0x02) #define SEG_TYPE_TSS32_AVAIL SEG_FLAG(TYPE, 0x09) #define SEG_DESCTYPE_SYS SEG_FLAG(DESCTYPE, 0) @@ -73,6 +76,12 @@ #define SEG_GRAN_BYTE SEG_FLAG(GRAN, 0) #define SEG_GRAN_PAGE SEG_FLAG(GRAN, 1) +/** + * 
Maximum length of segment that can be regulated with a byte-granularity + * segment limit. + */ +#define SEG_MAX_BYTE_GRAN_LEN (1 << 20) + /** * Segment descriptor. See Intel Combined Manual, * Vol. 3, Section 3.4.5 for more details. @@ -91,7 +100,13 @@ typedef union segment_desc { uint64_t raw; } segment_desc_t; -static inline void +#define SEG_DESC_NOT_PRESENT 0 + +/* The next two functions are invoked by boot code, so they must always be + * inlined to avoid being placed in a different address space than the initial, + * flat address space. + */ +static inline void __attribute__((always_inline)) segment_desc_set_limit(segment_desc_t *c_this, uint32_t len) { uint32_t limit = len - 1; @@ -108,7 +123,7 @@ segment_desc_set_limit(segment_desc_t *c_this, uint32_t len) * \param flags Flags to be added to the default flags: present, default * operand size of 32 bits, and high limit bits. */ -static inline void +static inline void __attribute__((always_inline)) segment_desc_init(segment_desc_t *c_this, uint32_t base, uint32_t len, uint16_t flags) { diff --git a/cpu/x86/mm/stacks.h b/cpu/x86/mm/stacks.h index a1005d8e0..96be72cf9 100644 --- a/cpu/x86/mm/stacks.h +++ b/cpu/x86/mm/stacks.h @@ -61,6 +61,17 @@ #else #define STACKS_SIZE_EXC 256 #endif +#elif X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__TSS +/** + * This should be large enough to execute the exception handler with the + * largest stack requirement: double_fault_handler: + * - 1 word for the return address from calling double_fault_handler + * - 1 word for the saved frame pointer in double_fault_handler + * - 2 words that GCC has been observed to skip on the stack to align it + * to a preferred boundary + * - 1 word for the return address for calling halt + */ +#define STACKS_SIZE_EXC (STACKS_SIZE_INT + (6 * 4)) #else #define STACKS_SIZE_EXC STACKS_SIZE_INT #endif diff --git a/cpu/x86/mm/syscalls.h b/cpu/x86/mm/syscalls.h index 83be7a47e..cae8ff2f5 100644 --- a/cpu/x86/mm/syscalls.h +++ 
b/cpu/x86/mm/syscalls.h
@@ -33,6 +33,7 @@
 
 #include "helpers.h"
 #include "prot-domains.h"
+#include <stdbool.h>
 
 typedef uint32_t dom_id_bitmap_t;
 
@@ -40,8 +41,8 @@ typedef struct syscalls_entrypoint {
   uintptr_t entrypoint;
   dom_id_bitmap_t doms;
 } syscalls_entrypoint_t;
-extern syscalls_entrypoint_t syscalls_entrypoints[];
-extern syscalls_entrypoint_t syscalls_entrypoints_end[];
+extern syscalls_entrypoint_t ATTR_KERN_ADDR_SPACE syscalls_entrypoints[];
+extern syscalls_entrypoint_t ATTR_KERN_ADDR_SPACE syscalls_entrypoints_end[];
 
 #define SYSCALLS_ACTUAL_CNT (syscalls_entrypoints_end - syscalls_entrypoints)
 
@@ -49,11 +50,11 @@ extern syscalls_entrypoint_t syscalls_entrypoints_end[];
 
 #define SYSCALLS_ALLOC_ENTRYPOINT(nm) \
   syscalls_entrypoint_t __attribute__((section(".syscall_bss"))) \
-    _syscall_ent_##nm
+    ATTR_KERN_ADDR_SPACE _syscall_ent_##nm
 
 #define SYSCALLS_INIT(nm) \
-  _syscall_ent_##nm.entrypoint = (uintptr_t)_syscall_##nm; \
-  _syscall_ent_##nm.doms = 0
+  KERN_WRITEL(_syscall_ent_##nm.entrypoint, (uintptr_t)_syscall_##nm); \
+  KERN_WRITEL(_syscall_ent_##nm.doms, 0)
 
 #define SYSCALLS_DEFINE(nm, ...)
\ void _syscall_##nm(__VA_ARGS__); \ @@ -65,8 +66,19 @@ extern syscalls_entrypoint_t syscalls_entrypoints_end[]; SYSCALLS_STUB_SINGLETON(nm, dcd); \ void _syscall_##nm(__VA_ARGS__) -#define SYSCALLS_AUTHZ(nm, drv) _syscall_ent_##nm.doms |= BIT((drv).dom_id) -#define SYSCALLS_DEAUTHZ(nm, drv) _syscall_ent_##nm.doms &= ~BIT((drv).dom_id) +#define SYSCALLS_AUTHZ_UPD(nm, drv, set) \ + { \ + dom_id_t _sc_tmp_id; \ + dom_id_bitmap_t _sc_tmp_bm; \ + KERN_READL(_sc_tmp_id, (drv).dom_id); \ + KERN_READL(_sc_tmp_bm, _syscall_ent_##nm.doms); \ + if(set) { \ + _sc_tmp_bm |= BIT(_sc_tmp_id); \ + } else { \ + _sc_tmp_bm &= ~BIT(_sc_tmp_id); \ + } \ + KERN_WRITEL(_syscall_ent_##nm.doms, _sc_tmp_bm); \ + } /** * Check that any untrusted pointer that could have been influenced by a caller @@ -78,7 +90,11 @@ extern syscalls_entrypoint_t syscalls_entrypoints_end[]; * * This also checks that the pointer is either within the stack region or the * shared data region, which is important for preventing redirection of data - * accesses to MMIO or metadata regions. + * accesses to MMIO or metadata regions. This check is omitted for multi- + * segment protection domain implementations, since the segment settings + * already enforce this property for pointers dereferenced in DS. Pointers + * that can be influenced by a caller should not be dereferenced in any other + * segment. * * The pointer is both validated and copied to a new storage location, which * must be within the callee's local stack region (excluding the parameter @@ -92,6 +108,14 @@ extern syscalls_entrypoint_t syscalls_entrypoints_end[]; * references the return address, it could potentially redirect execution with * the privileges of the callee protection domain. 
*/ +#if X86_CONF_PROT_DOMAINS_MULTI_SEG +#define PROT_DOMAINS_VALIDATE_PTR(validated, untrusted, sz) \ + validated = untrusted; \ + if(((uintptr_t)(validated)) < \ + ((2 * sizeof(uintptr_t)) + (uintptr_t)__builtin_frame_address(0))) { \ + halt(); \ + } +#else #define PROT_DOMAINS_VALIDATE_PTR(validated, untrusted, sz) \ validated = untrusted; \ if((((uintptr_t)(validated)) < \ @@ -99,6 +123,7 @@ extern syscalls_entrypoint_t syscalls_entrypoints_end[]; (((uintptr_t)&_edata_addr) <= (((uintptr_t)(validated)) + (sz)))) { \ halt(); \ } +#endif #else @@ -106,10 +131,12 @@ extern syscalls_entrypoint_t syscalls_entrypoints_end[]; #define SYSCALLS_INIT(nm) #define SYSCALLS_DEFINE(nm, ...) void nm(__VA_ARGS__) #define SYSCALLS_DEFINE_SINGLETON(nm, dcd, ...) void nm(__VA_ARGS__) -#define SYSCALLS_AUTHZ(nm, drv) -#define SYSCALLS_DEAUTHZ(nm, drv) +#define SYSCALLS_AUTHZ_UPD(nm, drv, set) #define PROT_DOMAINS_VALIDATE_PTR(validated, untrusted, sz) validated = untrusted #endif +#define SYSCALLS_AUTHZ(nm, drv) SYSCALLS_AUTHZ_UPD(nm, drv, true) +#define SYSCALLS_DEAUTHZ(nm, drv) SYSCALLS_AUTHZ_UPD(nm, drv, false) + #endif /* CPU_X86_MM_SYSCALLS_H_ */ diff --git a/cpu/x86/mm/tss-prot-domains-asm.S b/cpu/x86/mm/tss-prot-domains-asm.S new file mode 100644 index 000000000..45832a62c --- /dev/null +++ b/cpu/x86/mm/tss-prot-domains-asm.S @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +.text + +/* Initialize the TSS fields in prot_domains_reg accordingly: + * Note: Each of these must be a callee-saved register, so that they are + * restored to their original values prior to the task returning. This will + * result in the same values being loaded when the task is next invoked. + */ +#define CUR_DOM_ID_BITMAP esi + +/* Must match SEG_KERN (plus the trailing 's') in multi-segment.h */ +#define SEG_KERN fs + +.global prot_domains_syscall_dispatcher +prot_domains_syscall_dispatcher: +#define PROT_DOMAINS_SYSCALL eax + mov prot_domains_syscall, %PROT_DOMAINS_SYSCALL + cmp $syscalls_entrypoints, %PROT_DOMAINS_SYSCALL + jl halt + cmp $syscalls_entrypoints_end, %PROT_DOMAINS_SYSCALL + jnl halt +#define SYSCALLS_ENTRYPOINTS_ALIGN_MASK ebp + mov $3, %SYSCALLS_ENTRYPOINTS_ALIGN_MASK + and %PROT_DOMAINS_SYSCALL, %SYSCALLS_ENTRYPOINTS_ALIGN_MASK + jnz halt + + /* Compare allowed domains bitmask against current domain ID bitmap. 
If + * the check fails, then the current domain ID bitmap value will be zeroed + * out, which could cause incorrect behavior in the future. However, the + * response to a failed check is to halt the system, so destroying the + * current domain ID bitmap value will have no effect. + */ + and %SEG_KERN:4(%PROT_DOMAINS_SYSCALL), %CUR_DOM_ID_BITMAP + jz halt + + mov prot_domains_main_esp, %esp + + /* Must be a callee-saved register: */ +#define ORIG_RET_ADDR edi + /* Update the caller's stack to return back to here */ + pop %ORIG_RET_ADDR + push $sysret_dispatcher + /* Jump to the system call body */ + jmp *%SEG_KERN:(%PROT_DOMAINS_SYSCALL) + +sysret_dispatcher: + push %ORIG_RET_ADDR + + iret + + /* The task will resume here for the next system call, so it is necessary + * to jump back to the top. + */ + jmp prot_domains_syscall_dispatcher + +.global dev_not_avail_isr +dev_not_avail_isr: + clts + iret diff --git a/cpu/x86/mm/tss-prot-domains.c b/cpu/x86/mm/tss-prot-domains.c new file mode 100644 index 000000000..40041a6d1 --- /dev/null +++ b/cpu/x86/mm/tss-prot-domains.c @@ -0,0 +1,161 @@ +/* + * Copyright (C) 2015-2016, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "gdt.h" +#include "helpers.h" +#include "idt.h" +#include "prot-domains.h" +#include "stacks.h" +#include "syscalls.h" +#include "tss.h" + +uint32_t prot_domains_main_esp; +syscalls_entrypoint_t ATTR_KERN_ADDR_SPACE *prot_domains_syscall; + +/*---------------------------------------------------------------------------*/ +void app_main(void); +void +prot_domains_reg(dom_client_data_t ATTR_KERN_ADDR_SPACE *dcd, + uintptr_t mmio, size_t mmio_sz, + uintptr_t meta, size_t meta_sz, + bool pio) +{ + segment_desc_t desc; + uint32_t eflags; + dom_id_t dom_id; + volatile struct dom_kern_data ATTR_KERN_ADDR_SPACE *dkd; + + KERN_READL(dom_id, dcd->dom_id); + + dkd = prot_domains_kern_data + dom_id; + + prot_domains_reg_multi_seg(dkd, mmio, mmio_sz, meta, meta_sz); + + /* Only the kernel protection domain requires port I/O access outside of the + * interrupt handlers. + */ + eflags = EFLAGS_IOPL(pio ? PRIV_LVL_USER : PRIV_LVL_INT); + if(dom_id == DOM_ID_app) { + eflags |= EFLAGS_IF; + } + + /* Keep this initialization in sync with the register definitions in + * tss-prot-domains-asm.S. 
+ */ + KERN_WRITEL(dkd->tss.ebp, 0); + KERN_WRITEL(dkd->tss.ebx, 0); + KERN_WRITEL(dkd->tss.esi, BIT(dom_id)); + KERN_WRITEL(dkd->tss.eip, + (dom_id == DOM_ID_app) ? + (uint32_t)app_main : + (uint32_t)prot_domains_syscall_dispatcher); + KERN_WRITEL(dkd->tss.cs, GDT_SEL_CODE); + KERN_WRITEL(dkd->tss.ds, GDT_SEL_DATA); + KERN_WRITEL(dkd->tss.es, GDT_SEL_DATA); + KERN_WRITEL(dkd->tss.fs, LDT_SEL_KERN); + KERN_WRITEL(dkd->tss.gs, + (meta_sz == 0) ? GDT_SEL_NULL : LDT_SEL_META); + KERN_WRITEL(dkd->tss.ss, GDT_SEL_STK); + /* This stack pointer is only actually used in application protection domain. + * Other domains enter at system call dispatcher, which switches to main + * stack. + */ + KERN_WRITEL(dkd->tss.esp, + /* Two return addresses have been consumed: */ + STACKS_INIT_TOP + (2 * sizeof(uintptr_t))); + KERN_WRITEL(dkd->tss.eflags, eflags); + KERN_WRITEL(dkd->tss.ldt, GDT_SEL_LDT(dom_id)); + KERN_WRITEL(dkd->tss.esp2, STACKS_SIZE_MAIN + STACKS_SIZE_INT); + KERN_WRITEL(dkd->tss.ss2, GDT_SEL_STK_INT); + KERN_WRITEL(dkd->tss.esp0, + STACKS_SIZE_MAIN + STACKS_SIZE_INT + STACKS_SIZE_EXC); + KERN_WRITEL(dkd->tss.ss0, GDT_SEL_STK_EXC); + KERN_WRITEW(dkd->tss.t, 0); + KERN_WRITEW(dkd->tss.iomap_base, sizeof(tss_t)); + KERN_WRITEL(dkd->tss.cr3, 0); + + segment_desc_init(&desc, + KERN_DATA_OFF_TO_PHYS_ADDR((uint32_t)&(dkd->tss)), + sizeof(dkd->tss), + /* It should be possible for code at any privilege level to invoke the task's + * system call dispatcher. 
+ */ + SEG_FLAG(DPL, PRIV_LVL_USER) | SEG_TYPE_TSS32_AVAIL); + + gdt_insert(GDT_IDX_TSS(dom_id), desc); + + KERN_WRITEW(dcd->tss_sel, GDT_SEL(GDT_IDX_TSS(dom_id), PRIV_LVL_USER)); +} +/*---------------------------------------------------------------------------*/ +void dev_not_avail_isr(void); +void +prot_domains_impl_init(void) +{ + __asm__ __volatile__ ("ltr %0" :: "r" ((uint16_t)GDT_SEL_TSS(DOM_ID_kern))); + __asm__ __volatile__ ("lldt %0" :: "r" ((uint16_t)GDT_SEL_LDT(DOM_ID_kern))); + + idt_set_intr_gate_desc(7, + (uint32_t)dev_not_avail_isr, + GDT_SEL_CODE_EXC, PRIV_LVL_EXC); +} +/*---------------------------------------------------------------------------*/ +int main(); +void +prot_domains_launch_kernel(void) +{ + multi_segment_launch_kernel(); + + /* Activate kernel protection domain, entering the kernel at main. */ + __asm__ __volatile__ ( + "pushl %[_ss_]\n\t" + "pushl %[_top_of_stk_]\n\t" + "pushl %[_eflags_]\n\t" + "pushl %[_cs_]\n\t" + "pushl %[_kern_start_]\n\t" + "iretl\n\t" + : + : [_ss_] "g" (GDT_SEL_STK), + [_eflags_] "g" (EFLAGS_IOPL(PRIV_LVL_USER)), + [_cs_] "g" (GDT_SEL_CODE), + [_kern_start_] "g" (main), + /* one address has already been consumed */ + [_top_of_stk_] "g" (STACKS_INIT_TOP + sizeof(uint32_t)) + ); +} +/*---------------------------------------------------------------------------*/ +void +prot_domains_launch_app() +{ + far_pointer_t app_ptr = { 0, GDT_SEL_TSS(DOM_ID_app) }; + __asm__ __volatile__ ("ljmp *%0" :: "m" (app_ptr)); +} +/*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/mm/tss-prot-domains.h b/cpu/x86/mm/tss-prot-domains.h new file mode 100644 index 000000000..d61d97504 --- /dev/null +++ b/cpu/x86/mm/tss-prot-domains.h @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2015-2016, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef CPU_X86_MM_TSS_PROT_DOMAINS_H_ +#define CPU_X86_MM_TSS_PROT_DOMAINS_H_ + +#include +#include +#include +#include "ldt-layout.h" +#include "segmentation.h" +#include "tss.h" + +struct dom_kern_data { + /** Task State Segment */ + tss_t tss; + /** Local Descriptor Table with per-domain descriptors */ + segment_desc_t ldt[LDT_NUM_DESC]; +} __attribute__((packed)); + +/* relies on dom_kern_data: */ +#include "multi-segment.h" + +/* relies on ATTR_KERN_ADDR_SPACE: */ +#include "syscalls.h" + +/** + * Data associated with each protection domain that is owned by clients of that + * domain and used to identify the domain. + */ +struct dom_client_data { + dom_id_t dom_id; + /** The selector is only 16 bits, but it is padded to 32 bits. */ + uint32_t tss_sel; +}; + +extern uint32_t prot_domains_main_esp; + +#define SYSCALLS_STUB_MIDDLE(nm) \ + /* If already in the callee protection domain, skip the protection */ \ + /* domain switch and directly invoke the system call body */ \ + " je _syscall_" #nm "\n\t" \ + " movl $" EXP_STRINGIFY(_syscall_ent_##nm) ", prot_domains_syscall\n\t" \ + " mov %esp, prot_domains_main_esp\n\t" + +#define SYSCALLS_STUB(nm) \ + SYSCALLS_ALLOC_ENTRYPOINT(nm); \ + asm ( \ + ".text\n\t" \ + ".global " #nm "\n\t" \ + #nm ":\n\t" \ + " str %ax\n\t" \ + /* Compare current Task Register selector to selector for callee */ \ + /* protection domain, in tss_sel field of dom_client_data */ \ + " cmpw %ax, 8(%esp)\n\t" \ + SYSCALLS_STUB_MIDDLE(nm) \ + /* This will treat the dom_id field as the offset for the call, but */ \ + /* that is ignored when performing a far call to a task */ \ + " lcall *4(%esp)\n\t" \ + " ret\n\t") + +#define SYSCALLS_STUB_SINGLETON(nm, dcd) \ + SYSCALLS_ALLOC_ENTRYPOINT(nm); \ + asm ( \ + ".text\n\t" \ + ".global " #nm "\n\t" \ + #nm ":\n\t" \ + " str %ax\n\t" \ + /* Compare current Task Register selector to selector for callee */ \ + /* protection domain, in tss_sel field of dom_client_data */ \ + " cmpw %ax, %" 
SEG_KERN "s:(4 + " #dcd ")\n\t" \ + SYSCALLS_STUB_MIDDLE(nm) \ + /* This will treat the dom_id field as the offset for the call, but */ \ + /* that is ignored when performing a far call to a task */ \ + " lcall *%" SEG_KERN "s:" #dcd "\n\t" \ + " ret\n\t") + +#define PROT_DOMAINS_ENTER_ISR(exc) \ + MULTI_SEGMENT_ENTER_ISR(exc) \ + /* It is possible that the system call dispatcher is being interrupted, */ \ + /* and some interrupt handlers perform system calls. Thus, it is */ \ + /* necessary to save and restore the system call dispatcher parameters */ \ + /* (in callee-saved registers). */ \ + "mov prot_domains_main_esp, %%esi\n\t" \ + "mov prot_domains_syscall, %%edi\n\t" \ + PROT_DOMAINS_ENTER_ISR_COMMON(exc) +#define PROT_DOMAINS_LEAVE_ISR(exc) \ + PROT_DOMAINS_LEAVE_ISR_COMMON(exc) \ + "mov %%edi, prot_domains_syscall\n\t" \ + "mov %%esi, prot_domains_main_esp\n\t" \ + MULTI_SEGMENT_LEAVE_ISR(exc) + +/* Allocate two additional GDT entries for each protection domain. Note that + * the particular storage allocated by this statement may actually be used for + * some other protection domain, depending on how the linker happens to arrange + * all of the GDT storage. The GDT_IDX_TSS and GDT_IDX_LDT macros in + * gdt-layout.h determine which storage is used for each protection domain. + * Thus, this storage should not be referenced directly by its variable name. 
+ */ +#define PROT_DOMAINS_ALLOC_IMPL(nm) \ + static segment_desc_t ATTR_BSS_GDT_MID _gdt_storage_##nm[2] + +#endif /* CPU_X86_MM_TSS_PROT_DOMAINS_H_ */ diff --git a/cpu/x86/quarkX1000.ld b/cpu/x86/quarkX1000.ld index 2f90b7c70..be91a74c7 100644 --- a/cpu/x86/quarkX1000.ld +++ b/cpu/x86/quarkX1000.ld @@ -87,4 +87,6 @@ SECTIONS { */ _ebss_gdt_addr = .; } + + _ebss_pre_dma_addr = ALIGN(32); } diff --git a/cpu/x86/quarkX1000_dma.ld b/cpu/x86/quarkX1000_dma.ld index fe3b79861..4cecac839 100644 --- a/cpu/x86/quarkX1000_dma.ld +++ b/cpu/x86/quarkX1000_dma.ld @@ -30,26 +30,18 @@ SECTIONS { - /* - It would be more natural to use a 1K alignment for this entire section. - However, the UEFI GenFw program ratchets up its alignment - granularity to the maximum granularity discovered in its input file. - Using 1K-alignment perturbs the symbols, hindering debugging. Thus, - this section is simply padded out to the desired alignment and - declared to have a section alignment of only 32 bytes. - - The alignment directives used here suffice even when paging is in use, - because this is the last section and directly follows one (.bss.meta) - that is 4K-aligned. - */ - .bss.dma (NOLOAD) : ALIGN (32) + .bss.dma (NOLOAD) : AT(_ebss_pre_dma_addr) ALIGN (32) { - /* The IMR feature operates at 1K granularity. */ - . = ALIGN(1K); - _sbss_dma_addr = .; + /* IMRs are used to restrict DMA, and they require 1K physical address alignment. */ + . += ALIGN(_ebss_pre_dma_addr, 1K) - ALIGN(_ebss_pre_dma_addr, 32); *(.dma_bss) - . = ALIGN(1K); - _ebss_dma_addr = .; } + _sbss_dma_addr = LOADADDR(.bss.dma) + ALIGN(_ebss_pre_dma_addr, 1K) - ALIGN(_ebss_pre_dma_addr, 32); + /* + Effectively pointing beyond the end of .bss.dma is acceptable, since + .bss.dma is the last section in memory. 
+ */ + _ebss_dma_addr = ALIGN(LOADADDR(.bss.dma) + SIZEOF(.bss.dma), 1K); + } diff --git a/cpu/x86/quarkX1000_multi_seg.ld b/cpu/x86/quarkX1000_multi_seg.ld new file mode 100644 index 000000000..945650399 --- /dev/null +++ b/cpu/x86/quarkX1000_multi_seg.ld @@ -0,0 +1,190 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +OUTPUT_FORMAT("elf32-i386") + +ENTRY(start) + +/* + The TSS-based protection domain implementation does not explicitly reference + these symbols, so we list them here to prevent them from being garbage- + collected. +*/ +EXTERN(stacks_int) +EXTERN(stacks_exc) + +PHDRS { + boot_text PT_LOAD; + text PT_LOAD; + data PT_LOAD; +} + +SECTIONS { + /* + OS-Dev Wiki says it is common for kernels to start at 1M. Addresses before that + are used by BIOS/EFI, the bootloader and memory-mapped I/O. + + The UEFI GenFw program inserts a 0x220 byte offset between the image base and + the .text section. We add that same offset here to align the symbols in the + UEFI DLL with those in the final UEFI binary to make debugging easier. + */ + . = 1M + 0x220; + + /* + The GenFw program in the EDK2 UEFI toolchain outputs UEFI images with a + section alignment of at least 32 bytes. Thus, it is desirable to use at + least that alignment granularity to avoid symbols being shifted from the + intermediate DLL to the final UEFI image. Such shifting may make + debugging more difficult by preventing the DLL from being a useful + source of symbol information. The debugging symbols are not included in + the final UEFI image. + */ + .text.boot : ALIGN (32) + { + *(.multiboot) + /* + The initial bootstrap code expects to operate in a flat address + space with an identity mapping between linear and physical + addresses. + */ + *(.boot_text) + } :boot_text + + /* The post-boot code segments define tight bounds around the code + section, so this directive resets the virtual address to 0. */ + . = 0; + + /* The virtual address differs from the load address. */ + .text : AT(LOADADDR(.text.boot) + ALIGN(SIZEOF(.text.boot), 32)) ALIGN (32) + { + /* + These BYTE directives emit a UD2 instruction to cause execution to + halt if the control flow ever deviates to address 0. This also + prevents other code from being placed at address 0. 
Some code + considers a function pointer to address 0 to be a null function + pointer. + */ + BYTE(0x0F); + BYTE(0x0B); + *(.text*) + + /* + An alternative design to eliminate the need for ALIGN directives + within the AT directives in later sections could have padded + each section out to a 32-byte boundary. However, that would have + enabled unneeded software accesses to the padding past the end of actual + code/data in each section, since segments are also configured based on + the values of the SIZEOF expressions. As a general principle, accesses + should be as restricted as is feasible. + */ + } :text + + _stext_addr = LOADADDR(.text); + _etext_addr = LOADADDR(.text) + SIZEOF(.text); + + . = 0; + + .data : AT(ALIGN(_etext_addr, 32)) ALIGN (32) + { + *(.main_stack) + *(.int_stack) + *(.exc_stack) + *(.rodata*) + *(.data*) + + /* + These could alternatively be treated as read-only data to prevent tampering + from the user privilege level. + */ + _sdata_shared_isr = .; + KEEP(*(.shared_isr_data*)) + _edata_shared_isr = .; + } :data + + .bss : ALIGN (32) + { + *(COMMON) + *(.bss*) + } + + _sdata_addr = LOADADDR(.data); + _edata_addr = LOADADDR(.bss) + SIZEOF(.bss); + + . = 0; + + .bss.kern (NOLOAD) : AT(ALIGN(_edata_addr, 32)) ALIGN (32) + { + /* + This directive prevents any data from being allocated at address + zero, since the address 0 is commonly used to represent null + pointers. 
+ */ + LONG(0); + *(.kern_bss) + + syscalls_entrypoints = .; + *(.syscall_bss) + syscalls_entrypoints_end = .; + } + + _ebss_syscall_addr = LOADADDR(.bss.kern) + SIZEOF(.bss.kern); + + .bss.kern_priv (NOLOAD) : ALIGN (32) + { + prot_domains_kern_data = .; + /* + The kernel and app protection domain control structures must always + be placed in the first two slots in this order, so that they have + well-known protection domain IDs: + */ + *(.kern_prot_dom_bss) + *(.app_prot_dom_bss) + *(.prot_dom_bss) + prot_domains_kern_data_end = .; + + *(.gdt_bss_start) + KEEP(*(.gdt_bss_mid)) + *(.gdt_bss) + _ebss_gdt_addr = .; + } + + _sbss_kern_addr = LOADADDR(.bss.kern); + _ebss_kern_addr = LOADADDR(.bss.kern_priv) + SIZEOF(.bss.kern_priv); + + . = _ebss_kern_addr; + + .bss.meta (NOLOAD) : AT(ALIGN(_ebss_kern_addr, 32)) ALIGN (32) + { + *(.meta_bss) + } + + /* .bss.meta may be empty, so this uses .bss.kern_priv as a base instead: */ + _ebss_pre_dma_addr = ALIGN(ALIGN(_ebss_kern_addr, 32) + SIZEOF(.bss.meta), 32); +} diff --git a/cpu/x86/quarkX1000_paging.ld b/cpu/x86/quarkX1000_paging.ld index 0352cbf64..19e50e1e0 100644 --- a/cpu/x86/quarkX1000_paging.ld +++ b/cpu/x86/quarkX1000_paging.ld @@ -129,7 +129,7 @@ SECTIONS { *(.data*) /* - These could also be treated as read-only data to prevent tampering + These could alternatively be treated as read-only data to prevent tampering from the user privilege level. */ _sdata_shared_isr = .; @@ -201,4 +201,6 @@ SECTIONS { . 
= ALIGN(4K); } + + _ebss_pre_dma_addr = ALIGN(32); } diff --git a/platform/galileo/Makefile.customrules-galileo b/platform/galileo/Makefile.customrules-galileo index cb5bc26a9..b141ee211 100644 --- a/platform/galileo/Makefile.customrules-galileo +++ b/platform/galileo/Makefile.customrules-galileo @@ -9,7 +9,17 @@ MULTIBOOT = $(CONTIKI_PROJECT).$(MULTIBOOT_SFX) # UEFI binary UEFI_DLL_SFX = $(TARGET).dll UEFI_DLL = $(CONTIKI_PROJECT).$(UEFI_SFX) -UEFI_LDFLAGS += -Xlinker --emit-relocs -Xlinker --entry=uefi_start +# The GenFw program is unable to process absolute symbols like _stext_addr, +# etc., that are defined in quarkX1000_dma.ld and quarkX1000_multi_seg.ld +# and used to configure segments in multi-segment.c, etc. Furthermore, +# relocating the UEFI image during load would result in those symbols not +# pointing to the expected image locations. So, relocation data is omitted +# from the intermediate UEFI DLL. This will only result in a +# correctly-functioning build if the UEFI firmware does not attempt to +# relocate the UEFI image, so it may be desirable in the future to revisit +# this design. To emit relocation data, '-Xlinker --emit-relocs' should be +# appended to the following line. +UEFI_LDFLAGS = -Xlinker --entry=uefi_start UEFI_SFX = $(TARGET).efi UEFI = $(CONTIKI_PROJECT).$(UEFI_SFX) From e0aefd11d94415c5ffc58344201e16c85a4e2e41 Mon Sep 17 00:00:00 2001 From: Michael LeMay Date: Fri, 7 Aug 2015 11:51:04 -0700 Subject: [PATCH 4/5] x86: Add support for SW-switched segment-based protection domains This patch extends the protection domain framework with a third plugin that is a hybrid of the previous two. The hardware task switching mechanism has a strictly-defined format for TSS data structures that causes more space to be consumed than would otherwise be required. This patch defines a smaller data structure that is allocated for each protection domain, only requiring 32 bytes instead of 128 bytes. 
It uses the same multi-segment memory layout as the TSS-based plugin and leaves paging disabled. However, it uses a similar mechanism as the paging plugin to perform system call dispatches and returns. For additional information, please refer to cpu/x86/mm/README.md. --- cpu/x86/Makefile.x86_quarkX1000 | 5 ++ cpu/x86/mm/README.md | 31 ++++++++++-- cpu/x86/mm/gdt-layout.h | 5 ++ cpu/x86/mm/multi-segment.c | 11 ++++- cpu/x86/mm/prot-domains.c | 7 +++ cpu/x86/mm/prot-domains.h | 6 ++- cpu/x86/mm/stacks.h | 6 +++ cpu/x86/mm/swseg-prot-domains.c | 83 +++++++++++++++++++++++++++++++ cpu/x86/mm/swseg-prot-domains.h | 86 +++++++++++++++++++++++++++++++++ cpu/x86/mm/syscalls-int-asm.S | 48 +++++++++++++++++- cpu/x86/mm/syscalls-int.c | 66 ++++++++++++++++--------- cpu/x86/mm/syscalls-int.h | 2 +- cpu/x86/mm/tss.c | 17 ++++--- 13 files changed, 335 insertions(+), 38 deletions(-) create mode 100644 cpu/x86/mm/swseg-prot-domains.c create mode 100644 cpu/x86/mm/swseg-prot-domains.h diff --git a/cpu/x86/Makefile.x86_quarkX1000 b/cpu/x86/Makefile.x86_quarkX1000 index 13a9c686f..1a8d3ac8e 100644 --- a/cpu/x86/Makefile.x86_quarkX1000 +++ b/cpu/x86/Makefile.x86_quarkX1000 @@ -25,6 +25,11 @@ else ifeq ($(X86_CONF_PROT_DOMAINS),tss) CFLAGS += -DX86_CONF_PROT_DOMAINS=2 X86_CONF_MULTI_SEG = 1 CONTIKI_SOURCEFILES += tss-prot-domains-asm.S +else ifeq ($(X86_CONF_PROT_DOMAINS),swseg) +# This matches the definition of X86_CONF_PROT_DOMAINS__SWSEG in prot-domains.h: +CFLAGS += -DX86_CONF_PROT_DOMAINS=3 +X86_CONF_SYSCALLS_INT = 1 +X86_CONF_MULTI_SEG = 1 else $(error Unrecognized setting for X86_CONF_PROT_DOMAINS: \ $(X86_CONF_PROT_DOMAINS). See cpu/x86/mm/README.md for \ diff --git a/cpu/x86/mm/README.md b/cpu/x86/mm/README.md index dcd6370b4..42c4070a7 100644 --- a/cpu/x86/mm/README.md +++ b/cpu/x86/mm/README.md @@ -6,11 +6,12 @@ Introduction The X86 port of Contiki implements a simple, lightweight form of protection domains using a pluggable framework. 
Currently, there are -two plugins available: +three plugins available: - Flat memory model with paging. - - Multi-segment memory model with hardware-switched segments based on - Task-State Segment (TSS) structures. + - Multi-segment memory model with either hardware- or + software-switched segments. The hardware-switched segments + approach is based on Task-State Segment (TSS) structures. For an introduction to paging and TSS and possible ways in which they can be used, refer to the following resources: @@ -144,8 +145,8 @@ Similarly, register contents may be accessed and modified across protection domain boundaries in some protection domain implementations. The TSS task switching mechanism automatically saves and restores many registers to and from TSS data structures when -switching tasks, but the paging-based protection domain implementation -does not perform analogous operations. +switching tasks, but the other protection domain implementations do +not perform analogous operations. For the reasons described above, each protection domain should only invoke other protection domains that it trusts to properly handle data @@ -847,6 +848,25 @@ in an unexpected manner, since segment register load instructions are unprivileged. Similar segment register updates must be performed for similar reasons when dispatching system calls. +### Software-Switched Segment-Based Protection Domains + +Primary implementation sources: + + - cpu/x86/mm/swseg-prot-domains.c + +The requirement to allocate a TSS for each protection domain in the +hardware-switched segments plugin may consume a substantial amount of +space, since the size of each TSS is fixed by hardware to be at least +104 bytes. The software-switched segments plugin saves space by +defining a more compact PDCS. However, the layout and definitions of +the segments is identical to what was described above for the +hardware-switched segments plugin. 
+ +The system call and return procedure is mostly identical to that for +paging-based protection domains. However, instead of updating and +invalidating page tables, the dispatchers update the LDT and some of +the segment registers. + ### Pointer Validation Primary implementation sources: @@ -957,6 +977,7 @@ the command line and specify one of the following options: - paging - tss + - swseg The paging option accepts a sub-option to determine whether the TLB is fully- or selectively-invalidated during protection domain switches. diff --git a/cpu/x86/mm/gdt-layout.h b/cpu/x86/mm/gdt-layout.h index 5dddd3a4d..b79c2b9ca 100644 --- a/cpu/x86/mm/gdt-layout.h +++ b/cpu/x86/mm/gdt-layout.h @@ -92,8 +92,13 @@ /** Stack segment for exception handlers */ #define GDT_IDX_STK_EXC 10 +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__TSS #define GDT_IDX_TSS(dom_id) (GDT_NUM_FIXED_DESC + (2 * (dom_id))) #define GDT_IDX_LDT(dom_id) (GDT_NUM_FIXED_DESC + (2 * (dom_id)) + 1) +#else +#define GDT_IDX_LDT(dom_id) (GDT_NUM_FIXED_DESC + (dom_id)) +#endif + #endif #else #define GDT_IDX_CODE GDT_IDX_CODE_FLAT diff --git a/cpu/x86/mm/multi-segment.c b/cpu/x86/mm/multi-segment.c index f60a2c8bb..6d3fd57f1 100644 --- a/cpu/x86/mm/multi-segment.c +++ b/cpu/x86/mm/multi-segment.c @@ -139,7 +139,14 @@ prot_domains_gdt_init() (uint32_t)&_stext_addr, ((uint32_t)&_etext_addr) - (uint32_t)&_stext_addr, SEG_FLAG(DPL, PRIV_LVL_EXC) | SEG_GRAN_BYTE | - SEG_DESCTYPE_NSYS | SEG_TYPE_CODE_EX); + SEG_DESCTYPE_NSYS | +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__SWSEG + /* The general protection fault handler requires read access to CS */ + SEG_TYPE_CODE_EXRD +#else + SEG_TYPE_CODE_EX +#endif + ); gdt_insert_boot(GDT_IDX_CODE_EXC, desc); segment_desc_init(&desc, @@ -180,7 +187,9 @@ prot_domains_gdt_init() */ desc.raw = SEG_DESC_NOT_PRESENT; for(i = 0; i < PROT_DOMAINS_ACTUAL_CNT; i++) { +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__TSS gdt_insert_boot(GDT_IDX_TSS(i), desc); +#endif 
gdt_insert_boot(GDT_IDX_LDT(i), desc); } diff --git a/cpu/x86/mm/prot-domains.c b/cpu/x86/mm/prot-domains.c index 8bbeb4d83..56461b381 100644 --- a/cpu/x86/mm/prot-domains.c +++ b/cpu/x86/mm/prot-domains.c @@ -54,6 +54,13 @@ prot_domains_init(void) segment_desc_t desc; gdt_lookup(GDT_IDX_CODE_EXC, &desc); +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__SWSEG + /* The exception code segment needs to be readable so that the general + * protection fault handler can decode instructions, but the interrupt and + * user level code segments should not be. + */ + SEG_SET_FLAG(desc, TYPE, SEG_TYPE_CODE_EX); +#endif SEG_SET_FLAG(desc, DPL, PRIV_LVL_INT); gdt_insert(GDT_IDX_CODE_INT, desc); diff --git a/cpu/x86/mm/prot-domains.h b/cpu/x86/mm/prot-domains.h index a1fbca130..13062612b 100644 --- a/cpu/x86/mm/prot-domains.h +++ b/cpu/x86/mm/prot-domains.h @@ -41,9 +41,11 @@ #define X86_CONF_PROT_DOMAINS__NONE 0 #define X86_CONF_PROT_DOMAINS__PAGING 1 #define X86_CONF_PROT_DOMAINS__TSS 2 +#define X86_CONF_PROT_DOMAINS__SWSEG 3 #define X86_CONF_PROT_DOMAINS_MULTI_SEG \ - (X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__TSS) + ((X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__TSS) || \ + (X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__SWSEG)) /** Privilege level (ring) for exception handlers and other supervisory code */ #define PRIV_LVL_EXC 0 @@ -74,6 +76,8 @@ typedef uint32_t dom_id_t; #include "paging-prot-domains.h" #elif X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__TSS #include "tss-prot-domains.h" +#elif X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__SWSEG +#include "swseg-prot-domains.h" #endif #ifndef ATTR_META_ADDR_SPACE diff --git a/cpu/x86/mm/stacks.h b/cpu/x86/mm/stacks.h index 96be72cf9..327e75600 100644 --- a/cpu/x86/mm/stacks.h +++ b/cpu/x86/mm/stacks.h @@ -61,6 +61,12 @@ #else #define STACKS_SIZE_EXC 256 #endif +#elif X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__SWSEG +#ifdef __clang__ +#define STACKS_SIZE_EXC 512 +#else +#define STACKS_SIZE_EXC 256 
+#endif #elif X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__TSS /** * This should be large enough to execute the exception handler with the diff --git a/cpu/x86/mm/swseg-prot-domains.c b/cpu/x86/mm/swseg-prot-domains.c new file mode 100644 index 000000000..78a29aaf6 --- /dev/null +++ b/cpu/x86/mm/swseg-prot-domains.c @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "gdt.h" +#include "helpers.h" +#include "multi-segment.h" +#include "prot-domains.h" + +/*---------------------------------------------------------------------------*/ +void +prot_domains_reg(dom_client_data_t ATTR_KERN_ADDR_SPACE *dcd, + uintptr_t mmio, size_t mmio_sz, + uintptr_t meta, size_t meta_sz, + bool pio) +{ + volatile dom_kern_data_t ATTR_KERN_ADDR_SPACE *dkd; + dom_id_t dom_id; + + KERN_READL(dom_id, dcd->dom_id); + + if(PROT_DOMAINS_ACTUAL_CNT <= dom_id) { + halt(); + } + + dkd = prot_domains_kern_data + dom_id; + + prot_domains_reg_multi_seg(dkd, mmio, mmio_sz, meta, meta_sz); + + KERN_WRITEL(dkd->flags, pio ? PROT_DOMAINS_FLAG_PIO : 0); +} +/*---------------------------------------------------------------------------*/ +static inline void __attribute__((always_inline)) +prot_domains_switch(dom_id_t from_id, dom_id_t to_id, + interrupt_stack_t *intr_stk) +{ + __asm__ __volatile__ ( + "lldt %[_ldt_]\n\t" + "mov %[_meta_seg_], %%eax\n\t" + "lsl %%eax, %%ecx\n\t" + "jz 1f\n\t" /* ZF will only be set if the segment descriptor is valid. */ + "xor %%eax, %%eax\n\t" /* Nullify metadata selector */ + "1: mov %%eax, %%" SEG_META "s\n\t" + "mov %[_kern_seg_], %%eax\n\t" + "mov %%eax, %%" SEG_KERN "s\n\t" + : + : [_ldt_] "r" ((uint16_t)GDT_SEL_LDT(to_id)), + [_meta_seg_] "i" (LDT_SEL_META), + [_kern_seg_] "i" (LDT_SEL_KERN) + : "cc", "eax", "ecx" + ); +} +/*---------------------------------------------------------------------------*/ + +/* Enable inter-procedural optimization with procedures in the following file: + */ +#include "syscalls-int.c" diff --git a/cpu/x86/mm/swseg-prot-domains.h b/cpu/x86/mm/swseg-prot-domains.h new file mode 100644 index 000000000..a503bc6db --- /dev/null +++ b/cpu/x86/mm/swseg-prot-domains.h @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef CPU_X86_MM_SWSEG_PROT_DOMAINS_H_ +#define CPU_X86_MM_SWSEG_PROT_DOMAINS_H_ + +#include +#include +#include +#include "ldt-layout.h" +#include "paging.h" +#include "segmentation.h" +#include "syscalls-int.h" + +struct dom_kern_data { + /** Local Descriptor Table with per-domain descriptors */ + segment_desc_t ldt[LDT_NUM_DESC]; + /** Flags are defined with the prefix PROT_DOMAINS_FLAG in prot-domains.h */ + uint32_t flags; + /** + * Original return address from call stack when this protection domain + * invoked some other protection domain. This serves to control the return + * entrypoint. The callee is not permitted to modify this value (unless the + * callee is the kernel protection domain). + */ + uintptr_t orig_ret_addr; + + /* This structure is precisely 32 bytes in length, a power of 2. If its size + * changes, add an alignment attribute to keep it aligned at a power of 2 so + * that dereferencing arrays of these structures uses shift instructions + * instead of multiplication. Shifting is faster than multiplication. + */ +}; + +/* relies on dom_kern_data: */ +#include "multi-segment.h" + +#define PROT_DOMAINS_ENTER_ISR(exc) \ + MULTI_SEGMENT_ENTER_ISR(exc) \ + PROT_DOMAINS_ENTER_ISR_COMMON(exc) +#define PROT_DOMAINS_LEAVE_ISR(exc) \ + PROT_DOMAINS_LEAVE_ISR_COMMON(exc) \ + MULTI_SEGMENT_LEAVE_ISR(exc) + +#define prot_domains_impl_init syscalls_int_init + +#define prot_domains_set_wp(en) + +/* Allocate one additional GDT entry for each protection domain. Note that + * the particular storage allocated by this statement may actually be used for + * some other protection domain, depending on how the linker happens to arrange + * all of the GDT storage. The GDT_IDX_LDT macro in gdt-layout.h determine + * which storage is used for each protection domain. Thus, this storage should + * not be referenced directly by its variable name. 
+ */ +#define PROT_DOMAINS_ALLOC_IMPL(nm) \ + static segment_desc_t ATTR_BSS_GDT_MID _gdt_storage_##nm + +#endif /* CPU_X86_MM_SWSEG_PROT_DOMAINS_H_ */ diff --git a/cpu/x86/mm/syscalls-int-asm.S b/cpu/x86/mm/syscalls-int-asm.S index 1fe80310f..5c88c890b 100644 --- a/cpu/x86/mm/syscalls-int-asm.S +++ b/cpu/x86/mm/syscalls-int-asm.S @@ -33,6 +33,10 @@ #include "gdt-layout.h" #include "stacks.h" +/* Must match definitions (plus the trailing 's') in multi-segment.h */ +#define SEG_MMIO fs +#define SEG_KERN fs + .text /* Invoke the system call return dispatcher from the default privilege @@ -42,21 +46,57 @@ prot_domains_sysret_stub: int $PROT_DOMAINS_SYSRET_DISPATCH_INT +.macro save_segs +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__SWSEG + /* Save (and restore, in restore_segs) MMIO segment register into + * callee-saved register in case a system call was invoked from a region in + * which MMIO is enabled. + */ + push %SEG_MMIO +#endif +.endm + +.macro restore_segs +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__SWSEG + pop %SEG_MMIO +#endif +.endm + +/* Refresh most of the segment registers in case they were corrupted by + * userspace code to prevent that from corrupting the operation of the + * privileged code. 
+ */ +.macro load_kern_segs +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__SWSEG + mov $GDT_SEL_DATA, %eax + mov %eax, %ds + mov %eax, %es + mov $GDT_SEL_DATA_KERN_EXC, %eax + mov %eax, %SEG_KERN +#endif +.endm + /* Invoke the system call dispatcher C routine */ .global prot_domains_syscall_dispatcher prot_domains_syscall_dispatcher: mov %esp, %ecx /*< interrupt_stack_t *intr_stk */ /* EDX already set to "dom_client_data_t to_dcd" by syscall stub */ - push %eax /*< syscalls_id_t syscall_id */ + save_segs + push %eax /*< syscalls_entrypoint_t *syscall */ + load_kern_segs call prot_domains_syscall_dispatcher_impl /* fastcall convention, so callee pops arguments */ + restore_segs iret /* Invoke the system call return dispatcher C routine */ .global prot_domains_sysret_dispatcher prot_domains_sysret_dispatcher: mov %esp, %ecx /*< interrupt_stack_t *intr_stk */ + save_segs + load_kern_segs call prot_domains_sysret_dispatcher_impl + restore_segs /* Zero caller-saved registers in case they contain secrets. The system call * handlers and dispatchers need to preserve the callee-saved registers. 
*/ @@ -67,11 +107,17 @@ prot_domains_sysret_dispatcher: .global prot_domains_launch_kernel prot_domains_launch_kernel: +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__PAGING mov $GDT_SEL_DATA, %eax mov %eax, %ds mov %eax, %es mov %eax, %fs mov %eax, %gs +#else + mov $GDT_SEL_LDT(DOM_ID_kern), %eax + lldt %ax + call multi_segment_launch_kernel +#endif /* init interrupt return stack: */ pushl $GDT_SEL_STK lea stacks_main, %eax diff --git a/cpu/x86/mm/syscalls-int.c b/cpu/x86/mm/syscalls-int.c index 1d1c77efb..6820d3264 100644 --- a/cpu/x86/mm/syscalls-int.c +++ b/cpu/x86/mm/syscalls-int.c @@ -91,7 +91,10 @@ syscall_dispatcher_tail(interrupt_stack_t *intr_stk, uint32_t syscall_eip) { dom_id_t from_id; - volatile dom_kern_data_t *from_dkd, *to_dkd; + uint32_t tmp; + volatile dom_kern_data_t ATTR_KERN_ADDR_SPACE *from_dkd, *to_dkd; + + uint32_t loc_call_stk_ptr; to_dkd = prot_domains_kern_data + to_id; @@ -101,36 +104,40 @@ syscall_dispatcher_tail(interrupt_stack_t *intr_stk, * kernel data associated with that protection domain. That model does not * permit reentrancy. */ - if((to_dkd->flags & PROT_DOMAINS_FLAG_BUSY) == PROT_DOMAINS_FLAG_BUSY) { + KERN_READL(tmp, to_dkd->flags); + if((tmp & PROT_DOMAINS_FLAG_BUSY) == PROT_DOMAINS_FLAG_BUSY) { halt(); } - to_dkd->flags |= PROT_DOMAINS_FLAG_BUSY; + tmp |= PROT_DOMAINS_FLAG_BUSY; + KERN_WRITEL(to_dkd->flags, tmp); /* Update the interrupt stack so that the IRET instruction will return to the * system call entrypoint. */ intr_stk->eip = syscall_eip; + KERN_READL(loc_call_stk_ptr, inter_dom_call_stk_ptr); /* Lookup the information for the caller */ - from_id = inter_dom_call_stk[inter_dom_call_stk_ptr - 1]; + KERN_READL(from_id, inter_dom_call_stk[loc_call_stk_ptr - 1]); from_dkd = prot_domains_kern_data + from_id; /* Save the current return address from the unprivileged stack to a protected * location in the kernel-owned data structure. This enforces return * entrypoint control. 
*/ - from_dkd->orig_ret_addr = *(uintptr_t *)intr_stk->esp; + KERN_WRITEL(from_dkd->orig_ret_addr, *(uintptr_t *)intr_stk->esp); /* Update the unprivileged stack so that when the system call body is * complete, it will invoke the system call return stub. */ *((uintptr_t *)intr_stk->esp) = (uintptr_t)prot_domains_sysret_stub; - if(MAX_INTER_DOM_CALL_STK_SZ <= inter_dom_call_stk_ptr) { + if(MAX_INTER_DOM_CALL_STK_SZ <= loc_call_stk_ptr) { halt(); } - inter_dom_call_stk[inter_dom_call_stk_ptr] = to_id; + KERN_WRITEL(inter_dom_call_stk[loc_call_stk_ptr], to_id); - inter_dom_call_stk_ptr++; + loc_call_stk_ptr++; + KERN_WRITEL(inter_dom_call_stk_ptr, loc_call_stk_ptr); dispatcher_tail(from_id, to_id, intr_stk); } @@ -140,6 +147,7 @@ prot_domains_syscall_dispatcher_impl(interrupt_stack_t *intr_stk, dom_id_t to_id, syscalls_entrypoint_t *syscall) { + uint32_t tmp; uint32_t syscall_eip; if(PROT_DOMAINS_ACTUAL_CNT <= to_id) { @@ -156,11 +164,12 @@ prot_domains_syscall_dispatcher_impl(interrupt_stack_t *intr_stk, halt(); } - if((BIT(to_id) & syscall->doms) == 0) { + KERN_READL(tmp, syscall->doms); + if((BIT(to_id) & tmp) == 0) { halt(); } - syscall_eip = syscall->entrypoint; + KERN_READL(syscall_eip, syscall->entrypoint); prot_domains_set_wp(false); @@ -171,9 +180,9 @@ int main(void); void __attribute__((fastcall)) prot_domains_launch_kernel_impl(interrupt_stack_t *intr_stk) { - inter_dom_call_stk[0] = DOM_ID_app; + KERN_WRITEL(inter_dom_call_stk[0], DOM_ID_app); - inter_dom_call_stk_ptr = 1; + KERN_WRITEL(inter_dom_call_stk_ptr, 1); syscall_dispatcher_tail(intr_stk, DOM_ID_kern, (uint32_t)main); } @@ -182,20 +191,27 @@ void __attribute__((fastcall)) prot_domains_sysret_dispatcher_impl(interrupt_stack_t *intr_stk) { dom_id_t from_id, to_id; - if(inter_dom_call_stk_ptr <= 1) { + uint32_t loc_call_stk_ptr; + uint32_t flags; + + KERN_READL(loc_call_stk_ptr, inter_dom_call_stk_ptr); + if(loc_call_stk_ptr <= 1) { halt(); } - from_id = inter_dom_call_stk[inter_dom_call_stk_ptr - 
1]; - to_id = inter_dom_call_stk[inter_dom_call_stk_ptr - 2]; + KERN_READL(from_id, inter_dom_call_stk[loc_call_stk_ptr - 1]); + KERN_READL(to_id, inter_dom_call_stk[loc_call_stk_ptr - 2]); - intr_stk->eip = prot_domains_kern_data[to_id].orig_ret_addr; + KERN_READL(intr_stk->eip, + prot_domains_kern_data[to_id].orig_ret_addr); prot_domains_set_wp(false); - prot_domains_kern_data[from_id].flags &= ~PROT_DOMAINS_FLAG_BUSY; + KERN_READL(flags, prot_domains_kern_data[from_id].flags); + flags &= ~PROT_DOMAINS_FLAG_BUSY; + KERN_WRITEL(prot_domains_kern_data[from_id].flags, flags); - inter_dom_call_stk_ptr--; + KERN_WRITEL(inter_dom_call_stk_ptr, loc_call_stk_ptr - 1); dispatcher_tail(from_id, to_id, intr_stk); } @@ -204,11 +220,13 @@ prot_domains_sysret_dispatcher_impl(interrupt_stack_t *intr_stk) * \brief Lookup the current protection domain. * \return Kernel data structure for the current protection domain. */ -static volatile dom_kern_data_t * +static volatile dom_kern_data_t ATTR_KERN_ADDR_SPACE * get_current_domain(void) { + uint32_t loc_call_stk_ptr; dom_id_t id; - id = inter_dom_call_stk[inter_dom_call_stk_ptr - 1]; + KERN_READL(loc_call_stk_ptr, inter_dom_call_stk_ptr); + KERN_READL(id, inter_dom_call_stk[loc_call_stk_ptr - 1]); return prot_domains_kern_data + id; } /*---------------------------------------------------------------------------*/ @@ -219,9 +237,11 @@ get_current_domain(void) * \return Result of the check as a Boolean value */ static bool -needs_port_io(volatile dom_kern_data_t *dkd) +needs_port_io(volatile dom_kern_data_t ATTR_KERN_ADDR_SPACE *dkd) { - return (dkd->flags & PROT_DOMAINS_FLAG_PIO) == PROT_DOMAINS_FLAG_PIO; + uint32_t dkd_flags; + KERN_READL(dkd_flags, dkd->flags); + return (dkd_flags & PROT_DOMAINS_FLAG_PIO) == PROT_DOMAINS_FLAG_PIO; } /*---------------------------------------------------------------------------*/ /* Mark the context parameter as volatile so that writes to it will not get @@ -236,7 +256,7 @@ gp_fault_handler(volatile 
struct interrupt_context context) uint32_t cs_lim; uint8_t opcode; - volatile dom_kern_data_t *dkd = get_current_domain(); + volatile dom_kern_data_t ATTR_KERN_ADDR_SPACE *dkd = get_current_domain(); if (needs_port_io(dkd)) { __asm__ __volatile__ ( "mov %%cs, %0\n\t" diff --git a/cpu/x86/mm/syscalls-int.h b/cpu/x86/mm/syscalls-int.h index 7ee4bcb36..a5478d1c5 100644 --- a/cpu/x86/mm/syscalls-int.h +++ b/cpu/x86/mm/syscalls-int.h @@ -74,7 +74,7 @@ extern dom_id_t cur_dom; #nm ":\n\t" \ /* First, load server protection domain ID into EDX, as required by */ \ /* prot_domains_syscall_dispatcher: */ \ - " mov " #dcd ", %edx\n\t" \ + " mov %" SEG_KERN "s:" #dcd ", %edx\n\t" \ SYSCALLS_STUB_EPILOGUE(nm)) void syscalls_int_init(void); diff --git a/cpu/x86/mm/tss.c b/cpu/x86/mm/tss.c index c3628fa8a..b6e0f3deb 100644 --- a/cpu/x86/mm/tss.c +++ b/cpu/x86/mm/tss.c @@ -47,15 +47,20 @@ static segment_desc_t ATTR_BSS_GDT sys_tss_desc; void tss_init(void) { - sys_tss.iomap_base = sizeof(sys_tss); - sys_tss.esp2 = ((uint32_t)stacks_int) + STACKS_SIZE_INT; - sys_tss.ss2 = GDT_SEL_STK_INT; - sys_tss.esp0 = ((uint32_t)stacks_exc) + STACKS_SIZE_EXC; - sys_tss.ss0 = GDT_SEL_STK_EXC; + segment_desc_t seg_desc; - segment_desc_init(&sys_tss_desc, (uint32_t)&sys_tss, sizeof(sys_tss), + /* Initialize TSS */ + KERN_WRITEW(sys_tss.iomap_base, sizeof(sys_tss)); + KERN_WRITEL(sys_tss.esp2, ((uint32_t)stacks_int) + STACKS_SIZE_INT); + KERN_WRITEL(sys_tss.ss2, GDT_SEL_STK_INT); + KERN_WRITEL(sys_tss.esp0, ((uint32_t)stacks_exc) + STACKS_SIZE_EXC); + KERN_WRITEL(sys_tss.ss0, GDT_SEL_STK_EXC); + + segment_desc_init(&seg_desc, + KERN_DATA_OFF_TO_PHYS_ADDR(&sys_tss), sizeof(sys_tss), SEG_FLAG(DPL, PRIV_LVL_EXC) | SEG_DESCTYPE_SYS | SEG_TYPE_TSS32_AVAIL); + gdt_insert(GDT_IDX_OF_DESC(&sys_tss_desc), seg_desc); __asm__ __volatile__ ( "ltr %0" From 73774def6b7c52ce6d3be0e4a41852047e66f46b Mon Sep 17 00:00:00 2001 From: Michael LeMay Date: Sun, 9 Aug 2015 16:38:04 -0700 Subject: [PATCH 5/5] x86, galileo: 
Add sample non-driver protection domain This patch adds a simple non-driver protection domain sample to serve as an example for defining other non-driver protection domains. It simply performs a ping-pong test of protection domain switching latency during boot, including optional accesses to a private metadata region, and prints out the results. --- cpu/x86/quarkX1000.ld | 4 + cpu/x86/quarkX1000_multi_seg.ld | 4 + cpu/x86/quarkX1000_paging.ld | 4 + cpu/x86/startup.h | 46 ++++++ examples/galileo/Makefile | 8 +- examples/galileo/README.md | 11 ++ examples/galileo/prot-domain-switch-latency.c | 156 ++++++++++++++++++ platform/galileo/contiki-main.c | 11 ++ 8 files changed, 243 insertions(+), 1 deletion(-) create mode 100644 cpu/x86/startup.h create mode 100644 examples/galileo/prot-domain-switch-latency.c diff --git a/cpu/x86/quarkX1000.ld b/cpu/x86/quarkX1000.ld index be91a74c7..d4ea66aa6 100644 --- a/cpu/x86/quarkX1000.ld +++ b/cpu/x86/quarkX1000.ld @@ -63,6 +63,10 @@ SECTIONS { { *(.rodata*) + _sdata_kern_startup_func = .; + KEEP(*(.kern_startup_func)) + _edata_kern_startup_func = .; + _sdata_shared_isr = .; KEEP(*(.shared_isr_data*)) _edata_shared_isr = .; diff --git a/cpu/x86/quarkX1000_multi_seg.ld b/cpu/x86/quarkX1000_multi_seg.ld index 945650399..c4e6293cf 100644 --- a/cpu/x86/quarkX1000_multi_seg.ld +++ b/cpu/x86/quarkX1000_multi_seg.ld @@ -119,6 +119,10 @@ SECTIONS { *(.rodata*) *(.data*) + _sdata_kern_startup_func = .; + KEEP(*(.kern_startup_func)) + _edata_kern_startup_func = .; + /* These could alternatively be treated as read-only data to prevent tampering from the user privilege level. 
diff --git a/cpu/x86/quarkX1000_paging.ld b/cpu/x86/quarkX1000_paging.ld index 19e50e1e0..87c89ed8b 100644 --- a/cpu/x86/quarkX1000_paging.ld +++ b/cpu/x86/quarkX1000_paging.ld @@ -128,6 +128,10 @@ SECTIONS { *(.rodata*) *(.data*) + _sdata_kern_startup_func = .; + KEEP(*(.kern_startup_func)) + _edata_kern_startup_func = .; + /* These could alternatively be treated as read-only data to prevent tampering from the user privilege level. diff --git a/cpu/x86/startup.h b/cpu/x86/startup.h new file mode 100644 index 000000000..56ad3b482 --- /dev/null +++ b/cpu/x86/startup.h @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2016, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CPU_X86_STARTUP_H_ +#define CPU_X86_STARTUP_H_ + +/** + * \brief Declare a function that will be automatically invoked from the kernel + * protection domain during boot, after all of the default device + * initialization has been completed. + */ +#define KERN_STARTUP_FUNC(f) \ +static void f(void); \ +static uintptr_t \ + __attribute__((used, section(".kern_startup_func"))) \ + __kern_startup_f = (uintptr_t)f; \ +static void f(void) + +#endif /* CPU_X86_STARTUP_H_ */ diff --git a/examples/galileo/Makefile b/examples/galileo/Makefile index bc7b071ff..0852fe085 100644 --- a/examples/galileo/Makefile +++ b/examples/galileo/Makefile @@ -1,6 +1,6 @@ TARGET=galileo -KNOWN_EXAMPLES = gpio-input gpio-output gpio-interrupt i2c-LSM9DS0 i2c-callbacks print-imr +KNOWN_EXAMPLES = gpio-input gpio-output gpio-interrupt i2c-LSM9DS0 i2c-callbacks print-imr prot-domain-switch-latency ifeq ($(filter $(EXAMPLE),$(KNOWN_EXAMPLES)),) $(info Set the variable EXAMPLE to one of the following Galileo-specific examples:) @@ -12,6 +12,12 @@ ifeq ($(EXAMPLE),print-imr) CFLAGS += -DDBG_IMRS endif +ifeq ($(EXAMPLE),prot-domain-switch-latency) +ifeq ($(SAMPLE_METADATA),1) +CFLAGS += -DSAMPLE_METADATA=1 +endif +endif + CONTIKI_PROJECT = $(EXAMPLE) all: $(CONTIKI_PROJECT) diff --git a/examples/galileo/README.md b/examples/galileo/README.md index 49777a16c..8f3217f05 100644 --- a/examples/galileo/README.md +++ b/examples/galileo/README.md @@ -93,6 
+93,17 @@ Intel Quark X1000 SoC Isolated Memory Regions (IMRs), the Host System Management Mode Controls register, and the Host Memory I/O Boundary register. +Protection Domains +------------------ + +### Protection Domain Switch Latency (EXAMPLE=prot-domain-switch-latency) + +This application measures and prints the average latency of repeatedly +switching from one protection domain to another and back, in ping-pong +fashion. It can optionally perform memory accesses to metadata +associated with the destination protection domain. This feature can +be enabled by specifying SAMPLE_METADATA=1 on the build command line. + References ---------- diff --git a/examples/galileo/prot-domain-switch-latency.c b/examples/galileo/prot-domain-switch-latency.c new file mode 100644 index 000000000..9b6908a2f --- /dev/null +++ b/examples/galileo/prot-domain-switch-latency.c @@ -0,0 +1,156 @@ +/* + * Copyright (C) 2015-2016, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include "contiki.h" +#include "prot-domains.h" +#include "startup.h" +#include "syscalls.h" + +#define CPU_FREQ (400 * 1000 * 1000) +/* Run the test for approximately eight seconds. + * + * Duration expressed as shift amount to avoid integer overflow. + */ +#define DURATION_SECONDS_SHAMT 3 + +#ifdef SAMPLE_METADATA +typedef struct sample_meta { + int cnt; + +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__PAGING + /** + * See the comment on the padding in the metadata for the Intel Quark X1000 + * Ethernet driver for an explanation of why it is sized and structured like + * this. 
+ */ + uint8_t pad[MIN_PAGE_SIZE - sizeof(int)]; +#endif +} __attribute__((packed)) sample_meta_t; + +static sample_meta_t ATTR_BSS_META meta = { .cnt = 0 }; +#endif + +PROT_DOMAINS_ALLOC(dom_client_data_t, ping_dcd); +PROT_DOMAINS_ALLOC(dom_client_data_t, pong_dcd); + +PROCESS(prot_domain_latency_process, "Ping-Pong Process"); +AUTOSTART_PROCESSES(&prot_domain_latency_process); +/*---------------------------------------------------------------------------*/ +void pong(uint64_t *mid, int *cnt); +SYSCALLS_DEFINE_SINGLETON(pong, pong_dcd, + uint64_t *mid, int *cnt) +{ +#ifdef SAMPLE_METADATA + sample_meta_t *loc_meta = (sample_meta_t *)PROT_DOMAINS_META(pong_dcd); +#endif + + *mid = _rdtsc(); + +#ifdef SAMPLE_METADATA + META_READL(*cnt, loc_meta->cnt); + META_WRITEL(loc_meta->cnt, *cnt + 1); +#endif +} +/*---------------------------------------------------------------------------*/ +void ping(void); +SYSCALLS_DEFINE_SINGLETON(ping, ping_dcd) +{ + uint64_t start, mid, end; + uint64_t diff1 = 0, diff2 = 0; + double diff1_d, diff2_d; + int i = 0; + int cnt; + + while(((diff1 + diff2) >> DURATION_SECONDS_SHAMT) < CPU_FREQ) { + start = _rdtsc(); + pong(&mid, &cnt); + end = _rdtsc(); + +#ifdef SAMPLE_METADATA + assert(cnt == i); +#endif + + /* exclude the warm-up round */ + if(i != 0) { + diff1 += mid - start; + diff2 += end - mid; + } + + i++; + } + + diff1_d = diff1; + diff2_d = diff2; + + diff1_d /= i - 1; + diff2_d /= i - 1; + + puts( "Sample protection domain ping-pong switching latency measurements:"); + printf(" %u iterations\n", i - 1); + printf(" Avg. # cycles ping -> pong: %.2f\n", diff1_d); + printf(" + Avg. # cycles pong -> ping: %.2f\n", diff2_d); + puts( " ----------------------------------------"); + printf(" Avg. 
# cycles round-trip: %.2f\n", diff1_d + diff2_d); +} +/*---------------------------------------------------------------------------*/ +KERN_STARTUP_FUNC(sample_domain_init) +{ + PROT_DOMAINS_INIT_ID(ping_dcd); + prot_domains_reg(&ping_dcd, 0, 0, 0, 0, false); + SYSCALLS_INIT(ping); + SYSCALLS_AUTHZ(ping, ping_dcd); + + PROT_DOMAINS_INIT_ID(pong_dcd); + prot_domains_reg(&pong_dcd, 0, 0, +#ifdef SAMPLE_METADATA + (uintptr_t)&meta, sizeof(meta), false); +#else + 0, 0, false); +#endif + SYSCALLS_INIT(pong); + SYSCALLS_AUTHZ(pong, pong_dcd); +} +/*---------------------------------------------------------------------------*/ +PROCESS_THREAD(prot_domain_latency_process, ev, data) +{ + PROCESS_BEGIN(); + + /* Run the latency test from the ping domain so that interrupts + * are disabled during the test. + */ + ping(); + + PROCESS_END(); +} +/*---------------------------------------------------------------------------*/ diff --git a/platform/galileo/contiki-main.c b/platform/galileo/contiki-main.c index 7b31a9961..ccc1f519f 100644 --- a/platform/galileo/contiki-main.c +++ b/platform/galileo/contiki-main.c @@ -54,6 +54,8 @@ PROCINIT( &etimer_process #endif ); +extern int _sdata_kern_startup_func, _edata_kern_startup_func; + /*---------------------------------------------------------------------------*/ void app_main(void) @@ -78,6 +80,8 @@ app_main(void) int main(void) { + uintptr_t *func_ptr; + #ifdef X86_CONF_RESTRICT_DMA quarkX1000_imr_conf(); #endif @@ -104,6 +108,13 @@ main(void) */ pci_root_complex_lock(); + func_ptr = (uintptr_t *)&_sdata_kern_startup_func; + while(func_ptr != (uintptr_t *)&_edata_kern_startup_func) { + ((void (*)(void))*func_ptr)(); + + func_ptr++; + } + prot_domains_leave_main(); return 0;