diff --git a/cpu/x86/Makefile.x86_common b/cpu/x86/Makefile.x86_common index d6e692e9a..c27626815 100644 --- a/cpu/x86/Makefile.x86_common +++ b/cpu/x86/Makefile.x86_common @@ -3,8 +3,10 @@ CONTIKI_CPU_DIRS += . init/common CONTIKI_SOURCEFILES += gdt.c helpers.S idt.c cpu.c CC = gcc -LD = gcc -AS = as +LD = $(CC) +# Use gcc to invoke the assembler so that the preprocessor will be run on each +# file first, enabling us to use macros within assembly language files: +AS = $(CC) OBJCOPY = objcopy SIZE = size STRIP = strip diff --git a/cpu/x86/Makefile.x86_quarkX1000 b/cpu/x86/Makefile.x86_quarkX1000 index e6e794206..4a7668cc6 100644 --- a/cpu/x86/Makefile.x86_quarkX1000 +++ b/cpu/x86/Makefile.x86_quarkX1000 @@ -1,13 +1,42 @@ +# See mm/README.md for a description of available settings: +X86_CONF_PROT_DOMAINS ?= none + include $(CONTIKI)/cpu/x86/Makefile.x86_common -CONTIKI_CPU_DIRS += drivers/legacy_pc drivers/quarkX1000 init/legacy_pc +CONTIKI_CPU_DIRS += drivers/legacy_pc drivers/quarkX1000 init/legacy_pc net mm CONTIKI_SOURCEFILES += bootstrap_quarkX1000.S rtc.c pit.c pic.c irq.c nmi.c pci.c uart-16x50.c uart.c gpio.c i2c.c eth.c shared-isr.c CONTIKI_SOURCEFILES += imr.c msg-bus.c +CONTIKI_SOURCEFILES += stacks.c + +ifneq ($(X86_CONF_PROT_DOMAINS),none) +CONTIKI_SOURCEFILES += prot-domains.c $(X86_CONF_PROT_DOMAINS)-prot-domains.c imr-conf.c + +ifeq ($(X86_CONF_PROT_DOMAINS),paging) +LINKERSCRIPT_SFX = _paging +X86_CONF_SYSCALLS_INT = 1 +ifeq ($(X86_CONF_USE_INVLPG),1) +CFLAGS += -DX86_CONF_USE_INVLPG +endif +# This matches the definition of X86_CONF_PROT_DOMAINS__PAGING in prot-domains.h: +CFLAGS += -DX86_CONF_PROT_DOMAINS=1 +else +$(error Unrecognized setting for X86_CONF_PROT_DOMAINS: \ + $(X86_CONF_PROT_DOMAINS). 
See cpu/x86/mm/README.md for \ + descriptions of available settings) +endif + +ifeq ($(X86_CONF_SYSCALLS_INT),1) +CONTIKI_SOURCEFILES += syscalls-int-asm.S tss.c +endif + +endif CFLAGS += -m32 -march=i586 -mtune=i586 -LDFLAGS += -m32 -Xlinker -T -Xlinker $(CONTIKI)/cpu/x86/quarkX1000.ld -ASFLAGS += --32 -march=i586 -mtune=i586 +LDFLAGS += -m32 -Xlinker -T -Xlinker $(CONTIKI)/cpu/x86/quarkX1000$(LINKERSCRIPT_SFX).ld +# The C compiler is used to invoke the assembler, so the CFLAGS should be +# passed to it on the command line: +ASFLAGS = -c $(CFLAGS) ifeq ($(X86_CONF_RESTRICT_DMA),1) CONTIKI_SOURCEFILES += imr-conf.c diff --git a/cpu/x86/bootstrap_quarkX1000.S b/cpu/x86/bootstrap_quarkX1000.S index 8def35843..4211e51a3 100644 --- a/cpu/x86/bootstrap_quarkX1000.S +++ b/cpu/x86/bootstrap_quarkX1000.S @@ -28,8 +28,7 @@ * OF THE POSSIBILITY OF SUCH DAMAGE. */ -# Kernel -.set STACK_SIZE, 8192 +#include "stacks.h" # Multiboot .set MAGIC_NUMBER, 0x1BADB002 @@ -42,15 +41,9 @@ .long FLAGS .long CHECKSUM -# Reserve space for the C stack. -.lcomm c_stack, STACK_SIZE - -.section .text +.section .boot_text .global start start: cli - movl $(c_stack + STACK_SIZE), %esp - call main - - /* We're not expected to return from main(). 
But if we do we halt */ - call halt + mov $(stacks_main + STACKS_SIZE_MAIN), %esp + call cpu_boot_stage0 diff --git a/cpu/x86/dma.h b/cpu/x86/dma.h index a83ccd2eb..b0122fcdb 100644 --- a/cpu/x86/dma.h +++ b/cpu/x86/dma.h @@ -31,10 +31,18 @@ #ifndef CPU_X86_DMA_H_ #define CPU_X86_DMA_H_ +#include "prot-domains.h" + #ifdef X86_CONF_RESTRICT_DMA #define ATTR_BSS_DMA __attribute__((section(".dma_bss"))) #else +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__NONE #define ATTR_BSS_DMA +#else +#define ATTR_BSS_DMA ATTR_BSS_META +#endif #endif +extern int _sbss_dma_addr, _ebss_dma_addr; + #endif /* CPU_X86_DMA_H_ */ diff --git a/cpu/x86/drivers/legacy_pc/pci.c b/cpu/x86/drivers/legacy_pc/pci.c index 0507ad231..e94c9ecbe 100644 --- a/cpu/x86/drivers/legacy_pc/pci.c +++ b/cpu/x86/drivers/legacy_pc/pci.c @@ -32,12 +32,15 @@ #include "pci.h" #include "helpers.h" +#include "syscalls.h" /* I/O port for PCI configuration address */ #define PCI_CONFIG_ADDR_PORT 0xCF8 /* I/O port for PCI configuration data */ #define PCI_CONFIG_DATA_PORT 0xCFC +PROT_DOMAINS_ALLOC(dom_client_data_t, root_complex_drv); + /*---------------------------------------------------------------------------*/ /* Initialize PCI configuration register address in preparation for accessing * the specified register. @@ -101,40 +104,34 @@ pci_command_enable(pci_config_addr_t addr, uint32_t flags) * \param agent Interrupt Queue Agent to be used, IRQAGENT[0:3]. * \param pin Interrupt Pin Route to be used, INT[A:D]. * \param pirq PIRQ to be used, PIRQ[A:H]. - * \return Returns 0 on success and a negative number otherwise. 
*/ -int -pci_irq_agent_set_pirq(IRQAGENT agent, INTR_PIN pin, PIRQ pirq) +SYSCALLS_DEFINE_SINGLETON(pci_irq_agent_set_pirq, + root_complex_drv, + IRQAGENT agent, INTR_PIN pin, PIRQ pirq) { - pci_config_addr_t pci; uint16_t value; uint32_t rcba_addr, offset = 0; + rcba_addr = PROT_DOMAINS_MMIO(root_complex_drv); + assert(agent >= IRQAGENT0 && agent <= IRQAGENT3); assert(pin >= INTA && pin <= INTD); assert(pirq >= PIRQA && pirq <= PIRQH); - pci.raw = 0; - pci.bus = 0; - pci.dev = 31; - pci.func = 0; - pci.reg_off = 0xF0; /* Root Complex Base Address Register */ - - /* masked to clear non-address bits. */ - rcba_addr = pci_config_read(pci) & ~0x3FFF; - switch(agent) { case IRQAGENT0: - if (pin != INTA) - return -1; + if(pin != INTA) { + halt(); + } offset = 0x3140; break; case IRQAGENT1: offset = 0x3142; break; case IRQAGENT2: - if (pin != INTA) - return -1; + if(pin != INTA) { + halt(); + } offset = 0x3144; break; case IRQAGENT3: @@ -163,8 +160,6 @@ pci_irq_agent_set_pirq(IRQAGENT agent, INTR_PIN pin, PIRQ pirq) } *(uint16_t*)(rcba_addr + offset) = value; - - return 0; } /*---------------------------------------------------------------------------*/ /** @@ -231,13 +226,51 @@ pci_pirq_set_irq(PIRQ pirq, uint8_t irq, uint8_t route_to_legacy) * firmware. * \param c_this Structure that will be initialized to represent the driver. * \param pci_addr PCI base address of device. + * \param mmio_sz Size of MMIO region. * \param meta Base address of optional driver-defined metadata. + * \param meta_sz Size of optional driver-defined metadata. */ void -pci_init(pci_driver_t *c_this, pci_config_addr_t pci_addr, uintptr_t meta) +pci_init(pci_driver_t *c_this, + pci_config_addr_t pci_addr, + size_t mmio_sz, + uintptr_t meta, + size_t meta_sz) { + uintptr_t mmio; + /* The BAR value is masked to clear non-address bits. 
*/ - c_this->mmio = pci_config_read(pci_addr) & ~0xFFF; - c_this->meta = meta; + mmio = pci_config_read(pci_addr) & ~0xFFF; + + prot_domains_reg(c_this, mmio, mmio_sz, meta, meta_sz, false); +} +/*---------------------------------------------------------------------------*/ +/** + * \brief Initialize the PCI root complex driver. + */ +void +pci_root_complex_init(void) +{ + uint32_t rcba_addr; + pci_config_addr_t pci = { .raw = 0 }; + pci.dev = 31; + pci.reg_off = 0xF0; /* Root Complex Base Address Register */ + + /* masked to clear non-address bits. */ + rcba_addr = pci_config_read(pci) & ~0x3FFF; + + PROT_DOMAINS_INIT_ID(root_complex_drv); + prot_domains_reg(&root_complex_drv, rcba_addr, 0x4000, 0, 0, false); + SYSCALLS_INIT(pci_irq_agent_set_pirq); + SYSCALLS_AUTHZ(pci_irq_agent_set_pirq, root_complex_drv); +} +/*---------------------------------------------------------------------------*/ +/** + * \brief Prevent further invocations of pci_irq_agent_set_pirq. + */ +void +pci_root_complex_lock(void) +{ + SYSCALLS_DEAUTHZ(pci_irq_agent_set_pirq, root_complex_drv); } /*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/drivers/legacy_pc/pci.h b/cpu/x86/drivers/legacy_pc/pci.h index c938f9c6c..fff53a048 100644 --- a/cpu/x86/drivers/legacy_pc/pci.h +++ b/cpu/x86/drivers/legacy_pc/pci.h @@ -33,6 +33,8 @@ #include #include "helpers.h" +#include +#include "prot-domains.h" /** PCI configuration register identifier for Base Address Registers */ #define PCI_CONFIG_REG_BAR0 0x10 @@ -98,22 +100,23 @@ uint32_t pci_config_read(pci_config_addr_t addr); void pci_config_write(pci_config_addr_t addr, uint32_t data); void pci_command_enable(pci_config_addr_t addr, uint32_t flags); -/** - * PCI device driver instance with an optional single MMIO range and optional - * metadata. 
- */ -typedef struct pci_driver { - uintptr_t mmio; /**< MMIO range base address */ - uintptr_t meta; /**< Driver-defined metadata base address */ -} pci_driver_t; +typedef dom_client_data_t pci_driver_t; -void pci_init(pci_driver_t *c_this, pci_config_addr_t pci_addr, uintptr_t meta); -int pci_irq_agent_set_pirq(IRQAGENT agent, INTR_PIN pin, PIRQ pirq); +void pci_init(pci_driver_t *c_this, + pci_config_addr_t pci_addr, + size_t mmio_sz, + uintptr_t meta, + size_t meta_sz); +void pci_irq_agent_set_pirq(IRQAGENT agent, INTR_PIN pin, PIRQ pirq); void pci_pirq_set_irq(PIRQ pirq, uint8_t irq, uint8_t route_to_legacy); +void pci_root_complex_init(void); +void pci_root_complex_lock(void); #define PCI_MMIO_READL(c_this, dest, reg_addr) \ - dest = *((volatile uint32_t *)((c_this).mmio + (reg_addr))) + dest = *((volatile uint32_t *) \ + (((uintptr_t)PROT_DOMAINS_MMIO(c_this)) + (reg_addr))) #define PCI_MMIO_WRITEL(c_this, reg_addr, src) \ - *((volatile uint32_t *)((c_this).mmio + (reg_addr))) = (src) + *((volatile uint32_t *) \ + (((uintptr_t)PROT_DOMAINS_MMIO(c_this)) + (reg_addr))) = (src) #endif /* CPU_X86_DRIVERS_LEGACY_PC_PCI_H_ */ diff --git a/cpu/x86/drivers/legacy_pc/shared-isr.c b/cpu/x86/drivers/legacy_pc/shared-isr.c index e13c7ab93..f04f6aba8 100644 --- a/cpu/x86/drivers/legacy_pc/shared-isr.c +++ b/cpu/x86/drivers/legacy_pc/shared-isr.c @@ -62,7 +62,8 @@ shared_isr_init(void) void shared_isr_stub(void); __asm__ __volatile__ ( - ISR_STUB("shared_isr_stub", 0, "shared_handler") + ISR_STUB("shared_isr_stub", 0, "shared_handler", 0) + : ); while(client < &_edata_shared_isr) { @@ -91,11 +92,10 @@ shared_isr_init(void) (client->pin == consistency_check_client->pin) && (client->pirq == consistency_check_client->pirq)); } else { - idt_set_intr_gate_desc(PIC_INT(client->irq), (uint32_t)shared_isr_stub); + idt_set_intr_gate_desc(PIC_INT(client->irq), (uint32_t)shared_isr_stub, + GDT_SEL_CODE_INT, PRIV_LVL_INT); - assert(pci_irq_agent_set_pirq(client->agent, - 
client->pin, - client->pirq) == 0); + pci_irq_agent_set_pirq(client->agent, client->pin, client->pirq); pci_pirq_set_irq(client->pirq, client->irq, 1); diff --git a/cpu/x86/drivers/legacy_pc/uart-16x50.c b/cpu/x86/drivers/legacy_pc/uart-16x50.c index 296719faa..d1f2c498d 100644 --- a/cpu/x86/drivers/legacy_pc/uart-16x50.c +++ b/cpu/x86/drivers/legacy_pc/uart-16x50.c @@ -28,9 +28,12 @@ * OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "uart-16x50.h" #include #include "helpers.h" +#include "paging.h" +#include "prot-domains.h" +#include "syscalls.h" +#include "uart-16x50.h" /* Refer to Intel Quark SoC X1000 Datasheet, Chapter 18 for more details on * UART operation. @@ -64,24 +67,22 @@ typedef struct uart_16x50_regs { volatile uint32_t mcr, lsr, msr, scr, usr, htx, dmasa; } uart_16x50_regs_t; -/*---------------------------------------------------------------------------*/ -/** - * \brief Initialize an MMIO-programmable 16X50 UART. - * \param c_this Structure that will be initialized to represent the device. - * \param pci_addr PCI address of device. - * \param dl Divisor setting to configure the baud rate. +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__PAGING +/* When paging-based protection domains are in use, at least one page of memory + * must be reserved to facilitate access to the MMIO region, since that is the + * smallest unit of memory that can be managed with paging: */ -void -uart_16x50_init(uart_16x50_driver_t *c_this, - pci_config_addr_t pci_addr, - uint16_t dl) -{ - /* This assumes that the UART had an MMIO range assigned to it by the - * firmware during boot. 
- */ - pci_init(c_this, pci_addr, 0); +#define UART_MMIO_SZ MIN_PAGE_SIZE +#else +#define UART_MMIO_SZ sizeof(uart_16x50_regs_t) +#endif - uart_16x50_regs_t *regs = (uart_16x50_regs_t *)c_this->mmio; +void uart_16x50_setup(uart_16x50_driver_t c_this, uint16_t dl); + +/*---------------------------------------------------------------------------*/ +SYSCALLS_DEFINE(uart_16x50_setup, uart_16x50_driver_t c_this, uint16_t dl) +{ + uart_16x50_regs_t *regs = (uart_16x50_regs_t *)PROT_DOMAINS_MMIO(c_this); /* Set the DLAB bit to enable access to divisor settings. */ regs->lcr = UART_LCR_7_DLAB; @@ -109,10 +110,9 @@ uart_16x50_init(uart_16x50_driver_t *c_this, * This procedure will block indefinitely until the UART is ready * to accept the character to be transmitted. */ -void -uart_16x50_tx(uart_16x50_driver_t c_this, uint8_t c) +SYSCALLS_DEFINE(uart_16x50_tx, uart_16x50_driver_t c_this, uint8_t c) { - struct uart_16x50_regs *regs = (uart_16x50_regs_t *)c_this.mmio; + uart_16x50_regs_t *regs = (uart_16x50_regs_t *)PROT_DOMAINS_MMIO(c_this); /* Wait for space in TX FIFO. */ while((regs->lsr & UART_LSR_5_THRE) == 0); @@ -121,3 +121,26 @@ uart_16x50_tx(uart_16x50_driver_t c_this, uint8_t c) regs->rbr_thr_dll = c; } /*---------------------------------------------------------------------------*/ +/** + * \brief Initialize an MMIO-programmable 16X50 UART. + * \param c_this Structure that will be initialized to represent the device. + * \param pci_addr PCI address of device. + * \param dl Divisor setting to configure the baud rate. + */ +void +uart_16x50_init(uart_16x50_driver_t *c_this, + pci_config_addr_t pci_addr, + uint16_t dl) +{ + /* This assumes that the UART had an MMIO range assigned to it by the + * firmware during boot. 
+ */ + pci_init(c_this, pci_addr, UART_MMIO_SZ, 0, 0); + SYSCALLS_INIT(uart_16x50_setup); + SYSCALLS_AUTHZ(uart_16x50_setup, *c_this); + SYSCALLS_INIT(uart_16x50_tx); + SYSCALLS_AUTHZ(uart_16x50_tx, *c_this); + + uart_16x50_setup(*c_this, dl); +} +/*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/drivers/quarkX1000/eth.c b/cpu/x86/drivers/quarkX1000/eth.c index c9322d6a7..5c16b10a5 100644 --- a/cpu/x86/drivers/quarkX1000/eth.c +++ b/cpu/x86/drivers/quarkX1000/eth.c @@ -35,6 +35,7 @@ #include "dma.h" #include "eth.h" #include "helpers.h" +#include "syscalls.h" #include "net/ip/uip.h" #include "pci.h" @@ -158,12 +159,29 @@ typedef struct quarkX1000_eth_meta { /* Transmit descriptor */ volatile quarkX1000_eth_tx_desc_t tx_desc; /* Transmit DMA packet buffer */ - volatile uint8_t tx_buf[UIP_BUFSIZE]; + volatile uint8_t tx_buf[ALIGN(UIP_BUFSIZE, 4)]; /* Receive descriptor */ volatile quarkX1000_eth_rx_desc_t rx_desc; /* Receive DMA packet buffer */ - volatile uint8_t rx_buf[UIP_BUFSIZE]; -} quarkX1000_eth_meta_t; + volatile uint8_t rx_buf[ALIGN(UIP_BUFSIZE, 4)]; + +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__PAGING + /* Domain-defined metadata must fill an even number of pages, since that is + * the minimum granularity of access control supported by paging. However, + * using the "aligned(4096)" attribute causes the alignment of the kernel + * data section to increase, which causes problems when generating UEFI + * binaries, as is described in the linker script. Thus, it is necessary + * to manually pad the structure to fill a page. This only works if the + * sizes of the actual fields of the structure are collectively less than a + * page. 
+ */ + uint8_t pad[MIN_PAGE_SIZE - + (sizeof(quarkX1000_eth_tx_desc_t) + + ALIGN(UIP_BUFSIZE, 4) + + sizeof(quarkX1000_eth_rx_desc_t) + + ALIGN(UIP_BUFSIZE, 4))]; +#endif +} __attribute__((packed)) quarkX1000_eth_meta_t; #define LOG_PFX "quarkX1000_eth: " @@ -188,37 +206,18 @@ typedef struct quarkX1000_eth_meta { #define REG_ADDR_TX_DESC_LIST 0x1010 #define REG_ADDR_DMA_OPERATION 0x1018 -static quarkX1000_eth_driver_t drv; +PROT_DOMAINS_ALLOC(quarkX1000_eth_driver_t, drv); static quarkX1000_eth_meta_t ATTR_BSS_DMA meta; +void quarkX1000_eth_setup(uintptr_t meta_phys_base); + /*---------------------------------------------------------------------------*/ -/** - * \brief Initialize the first Quark X1000 Ethernet MAC. - * - * This procedure assumes that an MMIO range for the device was - * previously assigned, e.g. by firmware. - */ -void -quarkX1000_eth_init(void) +SYSCALLS_DEFINE_SINGLETON(quarkX1000_eth_setup, drv, uintptr_t meta_phys_base) { - pci_config_addr_t pci_addr = { .raw = 0 }; uip_eth_addr mac_addr; uint32_t mac_tmp1, mac_tmp2; - - /* PCI address from section 15.4 of Intel Quark SoC X1000 Datasheet. */ - - pci_addr.dev = 20; - pci_addr.func = 6; - - /* Activate MMIO and DMA access. */ - pci_command_enable(pci_addr, PCI_CMD_2_BUS_MST_EN | PCI_CMD_1_MEM_SPACE_EN); - - printf(LOG_PFX "Activated MMIO and DMA access.\n"); - - pci_addr.reg_off = PCI_CONFIG_REG_BAR0; - - /* Configure the device MMIO range and initialize the driver structure. */ - pci_init(&drv, pci_addr, (uintptr_t)&meta); + quarkX1000_eth_meta_t *loc_meta = + (quarkX1000_eth_meta_t *)PROT_DOMAINS_META(drv); /* Read the MAC address from the device. */ PCI_MMIO_READL(drv, mac_tmp1, REG_ADDR_MACADDR_HI); @@ -246,29 +245,37 @@ quarkX1000_eth_init(void) uip_setethaddr(mac_addr); /* Initialize transmit descriptor. 
*/ - meta.tx_desc.tdes0 = 0; - meta.tx_desc.tdes1 = 0; + loc_meta->tx_desc.tdes0 = 0; + loc_meta->tx_desc.tdes1 = 0; - meta.tx_desc.buf1_ptr = (uint8_t *)meta.tx_buf; - meta.tx_desc.tx_end_of_ring = 1; - meta.tx_desc.first_seg_in_frm = 1; - meta.tx_desc.last_seg_in_frm = 1; - meta.tx_desc.tx_end_of_ring = 1; + loc_meta->tx_desc.buf1_ptr = + (uint8_t *)PROT_DOMAINS_META_OFF_TO_PHYS( + (uintptr_t)&loc_meta->tx_buf, meta_phys_base); + loc_meta->tx_desc.tx_end_of_ring = 1; + loc_meta->tx_desc.first_seg_in_frm = 1; + loc_meta->tx_desc.last_seg_in_frm = 1; + loc_meta->tx_desc.tx_end_of_ring = 1; /* Initialize receive descriptor. */ - meta.rx_desc.rdes0 = 0; - meta.rx_desc.rdes1 = 0; + loc_meta->rx_desc.rdes0 = 0; + loc_meta->rx_desc.rdes1 = 0; - meta.rx_desc.buf1_ptr = (uint8_t *)meta.rx_buf; - meta.rx_desc.own = 1; - meta.rx_desc.first_desc = 1; - meta.rx_desc.last_desc = 1; - meta.rx_desc.rx_buf1_sz = UIP_BUFSIZE; - meta.rx_desc.rx_end_of_ring = 1; + loc_meta->rx_desc.buf1_ptr = + (uint8_t *)PROT_DOMAINS_META_OFF_TO_PHYS( + (uintptr_t)&loc_meta->rx_buf, meta_phys_base); + loc_meta->rx_desc.own = 1; + loc_meta->rx_desc.first_desc = 1; + loc_meta->rx_desc.last_desc = 1; + loc_meta->rx_desc.rx_buf1_sz = UIP_BUFSIZE; + loc_meta->rx_desc.rx_end_of_ring = 1; /* Install transmit and receive descriptors. */ - PCI_MMIO_WRITEL(drv, REG_ADDR_RX_DESC_LIST, (uint32_t)&meta.rx_desc); - PCI_MMIO_WRITEL(drv, REG_ADDR_TX_DESC_LIST, (uint32_t)&meta.tx_desc); + PCI_MMIO_WRITEL(drv, REG_ADDR_RX_DESC_LIST, + PROT_DOMAINS_META_OFF_TO_PHYS( + (uintptr_t)&loc_meta->rx_desc, meta_phys_base)); + PCI_MMIO_WRITEL(drv, REG_ADDR_TX_DESC_LIST, + PROT_DOMAINS_META_OFF_TO_PHYS( + (uintptr_t)&loc_meta->tx_desc, meta_phys_base)); PCI_MMIO_WRITEL(drv, REG_ADDR_MAC_CONF, /* Set the RMII speed to 100Mbps */ @@ -302,28 +309,32 @@ quarkX1000_eth_init(void) * If a frame is received, this procedure copies it into the * global uip_buf buffer. 
*/ -void -quarkX1000_eth_poll(uint16_t *frame_len) +SYSCALLS_DEFINE_SINGLETON(quarkX1000_eth_poll, drv, uint16_t * frame_len) { + uint16_t *loc_frame_len; uint16_t frm_len = 0; + quarkX1000_eth_meta_t *loc_meta = + (quarkX1000_eth_meta_t *)PROT_DOMAINS_META(drv); + + PROT_DOMAINS_VALIDATE_PTR(loc_frame_len, frame_len, sizeof(*frame_len)); /* Check whether the RX descriptor is still owned by the device. If not, * process the received frame or an error that may have occurred. */ - if(meta.rx_desc.own == 0) { - if(meta.rx_desc.err_summary) { + if(loc_meta->rx_desc.own == 0) { + if(loc_meta->rx_desc.err_summary) { fprintf(stderr, LOG_PFX "Error receiving frame: RDES0 = %08x, RDES1 = %08x.\n", - meta.rx_desc.rdes0, meta.rx_desc.rdes1); + loc_meta->rx_desc.rdes0, loc_meta->rx_desc.rdes1); assert(0); } - frm_len = meta.rx_desc.frm_len; + frm_len = loc_meta->rx_desc.frm_len; assert(frm_len <= UIP_BUFSIZE); - memcpy(uip_buf, (void *)meta.rx_buf, frm_len); + memcpy(uip_buf, (void *)loc_meta->rx_buf, frm_len); /* Return ownership of the RX descriptor to the device. */ - meta.rx_desc.own = 1; + loc_meta->rx_desc.own = 1; /* Request that the device check for an available RX descriptor, since * ownership of the descriptor was just transferred to the device. @@ -331,7 +342,7 @@ quarkX1000_eth_poll(uint16_t *frame_len) PCI_MMIO_WRITEL(drv, REG_ADDR_RX_POLL_DEMAND, 1); } - *frame_len = frm_len; + *loc_frame_len = frm_len; } /*---------------------------------------------------------------------------*/ /** @@ -343,27 +354,29 @@ quarkX1000_eth_poll(uint16_t *frame_len) * buffer and signals to the device that a new frame is available to be * transmitted. */ -void -quarkX1000_eth_send(void) +SYSCALLS_DEFINE_SINGLETON(quarkX1000_eth_send, drv) { + quarkX1000_eth_meta_t *loc_meta = + (quarkX1000_eth_meta_t *)PROT_DOMAINS_META(drv); + /* Wait until the TX descriptor is no longer owned by the device. 
*/ - while(meta.tx_desc.own == 1); + while(loc_meta->tx_desc.own == 1); /* Check whether an error occurred transmitting the previous frame. */ - if(meta.tx_desc.err_summary) { + if(loc_meta->tx_desc.err_summary) { fprintf(stderr, LOG_PFX "Error transmitting frame: TDES0 = %08x, TDES1 = %08x.\n", - meta.tx_desc.tdes0, meta.tx_desc.tdes1); + loc_meta->tx_desc.tdes0, loc_meta->tx_desc.tdes1); assert(0); } /* Transmit the next frame. */ assert(uip_len <= UIP_BUFSIZE); - memcpy((void *)meta.tx_buf, uip_buf, uip_len); + memcpy((void *)loc_meta->tx_buf, uip_buf, uip_len); - meta.tx_desc.tx_buf1_sz = uip_len; + loc_meta->tx_desc.tx_buf1_sz = uip_len; - meta.tx_desc.own = 1; + loc_meta->tx_desc.own = 1; /* Request that the device check for an available TX descriptor, since * ownership of the descriptor was just transferred to the device. @@ -371,3 +384,40 @@ quarkX1000_eth_send(void) PCI_MMIO_WRITEL(drv, REG_ADDR_TX_POLL_DEMAND, 1); } /*---------------------------------------------------------------------------*/ +/** + * \brief Initialize the first Quark X1000 Ethernet MAC. + * + * This procedure assumes that an MMIO range for the device was + * previously assigned, e.g. by firmware. + */ +void +quarkX1000_eth_init(void) +{ + pci_config_addr_t pci_addr = { .raw = 0 }; + + /* PCI address from section 15.4 of Intel Quark SoC X1000 Datasheet. */ + + pci_addr.dev = 20; + pci_addr.func = 6; + + /* Activate MMIO and DMA access. */ + pci_command_enable(pci_addr, PCI_CMD_2_BUS_MST_EN | PCI_CMD_1_MEM_SPACE_EN); + + printf(LOG_PFX "Activated MMIO and DMA access.\n"); + + pci_addr.reg_off = PCI_CONFIG_REG_BAR0; + + PROT_DOMAINS_INIT_ID(drv); + /* Configure the device MMIO range and initialize the driver structure. 
*/ + pci_init(&drv, pci_addr, MMIO_SZ, + (uintptr_t)&meta, sizeof(quarkX1000_eth_meta_t)); + SYSCALLS_INIT(quarkX1000_eth_setup); + SYSCALLS_AUTHZ(quarkX1000_eth_setup, drv); + SYSCALLS_INIT(quarkX1000_eth_poll); + SYSCALLS_AUTHZ(quarkX1000_eth_poll, drv); + SYSCALLS_INIT(quarkX1000_eth_send); + SYSCALLS_AUTHZ(quarkX1000_eth_send, drv); + + quarkX1000_eth_setup(prot_domains_lookup_meta_phys_base(&drv)); +} +/*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/drivers/quarkX1000/gpio.c b/cpu/x86/drivers/quarkX1000/gpio.c index 7d7d7dd64..642cad310 100644 --- a/cpu/x86/drivers/quarkX1000/gpio.c +++ b/cpu/x86/drivers/quarkX1000/gpio.c @@ -30,8 +30,11 @@ #include "gpio.h" +#include #include "helpers.h" +#include "paging.h" #include "shared-isr.h" +#include "syscalls.h" /* GPIO Controler Registers */ #define SWPORTA_DR 0x00 @@ -51,25 +54,55 @@ #define GPIO_IRQ 9 +#define HIGHEST_REG LS_SYNC + +#define MMIO_SZ MIN_PAGE_SIZE + +PROT_DOMAINS_ALLOC(pci_driver_t, drv); + struct gpio_internal_data { - pci_driver_t pci; quarkX1000_gpio_callback callback; }; static struct gpio_internal_data data; +void quarkX1000_gpio_mmin(uint32_t offset, uint32_t *res); +SYSCALLS_DEFINE_SINGLETON(quarkX1000_gpio_mmin, drv, + uint32_t offset, uint32_t *res) +{ + uint32_t *loc_res; + + PROT_DOMAINS_VALIDATE_PTR(loc_res, res, sizeof(*res)); + if(HIGHEST_REG < offset) { + halt(); + } + + PCI_MMIO_READL(drv, *loc_res, offset); +} + static inline uint32_t read(uint32_t offset) { uint32_t res; - PCI_MMIO_READL(data.pci, res, offset); + quarkX1000_gpio_mmin(offset, &res); return res; } +void quarkX1000_gpio_mmout(uint32_t offset, uint32_t val); +SYSCALLS_DEFINE_SINGLETON(quarkX1000_gpio_mmout, drv, + uint32_t offset, uint32_t val) +{ + if(HIGHEST_REG < offset) { + halt(); + } + + PCI_MMIO_WRITEL(drv, offset, val); +} + static inline void write(uint32_t offset, uint32_t val) { - PCI_MMIO_WRITEL(data.pci, offset, val); + quarkX1000_gpio_mmout(offset, 
val); } /* value must be 0x0 or 0x1 */ @@ -231,7 +264,12 @@ quarkX1000_gpio_init(void) pci_command_enable(pci_addr, PCI_CMD_1_MEM_SPACE_EN); - pci_init(&data.pci, pci_addr, 0); + PROT_DOMAINS_INIT_ID(drv); + pci_init(&drv, pci_addr, MMIO_SZ, 0, 0); + SYSCALLS_INIT(quarkX1000_gpio_mmin); + SYSCALLS_AUTHZ(quarkX1000_gpio_mmin, drv); + SYSCALLS_INIT(quarkX1000_gpio_mmout); + SYSCALLS_AUTHZ(quarkX1000_gpio_mmout, drv); data.callback = 0; diff --git a/cpu/x86/drivers/quarkX1000/i2c-registers.h b/cpu/x86/drivers/quarkX1000/i2c-registers.h index 7b9e4cec0..3ff7746ec 100644 --- a/cpu/x86/drivers/quarkX1000/i2c-registers.h +++ b/cpu/x86/drivers/quarkX1000/i2c-registers.h @@ -61,6 +61,8 @@ #define QUARKX1000_IC_ENABLE_STATUS 0x9C #define QUARKX1000_IC_FS_SPKLEN 0xA0 +#define QUARKX1000_IC_HIGHEST QUARKX1000_IC_FS_SPKLEN + /* IC_CON */ #define QUARKX1000_IC_CON_MASTER_MODE_SHIFT 0 #define QUARKX1000_IC_CON_MASTER_MODE_MASK 0x01 diff --git a/cpu/x86/drivers/quarkX1000/i2c.c b/cpu/x86/drivers/quarkX1000/i2c.c index 4e5669079..746e52b96 100644 --- a/cpu/x86/drivers/quarkX1000/i2c.c +++ b/cpu/x86/drivers/quarkX1000/i2c.c @@ -32,7 +32,9 @@ #include "i2c.h" #include "i2c-registers.h" +#include "paging.h" #include "shared-isr.h" +#include "syscalls.h" #define I2C_CLOCK_SPEED 25 /* kHz */ #define I2C_FIFO_DEPTH 16 @@ -49,11 +51,15 @@ #define I2C_IRQ 9 +#define MMIO_SZ MIN_PAGE_SIZE + typedef enum { I2C_DIRECTION_READ, I2C_DIRECTION_WRITE } I2C_DIRECTION; +PROT_DOMAINS_ALLOC(pci_driver_t, drv); + struct quarkX1000_i2c_config { QUARKX1000_I2C_SPEED speed; QUARKX1000_I2C_ADDR_MODE addressing_mode; @@ -66,8 +72,6 @@ struct quarkX1000_i2c_config { struct i2c_internal_data { struct quarkX1000_i2c_config config; - pci_driver_t pci; - I2C_DIRECTION direction; uint8_t rx_len; @@ -82,18 +86,46 @@ struct i2c_internal_data { static struct i2c_internal_data device; -static uint32_t +static int inited = 0; + +void quarkX1000_i2c_mmin(uint32_t offset, uint32_t *res); 
+SYSCALLS_DEFINE_SINGLETON(quarkX1000_i2c_mmin, drv, + uint32_t offset, uint32_t *res) +{ + uint32_t *loc_res; + + PROT_DOMAINS_VALIDATE_PTR(loc_res, res, sizeof(*res)); + if(QUARKX1000_IC_HIGHEST < offset) { + halt(); + } + + PCI_MMIO_READL(drv, *loc_res, offset); +} + +static inline uint32_t read(uint32_t offset) { uint32_t res; - PCI_MMIO_READL(device.pci, res, offset); + quarkX1000_i2c_mmin(offset, &res); + return res; } -static void +void quarkX1000_i2c_mmout(uint32_t offset, uint32_t val); +SYSCALLS_DEFINE_SINGLETON(quarkX1000_i2c_mmout, drv, + uint32_t offset, uint32_t val) +{ + if(QUARKX1000_IC_HIGHEST < offset) { + halt(); + } + + PCI_MMIO_WRITEL(drv, offset, val); +} + +static inline void write(uint32_t offset, uint32_t val) { - PCI_MMIO_WRITEL(device.pci, offset, val); + quarkX1000_i2c_mmout(offset, val); } static uint32_t @@ -504,7 +536,7 @@ quarkX1000_i2c_polling_read(uint8_t *buf, uint8_t len, uint16_t addr) int quarkX1000_i2c_is_available(void) { - return device.pci.mmio ? 1 : 0; + return inited; } DEFINE_SHARED_IRQ(I2C_IRQ, IRQAGENT3, INTC, PIRQC, i2c_isr); @@ -522,7 +554,14 @@ quarkX1000_i2c_init(void) pci_command_enable(pci_addr, PCI_CMD_1_MEM_SPACE_EN); - pci_init(&device.pci, pci_addr, 0); + PROT_DOMAINS_INIT_ID(drv); + pci_init(&drv, pci_addr, MMIO_SZ, 0, 0); + SYSCALLS_INIT(quarkX1000_i2c_mmin); + SYSCALLS_AUTHZ(quarkX1000_i2c_mmin, drv); + SYSCALLS_INIT(quarkX1000_i2c_mmout); + SYSCALLS_AUTHZ(quarkX1000_i2c_mmout, drv); + + inited = 1; return 0; } diff --git a/cpu/x86/drivers/quarkX1000/imr-conf.c b/cpu/x86/drivers/quarkX1000/imr-conf.c index b2646e892..8c5b6703a 100644 --- a/cpu/x86/drivers/quarkX1000/imr-conf.c +++ b/cpu/x86/drivers/quarkX1000/imr-conf.c @@ -28,9 +28,9 @@ * OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#include "dma.h" #include "imr.h" - -extern int _sbss_dma_addr, _ebss_dma_addr; +#include "msg-bus.h" /*---------------------------------------------------------------------------*/ void @@ -49,6 +49,8 @@ quarkX1000_imr_conf(void) imr.rdmsk.cpu0 = imr.rdmsk.cpu_0 = 1; imr.wrmsk.cpu0 = imr.wrmsk.cpu_0 = 1; + quarkX1000_msg_bus_init(); + imr.lo.addr = 0; imr.hi.addr = (((uint32_t)&_sbss_dma_addr) - 1) >> QUARKX1000_IMR_SHAMT; quarkX1000_imr_write(imr_idx, imr); @@ -69,5 +71,12 @@ quarkX1000_imr_conf(void) quarkX1000_imr_write(imr_idx, imr); imr_idx++; } + +#ifndef DBG_IMRS + /* The IMRs are locked by the hardware, but the message bus could still + * provide access to other potentially-sensitive functionality. + */ + quarkX1000_msg_bus_lock(); +#endif } /*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/drivers/quarkX1000/msg-bus.c b/cpu/x86/drivers/quarkX1000/msg-bus.c index e7a4bca44..fc64a6f8c 100644 --- a/cpu/x86/drivers/quarkX1000/msg-bus.c +++ b/cpu/x86/drivers/quarkX1000/msg-bus.c @@ -30,6 +30,9 @@ #include "msg-bus.h" #include "pci.h" +#include "syscalls.h" + +PROT_DOMAINS_ALLOC(dom_client_data_t, quarkX1000_msg_bus); /** Message bus control register */ #define MCR_PCI_REG_ADDR 0xD0 @@ -83,15 +86,21 @@ request_op(uint8_t port, uint32_t reg_off, uint8_t opcode) * \param reg_off Register/offset identifier of message bus register to read. * \param val Storage location for value that has been read. 
*/ -void -quarkX1000_msg_bus_read(uint8_t port, uint32_t reg_off, uint32_t *val) +SYSCALLS_DEFINE_SINGLETON(quarkX1000_msg_bus_read, + quarkX1000_msg_bus, + uint8_t port, + uint32_t reg_off, + uint32_t *val) { + uint32_t *loc_val; pci_config_addr_t pci_addr = { .raw = 0 }; + PROT_DOMAINS_VALIDATE_PTR(loc_val, val, sizeof(*val)); + request_op(port, reg_off, 0x10); pci_addr.reg_off = MDR_PCI_REG_ADDR; - *val = pci_config_read(pci_addr); + *loc_val = pci_config_read(pci_addr); } /*---------------------------------------------------------------------------*/ /** @@ -100,8 +109,11 @@ quarkX1000_msg_bus_read(uint8_t port, uint32_t reg_off, uint32_t *val) * \param reg_off Register/offset identifier of message bus register to write. * \param val Value to write. */ -void -quarkX1000_msg_bus_write(uint8_t port, uint32_t reg_off, uint32_t val) +SYSCALLS_DEFINE_SINGLETON(quarkX1000_msg_bus_write, + quarkX1000_msg_bus, + uint8_t port, + uint32_t reg_off, + uint32_t val) { pci_config_addr_t pci_addr = { .raw = 0 }; @@ -111,3 +123,21 @@ quarkX1000_msg_bus_write(uint8_t port, uint32_t reg_off, uint32_t val) request_op(port, reg_off, 0x11); } /*---------------------------------------------------------------------------*/ +void +quarkX1000_msg_bus_init(void) +{ + PROT_DOMAINS_INIT_ID(quarkX1000_msg_bus); + prot_domains_reg(&quarkX1000_msg_bus, 0, 0, 0, 0, true); + SYSCALLS_INIT(quarkX1000_msg_bus_read); + SYSCALLS_AUTHZ(quarkX1000_msg_bus_read, quarkX1000_msg_bus); + SYSCALLS_INIT(quarkX1000_msg_bus_write); + SYSCALLS_AUTHZ(quarkX1000_msg_bus_write, quarkX1000_msg_bus); +} +/*---------------------------------------------------------------------------*/ +void +quarkX1000_msg_bus_lock(void) +{ + SYSCALLS_DEAUTHZ(quarkX1000_msg_bus_read, quarkX1000_msg_bus); + SYSCALLS_DEAUTHZ(quarkX1000_msg_bus_write, quarkX1000_msg_bus); +} +/*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/drivers/quarkX1000/msg-bus.h 
b/cpu/x86/drivers/quarkX1000/msg-bus.h index 0b810e06d..11bab0849 100644 --- a/cpu/x86/drivers/quarkX1000/msg-bus.h +++ b/cpu/x86/drivers/quarkX1000/msg-bus.h @@ -44,6 +44,8 @@ * the message bus. */ +void quarkX1000_msg_bus_init(void); +void quarkX1000_msg_bus_lock(void); void quarkX1000_msg_bus_read(uint8_t port, uint32_t reg_off, uint32_t *val); void quarkX1000_msg_bus_write(uint8_t port, uint32_t reg_off, uint32_t val); diff --git a/cpu/x86/drivers/quarkX1000/uart.c b/cpu/x86/drivers/quarkX1000/uart.c index 23731ba93..dcd0af8f2 100644 --- a/cpu/x86/drivers/quarkX1000/uart.c +++ b/cpu/x86/drivers/quarkX1000/uart.c @@ -32,8 +32,8 @@ #include "uart-16x50.h" #include -static uart_16x50_driver_t quarkX1000_uart0; -static uart_16x50_driver_t quarkX1000_uart1; +PROT_DOMAINS_ALLOC(uart_16x50_driver_t, quarkX1000_uart0); +PROT_DOMAINS_ALLOC(uart_16x50_driver_t, quarkX1000_uart1); /* Divisor setting for 115200 baud from section 18.2.2 of Intel Quark SoC * X1000 Datasheet. @@ -49,6 +49,7 @@ void quarkX1000_uart_init(quarkX1000_uart_dev_t dev) { pci_config_addr_t pci_addr; + uart_16x50_driver_t *drv; assert((dev == QUARK_X1000_UART_0) || (dev == QUARK_X1000_UART_1)); @@ -59,7 +60,14 @@ quarkX1000_uart_init(quarkX1000_uart_dev_t dev) pci_addr.func = (dev == QUARK_X1000_UART_0) ? 1 : 5; pci_addr.reg_off = PCI_CONFIG_REG_BAR0; - uart_16x50_init((dev == QUARK_X1000_UART_0) ? 
&quarkX1000_uart0 : &quarkX1000_uart1, pci_addr, QUARK_X1000_UART_DL_115200); + if(dev == QUARK_X1000_UART_0) { + drv = &quarkX1000_uart0; + PROT_DOMAINS_INIT_ID(quarkX1000_uart0); + } else { + drv = &quarkX1000_uart1; + PROT_DOMAINS_INIT_ID(quarkX1000_uart1); + } + uart_16x50_init(drv, pci_addr, QUARK_X1000_UART_DL_115200); } /*---------------------------------------------------------------------------*/ /** diff --git a/cpu/x86/helpers.h b/cpu/x86/helpers.h index 91b120a9e..1ef312e14 100644 --- a/cpu/x86/helpers.h +++ b/cpu/x86/helpers.h @@ -37,6 +37,17 @@ void halt(void) __attribute__((__noreturn__)); +#define STRINGIFY(x) #x +/* The C preprocessor will not expand macro arguments that are converted to + * strings in the macro body using the '#' operator. The EXP_STRINGIFY macro + * introduces an additional level of argument expansion for instances where + * the developer wishes to convert the expanded argument to a string. + */ +#define EXP_STRINGIFY(x) STRINGIFY(x) + +#define ALIGN(x, amt) \ + (((x) & ~((amt) - 1)) + ((((x) & ((amt) - 1)) == 0) ? 0 : (amt))) + /** Wrappers for the assembly 'out' instruction. */ void outb(uint16_t port, uint8_t val); void outl(uint16_t port, uint32_t val); diff --git a/cpu/x86/init/common/cpu.c b/cpu/x86/init/common/cpu.c index a174853cc..94ec2ddab 100644 --- a/cpu/x86/init/common/cpu.c +++ b/cpu/x86/init/common/cpu.c @@ -28,11 +28,13 @@ * OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include "cpu.h" #include "gdt.h" #include "helpers.h" #include "idt.h" #include "interrupt.h" #include "irq.h" +#include "stacks.h" static void double_fault_handler(struct interrupt_context context) @@ -40,16 +42,79 @@ double_fault_handler(struct interrupt_context context) halt(); } /*---------------------------------------------------------------------------*/ -void -cpu_init(void) +/* The OS has switched to its own segment descriptors. However, the protection + * domain support, if enabled, has not yet been fully activated. 
+ */ +static void +boot_stage1(void) { - gdt_init(); idt_init(); /* Set an interrupt handler for Double Fault exception. This way, we avoid * the system to triple fault, leaving no trace about what happened. */ - SET_INTERRUPT_HANDLER(8, 1, double_fault_handler); + SET_EXCEPTION_HANDLER(8, 1, double_fault_handler); - irq_init(); + /* Initialize protection domain support, if enabled */ + prot_domains_init(); + + prot_domains_leave_boot_stage1(); +} +/*---------------------------------------------------------------------------*/ +int main(void); +/* This routine runs with the initial, flat address space, even if protection + * domain support is enabled. The goal behind the design of this routine is to + * keep it as short as possible, since it is unable to directly reference data + * and invoke functions that are intended to be accessible later after the + * system has booted when a multi-segment protection domain model is in use. + */ +void +cpu_boot_stage0(void) +{ + /* Reserve three stack slots for return addresses */ + uintptr_t top_of_stack = STACKS_INIT_TOP; + +#if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__NONE + uintptr_t *top_of_stack_ptr = (uintptr_t *)top_of_stack; + + top_of_stack_ptr[0] = (uintptr_t)prot_domains_launch_kernel; + top_of_stack_ptr[1] = (uintptr_t)prot_domains_launch_app; +#endif + + /* Perform common GDT initialization */ + gdt_init(); + + /* Switch all data segment registers to the newly-initialized flat data + * descriptor. + */ + __asm__( + "mov %0, %%ds\n\t" + "mov %0, %%es\n\t" + "mov %0, %%fs\n\t" + "mov %0, %%gs\n\t" + : + : "r" (GDT_SEL_DATA_FLAT) + ); + + /** + * Perform specific GDT initialization tasks for the protection domain + * implementation that is enabled, if any. 
+ */ + prot_domains_gdt_init(); + + /* Do not pass memory operands to the asm block below, since it is + * switching from the flat address space to a multi-segment address space + * model if such a model is used by the enabled protection domain + * implementation, if any. + */ + __asm__( + "mov %[_ss_], %%ss\n\t" + "mov %[_esp_], %%esp\n\t" + "ljmp %[_cs_], %[_stage1_]\n\t" + : + : [_ss_] "r" (GDT_SEL_STK_EXC), + [_esp_] "r" (top_of_stack), + [_cs_] "i" ((uint16_t)GDT_SEL_CODE_EXC), + [_stage1_] "i" (boot_stage1) + ); } diff --git a/cpu/x86/init/common/cpu.h b/cpu/x86/init/common/cpu.h index a56d0db5a..4fd9b835b 100644 --- a/cpu/x86/init/common/cpu.h +++ b/cpu/x86/init/common/cpu.h @@ -31,6 +31,8 @@ #ifndef CPU_H #define CPU_H -void cpu_init(void); +#include "prot-domains.h" + +void cpu_boot_stage0(void) ATTR_CODE_BOOT; #endif /* CPU_H */ diff --git a/cpu/x86/init/common/gdt.c b/cpu/x86/init/common/gdt.c index 39a8a7ce4..f7fa10342 100644 --- a/cpu/x86/init/common/gdt.c +++ b/cpu/x86/init/common/gdt.c @@ -29,45 +29,15 @@ */ #include +#include "gdt.h" +#include "gdt-layout.h" +#include "helpers.h" +#include "prot-domains.h" +#include "segmentation.h" -#define NUM_DESC 3 - -#define GDT_IDX_NULL 0 -#define GDT_IDX_CODE 1 -#define GDT_IDX_DATA 2 - -/* All code in the x86 port of Contiki runs at ring (privilege) level 0 */ -#define PRIV_LVL 0 - -/* Compute GDT selector from descriptor index and requested privilege level */ -#define GDT_SEL(IDX, RPL) (((IDX) << 3) | (RPL)) - -#define GDT_SEL_NULL GDT_SEL(GDT_IDX_NULL, 0) -#define GDT_SEL_CODE GDT_SEL(GDT_IDX_CODE, PRIV_LVL) -#define GDT_SEL_DATA GDT_SEL(GDT_IDX_DATA, PRIV_LVL) - -/* Each define here is for a specific flag in the descriptor. Refer to Intel - * Combined Manual (Intel 64 and IA-32 Architectures Software Developer's - * Manual), Vol. 3, Section 3.4.5 for a description of each flag. 
- */ -#define SEG_DESCTYPE(x) ((x) << 0x04) /* Descriptor type (0 for system, 1 for code/data) */ -#define SEG_PRES(x) ((x) << 0x07) /* Present */ -#define SEG_SAVL(x) ((x) << 0x0C) /* Available for system use */ -#define SEG_LONG(x) ((x) << 0x0D) /* Long mode */ -#define SEG_SIZE(x) ((x) << 0x0E) /* Size (0 for 16-bit, 1 for 32) */ -#define SEG_GRAN(x) ((x) << 0x0F) /* Granularity (0 for 1B - 1MB, 1 for 4KB - 4GB) */ -#define SEG_PRIV(x) (((x) & 0x03) << 0x05) /* Set privilege level (0 - 3) */ - -#define SEG_DATA_RDWR 0x02 /* Read/Write */ -#define SEG_CODE_EXRD 0x0A /* Execute/Read */ - -#define GDT_CODE_PL0 SEG_DESCTYPE(1) | SEG_PRES(1) | SEG_SAVL(0) | \ - SEG_LONG(0) | SEG_SIZE(1) | SEG_GRAN(1) | \ - SEG_PRIV(0) | SEG_CODE_EXRD - -#define GDT_DATA_PL0 SEG_DESCTYPE(1) | SEG_PRES(1) | SEG_SAVL(0) | \ - SEG_LONG(0) | SEG_SIZE(1) | SEG_GRAN(1) | \ - SEG_PRIV(0) | SEG_DATA_RDWR +#define GDT_MEM_PL0 (SEG_DESCTYPE_NSYS | SEG_GRAN_PAGE) +#define GDT_CODE_PL0 (GDT_MEM_PL0 | SEG_TYPE_CODE_EXRD) +#define GDT_DATA_PL0 (GDT_MEM_PL0 | SEG_TYPE_DATA_RDWR) typedef struct gdtr { @@ -75,41 +45,53 @@ typedef struct gdtr uint32_t base; } __attribute__((packed)) gdtr_t; -typedef uint64_t segment_desc_t; - /* From Intel Combined Manual, Vol. 3 , Section 3.5.1: The base addresses of * the GDT should be aligned on an eight-byte boundary to yield the best * processor performance. 
*/ -static segment_desc_t gdt[NUM_DESC] __attribute__ ((aligned (8))); +segment_desc_t __attribute__ ((aligned(8))) ATTR_BSS_GDT_START + gdt[GDT_NUM_FIXED_DESC]; -static void -set_descriptor(unsigned int index, uint32_t base, uint32_t limit, uint16_t flag) +#define GDT_LEN \ + ((((uintptr_t)&_ebss_gdt_addr) - \ + (uintptr_t)gdt)/sizeof(segment_desc_t)) + +/*---------------------------------------------------------------------------*/ +static void ATTR_CODE_BOOT +set_descriptor(unsigned int index, + uint32_t base, + uint32_t len, + uint16_t flag) { segment_desc_t descriptor; - if (index >= NUM_DESC) - return; + if(GDT_LEN <= index) { + halt(); + } - /* Create the high 32 bit segment */ - descriptor = limit & 0x000F0000; /* set limit bits 19:16 */ - descriptor |= (flag << 8) & 0x00F0FF00; /* set type, p, dpl, s, g, d/b, l and avl fields */ - descriptor |= (base >> 16) & 0x000000FF; /* set base bits 23:16 */ - descriptor |= base & 0xFF000000; /* set base bits 31:24 */ - - /* Shift by 32 to allow for low part of segment */ - descriptor <<= 32; - - /* Create the low 32 bit segment */ - descriptor |= base << 16; /* set base bits 15:0 */ - descriptor |= limit & 0x0000FFFF; /* set limit bits 15:0 */ + segment_desc_init(&descriptor, base, len, flag); /* Save descriptor into gdt */ gdt[index] = descriptor; } +/*---------------------------------------------------------------------------*/ +void +gdt_copy_desc_change_dpl(unsigned int dest_idx, + unsigned int src_idx, + unsigned dpl) +{ + segment_desc_t desc; + if((GDT_LEN <= dest_idx) || (GDT_LEN <= src_idx)) { + halt(); + } -/* This function initializes the Global Offset Table. For simplicity, the + desc = gdt[src_idx]; + SEG_SET_FLAG(desc, DPL, dpl); + gdt[dest_idx] = desc; +} +/*---------------------------------------------------------------------------*/ +/* This function initializes the Global Descriptor Table. For simplicity, the * memory is organized following the flat model. 
Thus, memory appears to * Contiki as a single continuous address space. Code, data, and stack * are all contained in this address space (so called linear address space). @@ -120,29 +102,35 @@ gdt_init(void) gdtr_t gdtr; /* Initialize gdtr structure */ - gdtr.limit = sizeof(segment_desc_t) * NUM_DESC - 1; + gdtr.limit = sizeof(segment_desc_t) * GDT_LEN - 1; gdtr.base = (uint32_t) &gdt; /* Initialize descriptors */ set_descriptor(GDT_IDX_NULL, 0, 0, 0); - set_descriptor(GDT_IDX_CODE, 0, 0x0FFFFF, GDT_CODE_PL0); - set_descriptor(GDT_IDX_DATA, 0, 0x0FFFFF, GDT_DATA_PL0); + set_descriptor(GDT_IDX_CODE_FLAT, 0, 0x100000, GDT_CODE_PL0); + set_descriptor(GDT_IDX_DATA_FLAT, 0, 0x100000, GDT_DATA_PL0); - /* Load GDTR register and update segment registers. - * - * CS register cannot be changed directly. For that reason, we do a far jump. - */ - __asm__ ("lgdt %[_gdtr_]\n\t" - "jmp %[_cs_], $1f\n\t" - "1:\n\t" - "mov %[_ds_], %%ds\n\t" - "mov %[_ds_], %%ss\n\t" - "mov %[_ds_], %%es\n\t" - "mov %[_ds_], %%fs\n\t" - "mov %[_ds_], %%gs\n\t" - : - : [_gdtr_] "m" (gdtr), - [_cs_] "i" (GDT_SEL_CODE), - [_ds_] "r" (GDT_SEL_DATA) - ); + /* Load GDTR */ + __asm__ __volatile__ ("lgdt %0" :: "m" (gdtr)); } +/*---------------------------------------------------------------------------*/ +void +gdt_insert(unsigned int idx, segment_desc_t desc) +{ + if(GDT_LEN <= idx) { + halt(); + } + + gdt[idx] = desc; +} +/*---------------------------------------------------------------------------*/ +void +gdt_lookup(unsigned int idx, segment_desc_t *desc) +{ + if((GDT_LEN <= idx) || (desc == NULL)) { + halt(); + } + + *desc = gdt[idx]; +} +/*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/init/common/gdt.h b/cpu/x86/init/common/gdt.h index 3db17f08c..37f1f4dbe 100644 --- a/cpu/x86/init/common/gdt.h +++ b/cpu/x86/init/common/gdt.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015, Intel Corporation. All rights reserved. 
+ * Copyright (C) 2015-2016, Intel Corporation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -31,6 +31,32 @@ #ifndef GDT_H #define GDT_H -void gdt_init(void); +#include "gdt-layout.h" +#include "prot-domains.h" +#include "segmentation.h" + +extern segment_desc_t gdt[]; +extern int _ebss_gdt_addr; + +#define GDT_IDX_OF_DESC(ptr) \ + ((((uintptr_t)(ptr)) - ((uintptr_t)&gdt))/ \ + sizeof(segment_desc_t)) + +/** + * \brief Compute the selector for a GDT entry allocated somewhere besides gdt.c. + * \param ptr Pointer to GDT descriptor. + * \param rpl Requested Privilege Level. + */ +#define GDT_SEL_OF_DESC(ptr, rpl) GDT_SEL(GDT_IDX_OF_DESC(ptr), rpl) + +#define ATTR_BSS_GDT __attribute__((section(".gdt_bss"))) +#define ATTR_BSS_GDT_START __attribute__((section(".gdt_bss_start"))) + +void gdt_copy_desc_change_dpl(unsigned int dest_idx, + unsigned int src_idx, + unsigned dpl); +void gdt_init(void) ATTR_CODE_BOOT; +void gdt_insert(unsigned int idx, segment_desc_t desc); +void gdt_lookup(unsigned int idx, segment_desc_t *desc); #endif /* GDT_H */ diff --git a/cpu/x86/init/common/idt.c b/cpu/x86/init/common/idt.c index d561f49f7..441668a75 100644 --- a/cpu/x86/init/common/idt.c +++ b/cpu/x86/init/common/idt.c @@ -28,9 +28,13 @@ * OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include "gdt-layout.h" +#include "prot-domains.h" #include #include "helpers.h" +#include "segmentation.h" +#include "idt.h" #define NUM_DESC 256 @@ -55,22 +59,27 @@ typedef struct intr_gate_desc { * of the IDT should be aligned on an 8-byte boundary to maximize performance * of cache line fills. 
*/ -static intr_gate_desc_t idt[NUM_DESC] __attribute__ ((aligned(8))); +static intr_gate_desc_t __attribute__((aligned(8))) ATTR_BSS_KERN + idt[NUM_DESC]; +/*---------------------------------------------------------------------------*/ /* XXX: If you change this function prototype, make sure you fix the assembly - * code in SET_INTERRUPT_HANDLER macro in interrupt.h. Otherwise, you might + * code in SET_INT_EXC_HANDLER macro in interrupt.h. Otherwise, you might * face a very-hard-to-find bug in the interrupt handling system. */ void -idt_set_intr_gate_desc(int intr_num, uint32_t offset) +idt_set_intr_gate_desc(int intr_num, + uint32_t offset, + uint16_t cs, + uint16_t dpl) { intr_gate_desc_t *desc = &idt[intr_num]; desc->offset_low = offset & 0xFFFF; - desc->selector = 0x08; /* Offset in GDT for code segment */ + desc->selector = cs; desc->fixed = BIT(9) | BIT(10); desc->d = 1; - desc->dpl = 0; + desc->dpl = dpl; desc->p = 1; desc->offset_high = (offset >> 16) & 0xFFFF; } diff --git a/cpu/x86/init/common/idt.h b/cpu/x86/init/common/idt.h index d29b97153..18f168ad8 100644 --- a/cpu/x86/init/common/idt.h +++ b/cpu/x86/init/common/idt.h @@ -32,8 +32,12 @@ #define IDT_H #include +#include "prot-domains.h" -void idt_init(void); -void idt_set_intr_gate_desc(int intr_num, uint32_t offset); +void idt_init(void) ATTR_CODE_BOOT; +void idt_set_intr_gate_desc(int intr_num, + uint32_t offset, + uint16_t cs, + uint16_t dpl); #endif /* IDT_H */ diff --git a/cpu/x86/init/common/interrupt.h b/cpu/x86/init/common/interrupt.h index 601695bde..10b906be4 100644 --- a/cpu/x86/init/common/interrupt.h +++ b/cpu/x86/init/common/interrupt.h @@ -32,10 +32,17 @@ #define INTERRUPT_H #include +#include "gdt-layout.h" #include "idt.h" struct interrupt_context { + /* The general-purpose register values are saved by the pushal instruction in + * the interrupt dispatcher. 
Having access to these saved values may be + * useful in some future interrupt or exception handler, and saving and later + * restoring them also enables the ISR to freely overwrite the EAX, ECX, and + * EDX registers as is permitted by the cdecl calling convention. + */ uint32_t edi; uint32_t esi; uint32_t ebp; @@ -44,16 +51,28 @@ struct interrupt_context { uint32_t edx; uint32_t ecx; uint32_t eax; + /* These two values are pushed on the stack by the CPU when it delivers an + * exception with an associated error code. Currently, only the double fault + * handler accepts this structure as a parameter, and that type of exception + * does have an associated error code. + */ uint32_t error_code; uint32_t eip; + /* The CPU pushes additional values beyond these on the stack, specifically + * the code segment descriptor and flags. If a privilege-level change occurs + * during delivery, the CPU additionally pushes the stack pointer and stack + * segment descriptor. + */ }; -#define ISR_STUB(label_str, has_error_code, handler_str) \ +#define ISR_STUB(label_str, has_error_code, handler_str, exc) \ "jmp 2f\n\t" \ ".align 4\n\t" \ label_str ":\n\t" \ " pushal\n\t" \ + PROT_DOMAINS_ENTER_ISR(exc) \ " call " handler_str "\n\t" \ + PROT_DOMAINS_LEAVE_ISR(exc) \ " popal\n\t" \ " .if " #has_error_code "\n\t" \ " add $4, %%esp\n\t" \ @@ -72,6 +91,14 @@ struct interrupt_context { * void handler(void) * Otherwise, it should be: * void handler(struct interrupt_context context) + * exc: 0 if this is an interrupt, which should be handled + * at the interrupt privilege level. 1 if this is an + * exception, which should be handled at the + * exception privilege level. + * dpl: Privilege level for IDT descriptor, which is the + * numerically-highest privilege level that can + * generate this interrupt with a software interrupt + * instruction. 
* * Since there is no easy way to write an Interrupt Service Routines * (ISR) in C (for further information on this, see [1]), we provide @@ -81,18 +108,30 @@ struct interrupt_context { * * [1] http://wiki.osdev.org/Interrupt_Service_Routines */ -#define SET_INTERRUPT_HANDLER(num, has_error_code, handler) \ - do { \ - __asm__ __volatile__ ( \ - "push $1f\n\t" \ - "push %0\n\t" \ - "call %P1\n\t" \ - "add $8, %%esp\n\t" \ - ISR_STUB("1", has_error_code, "%P2") \ - :: "g" (num), "i" (idt_set_intr_gate_desc), "i" (handler) \ - : "eax", "ecx", "edx" \ - ); \ +#define SET_INT_EXC_HANDLER(num, has_error_code, handler, exc, dpl) \ + do { \ + __asm__ __volatile__ ( \ + "pushl %[_dpl_]\n\t" \ + "pushl %[_cs_]\n\t" \ + "pushl $1f\n\t" \ + "pushl %[_isr_num_]\n\t" \ + "call idt_set_intr_gate_desc\n\t" \ + "add $16, %%esp\n\t" \ + ISR_STUB("1", has_error_code, "%P[_handler_]", exc) \ + : \ + : [_isr_num_] "g" (num), \ + [_handler_] "i" (handler), \ + [_cs_] "i" (exc ? GDT_SEL_CODE_EXC : GDT_SEL_CODE_INT), \ + [_dpl_] "i" (dpl) \ + /* the invocation of idt_set_intr_gate_desc may clobber */ \ + /* the caller-saved registers: */ \ + : "eax", "ecx", "edx" \ + ); \ } while (0) +#define SET_INTERRUPT_HANDLER(num, has_error_code, handler) \ + SET_INT_EXC_HANDLER(num, has_error_code, handler, 0, PRIV_LVL_INT) +#define SET_EXCEPTION_HANDLER(num, has_error_code, handler) \ + SET_INT_EXC_HANDLER(num, has_error_code, handler, 1, PRIV_LVL_EXC) /* Disable maskable hardware interrupts */ #define DISABLE_IRQ() \ diff --git a/cpu/x86/mm/README.md b/cpu/x86/mm/README.md new file mode 100644 index 000000000..8990beec9 --- /dev/null +++ b/cpu/x86/mm/README.md @@ -0,0 +1,669 @@ +X86 Lightweight Protection Domain Support for Contiki +===================================================== + +Introduction +------------ + +The X86 port of Contiki implements a simple, lightweight form of +protection domains using a pluggable framework. 
Currently, the +following plugin is available: + + - Flat memory model with paging. + +For an introduction to paging and possible ways in which it can be +used, refer to the following resources: + + - Intel Combined Manual (Intel 64 and IA-32 Architectures Software + Developer's Manual), Vol. 3, Chapter 4 + - Programming the 80386, by John H. Crawford and Patrick + P. Gelsinger, Chapter 5 + +The overall goal of a protection domain implementation within this +framework is to define a set of resources that should be accessible to +each protection domain and to prevent that protection domain from +accessing other resources. The details of each implementation of +protection domains may differ substantially, but they should all be +guided by the principle of least privilege [1]. However, that +idealized principle is balanced against the practical objectives of +limiting the number of relatively time-consuming context switches and +minimizing changes to existing code. In fact, no changes were made to +code outside of the CPU- and platform-specific code directories for +the initial plugin. + +Each protection domain can optionally be associated with a metadata +and/or MMIO region. The hardware can support additional regions per +protection domain, but that would increase complexity and is unneeded +for the existing protection domains. + +After boot, all code runs in the context of some protection domain. +Two default protection domains are implemented: + +- kern: Kernel protection domain that is more privileged than any + other protection domain. As little code as possible should be placed + in this protection domain. +- app: Application protection domain used whenever special privileges + are not required. + +Additional protection domains are defined as needed. For example, +each driver may reside in a separate protection domain, although not +all drivers require additional privileges beyond those available in +the relevant scheduling context in the app protection domain. 
The +Ethernet and UART drivers are assigned separate protection domains. +Non-driver protection domains can also be defined. Other drivers only +require access to programmed IO ports accessible via the IN* and OUT* +instructions, and such drivers do not require separate protection +domains. They run in the Contiki preemptive scheduling context and +the kernel protection domain, both of which are granted access to all +IO ports. + +Each protection domain may have associated system calls. A system +call transfers control from a client protection domain to a defined +entrypoint in a server protection domain. As their name suggests, +system calls adhere to a synchronous call-return model (rather than +some alternative such as an asynchronous message-passing model). To +invoke a system call, the client provides two identifiers to the +system call dispatcher. The first identifies the server domain and +the second identifies the system call to be invoked. The protection +domain implementation should associate allowable system calls with +particular server protection domains and reject any system call +requests that are not within that set of allowable system calls. The +system call implementations do not restrict the clients that are +permitted to invoke each system call. No modifications that the +client can make to the server domain and system call identifiers can +open up new entrypoints into the server domain. The entrypoints are +fixed at boot time. + +However, if the identifiers were stored in shared memory, it may be +possible for a protection domain to influence the system calls issued +by some other protection domain, which may be undesirable. Thus, the +server domain identifiers are stored in memory that can only be +written by the kernel protection domain and the system call +identifiers are embedded in the code. 
+ +The system call dispatcher is responsible for reconfiguring the system +to enforce the appropriate resource access controls for the server +protection domain. It should then transfer control to the approved +entrypoint for the requested system call. + +Contiki defines a process concept that is orthogonal to protection +domains [2]. A single Contiki process may run code in multiple +protection domains at various points in time. Contiki processes run +in a cooperative scheduling context. Contiki also defines a +preemptive scheduling context for interrupt handlers and real-time +timers. When protection domain support is enabled, interrupts are +only enabled when the application protection domain is active and is +running code in the cooperative scheduling context. Code running in +the preemptive context may also invoke multiple protection domains. +Contiki can also support preemptive multithreading, but support for +that has not yet been added to the X86 port so we do not discuss it +further. + +A single stack is shared by all code that runs in the cooperative +scheduling context in all protection domains, and separate stacks are +defined for short interrupt dispatchers in the preemptive scheduling +context and for exception handlers and software system call +dispatchers. Except for the interrupt dispatchers, code in the +preemptive scheduling context also shares the same stack with the +cooperative scheduling context. All protection domains also share a +main data section, so similar considerations are also relevant to +that. + +Introducing multi-core support would complicate things further, since +another core running a protection domain that the first core never +invoked could access data from the protection domain on the first +core. It may be possible to adequately address such concerns by +allocating per-core stacks. 
+ +Note that this stack arrangement means that a given protection domain +may read and write data written to the stack by some other protection +domain. For example, a protection domain B may push data onto the +stack and later pop that data off of the stack, but a protection +domain A that invoked protection domain B may still be able to read +the data that was pushed and popped to and from the stack, since +popping the data off of the stack does not automatically erase that +stack memory location. Another possibility is that protection domain +B may modify a stack entry pushed by protection domain A before it +invoked protection domain B, and protection domain A may later use the +modified value. Permitting legitimate accesses to callers' stacks is +in fact the primary motivation for this stack arrangement, in that it +makes it simple for A to pass data to and from B (on the shared stack) +when requesting services from B. A system call invocation is nearly +transparent to the developer, appearing almost identical to an +ordinary function call. However, B can access any data on the stack. +The third case is that A can read data placed on the stack by B after +B returns, unless B wipes that data from the stack before returning. +A related sub-case is that if an interrupt handler is invoked, it +pushes the current contents of the general-purpose registers onto the +stack, which may then be revealed to other protection domains besides +the one that was interrupted. However, interrupts are only actually +enabled in the application protection domain. + +Similarly, register contents may be accessed and modified across +protection domain boundaries. + +For the reasons described above, each protection domain should only +invoke other protection domains that it trusts to properly handle data +on the stack. + +Design +------ + +### Boot Process + +The system boots in the following phases. 
+ +#### UEFI Bootstrap + +Primary implementation sources: + + - cpu/x86/uefi/bootstrap_uefi.c + +When the OS is compiled as a UEFI binary, a short bootstrap phase that +is UEFI-compliant is run initially. It simply performs a minimal set +of functions to exit the UEFI boot services and then transfer control +to the Multiboot bootstrap phase. + +#### Multiboot Bootstrap + +Primary implementation sources: + + - cpu/x86/bootstrap_quarkX1000.S + +This phase disables interrupts, sets the stack pointer to the top of +the main stack, and then invokes boot stage 0. + +#### Boot Stage 0 + +Primary implementation sources: + + - cpu/x86/init/common/cpu.c + - cpu/x86/init/common/gdt.c + +The UEFI firmware or Multiboot-compliant bootloader should have +configured an initial Global Descriptor Table (GDT) with flat segments +and configured the CPU to operate in protected mode with paging +disabled. Flat segments each map the whole 4GiB physical memory +space. This is the state of the system when the OS enters boot stage +0. This stage is responsible for setting up a new GDT and loading the +segment registers with the appropriate descriptors from the new GDT to +enable boot stage 1 to run. + +#### Boot Stage 1 + +Primary implementation sources: + + - cpu/x86/init/common/cpu.c + - cpu/x86/init/common/idt.c + - cpu/x86/mm/prot-domains.c + +Boot stage 1 initializes the Interrupt Descriptor Table (IDT) and +installs a handler for double-fault exceptions. Handlers for +additional interrupts and exceptions are installed later in boot +stages 1 and 2. + +This stage also initializes protection domain support and enters the +kernel protection domain. + +#### Boot Stage 2 + +Primary implementation sources: + + - cpu/x86/init/common/cpu.c + - platform/galileo/contiki-main.c + +The entrypoint for the kernel protection domain is 'main'. Boot stage +2 initializes hardware devices and associated interrupts. It then +transfers control to the application protection domain.
Note that +this is a transfer of control, not a call that would be matched with +some future return. This is an important distinction, because +protection domains are not reentrant. Thus, if the kernel protection +domain called the application protection domain, it would not be +possible to invoke any kernel system calls until the system is reset, +since the application protection domain never exits/returns while the +system is running. There are not actually any kernel system calls +provided in the initial implementation of protection domains, but they +may be added in the future. + +The core protection domain configuration (e.g. allowable system calls +and entrypoints, registered protection domains, etc.) is frozen by the +conclusion of boot stage 2 to help prevent erroneous changes that +could reduce the robustness of the system. The way that it is frozen +is that there are no kernel system calls that are intended to permit +changes to the core protection domain configuration. Thus, once the +kernel protection domain has exited, the only way the core protection +domain configuration can change would be due to undesirable memory +manipulations (e.g. due to a faulty device driver). + +#### Boot Stage 3 + +Primary implementation sources: + + - platform/galileo/contiki-main.c + +Boot stage 3 performs initialization procedures that are less +tightly-coupled to hardware. For example, it launches Contiki +processes and invokes Contiki configuration routines. + +### Privilege Levels + +When protection domain support is inactive, all code runs at +ring/privilege level 0. When protection domain support is active, +only exception handlers and system call dispatchers (including +dispatchers for system call returns) run at ring level 0. Code in the +preemptive scheduling context runs at ring level 2 and code in the +cooperative scheduling context runs at ring level 3. Ring levels with +higher numbers are less privileged than those with lower numbers. +Ring level 1 is unused. 
+ +### IO and Interrupt Privileges + +The kernel protection domain cooperative scheduling context needs +access to IO ports, for device initialization. Other protection +domains may also require such access. The IO Privilege Level (IOPL) +that is assigned to a protection domain using the relevant bits in the +EFLAGS field could be set according to whether IO port access is +required in that protection domain. However, this would introduce +additional complexity and overhead in the critical system call and +return dispatchers. Instead, the IOPL is always set to block IO +access from the cooperative scheduling context. Port IO instructions +in that context will then generate general protection faults, and the +exception handler decodes and emulates authorized port IO +instructions. + +Interrupts are handled at ring level 2, since they do not use any +privileged instructions. They do cause the interrupt flag to be +cleared as they are delivered. The interrupt flag can only be +modified by instructions executing at a ring level that is numerically +less than or equal to the IOPL. Each interrupt handler needs to set +the interrupt flag using the IRET instruction when it returns. +Protection domains that require access to port IO (currently just the +kernel protection domain) are configured with an IOPL of 3 whereas +others are configured with an IOPL of 2. That is why interrupts are +configured to run at ring level 2. Interrupts are only enabled in the +application protection domain. + +Some interrupt handlers require access to port IO, and all are +permitted such access, since they need it anyway for restoring the +interrupt flag when returning. IO port access is a very powerful +privilege, since it can be used to remap MMIO regions of PCI devices, +reconfigure PCI devices, etc. 
Thus, further restricting access to IO +ports may improve the robustness of the system, but would increase +complexity and space requirements and possibly necessitate additional +context switches, since IO port access is controlled by the combined +settings of IOPL as well as an optional IO bitmap in the TSS. + +### Interrupt and Exception Dispatching + +Primary implementation sources: + - cpu/x86/init/common/interrupt.h + +Separate stacks are allocated for dispatching interrupts and +exceptions. However, to save space, the main bodies of some interrupt +and exception handlers are run on the main stack. A handler may +expect to have access to data from the interrupt or exception stack, +so the interrupt or exception dispatcher copies that data prior to +pivoting to the main stack and executing the handler. + +### Protection Domain Control Structures (PDCSes) + +Each protection domain is managed by the kernel and privileged +functions using a PDCS. The PDCS structure is entirely +software-defined. The initial protection domain plugin does not +support re-entrant protection domains to simplify the implementation +of the plugin by enabling domain-specific information (e.g. system +call return address) to be trivially stored in each PDCS. + +### Paging-Based Protection Domains + +Primary implementation sources: + + - cpu/x86/mm/paging-prot-domains.c + - cpu/x86/mm/syscalls-int.c + - cpu/x86/mm/syscalls-int-asm.S + +#### Introduction + +Only a single page table is used for all protection domains. A flat +memory model is used. Almost all linear-to-physical address mappings +are identity mappings, with the exceptions being the MMIO and metadata +regions. The X86 port of Contiki currently only supports at most one +MMIO and one metadata range per driver, and the paging-based +protection domain implementation always starts at particular linear +addresses when mapping an MMIO or metadata range. 
This may reduce +overhead, due to the way protection domain switches are implemented. + +#### System Call and Return Dispatching + +The system call dispatcher executes at ring level 0, since it uses the +privileged INVLPG or MOV CR3 instructions to invalidate TLB entries. +The dispatcher modifies page table entries to grant only the +permissions required by the protection domain being activated. It +then optionally uses the INVLPG instruction to invalidate any TLB +entries for any page table entries that were modified. If INVLPG is +not used to invalidate specific TLB entries, then CR3 is reloaded to +invalidate the entire TLB (global entries would be excluded, but they +are not used in this implementation). + +It is more efficient to always start at a particular linear address +when mapping an MMIO or metadata region, since the page table entries +for that region can be updated to unmap any previous region of that +type, map the new region, and then invalidated to cause the new +settings to take effect. The alternative using an identity +linear-to-physical address mapping for regions would be to unmap the +previous region by editing one set of page table entries and to then +map the new region by editing a different set of page table entries +and to finally perform invalidations for both sets of page table +entries. Another drawback of such an identity address mapping is that +additional page tables may need to be allocated to represent the +various MMIO regions, since page tables are indexed by linear address +and MMIO regions are often at high physical addresses. Note that this +is specific to MMIO regions, since metadata regions are not at +particularly high physical addresses. Additionally, if different base +linear addresses are used, it is necessary to communicate those to the +system call handler code so that the regions can be accessed. This +would require care to prevent an adversary from manipulating the +addresses and it may increase complexity. 
+ +The overall process of handling a system call can be illustrated at a +high level as follows. Some minor steps are omitted in the interest +of clarity and brevity. + +``` + == BEGIN Client protection domain ========================================== + -- BEGIN Caller ------------------------------------------------------------ + 1. Call system call stub. + -- + 20. Continue execution... + -- END Caller -------------------------------------------------------------- + -- BEGIN System call stub -------------------------------------------------- + 2. Already in desired (server) protection domain? + - No: Issue software interrupt #100 to request system call. + - Yes: Jump to system call body. + -- END System call stub ---------------------------------------------------- + == END Client protection domain ============================================ + == BEGIN Ring level 0 ====================================================== + -- BEGIN System call dispatcher--------------------------------------------- + 3. Check that the requested system call is allowed. Get entrypoint. + 4. Check that the server protection domain is available (not yet present + in the protection domain call stack) and then mark it as busy. + 5. Save the caller return address from the main stack into the client + PDCS. + 6. Overwrite the caller return address on the main stack to point to + system call return stub. + 7. Push server protection domain onto protection domain call stack. + 8. Update the interrupt return stack EIP to start of system call body. + 9. Update and invalidate page table entries to grant only the permissions + required by the server protection domain. + 10. Update interrupt flag to disable interrupts, since interrupts are only + enabled in app protection domain, which exports no system calls. + 11. Perform interrupt return (IRET). 
+ -- END System call dispatcher ---------------------------------------------- + -- BEGIN System call return dispatcher ------------------------------------- + 15. Mark protection domain on top of protection domain call stack as + available. + 16. Retrieve the caller return address from the kernel data structure for + the client protection domain and use it to overwrite the EIP in the + interrupt return stack. + 17. Update and invalidate page table entries to grant only the permissions + required by the client protection domain. + 18. Update interrupt flag to only enable interrupts if returning to app + protection domain cooperative scheduling context. + 19. Perform interrupt return (IRET). + -- END System call return dispatcher --------------------------------------- + == END Ring level 0 ======================================================== + == BEGIN Server protection domain ========================================== + -- BEGIN System call body -------------------------------------------------- + 12. Execute the work for the requested system call. + 13. Return (to system call return stub, unless invoked from server + protection domain, in which case return is to caller). + -- END System call body ---------------------------------------------------- + -- BEGIN System call return stub ------------------------------------------- + 14. Issue software interrupt #101 to request system call return. + -- END System call return stub --------------------------------------------- + == END Server protection domain ============================================ +``` + +The first step in performing a system call is to invoke a system call +stub that actually issues the software interrupt to request a system +call dispatch. This approach reduces disruption to existing code, +since macros are used to generate separate stubs and corresponding +system call bodies with a single system call signature definition. 
+ +#### Memory Layout + +The approximate memory layout of the system is depicted below, +starting with the highest physical addresses and proceeding to lower +physical addresses. Optional permissions are denoted with +parentheses. See cpu/x86/quarkX1000_paging.ld for details of how this +memory layout is implemented. + +``` + | Kernel | App | Other | + ... +--------+--------+--------+ + +------------------------------------------+ | | | | + | Domain X MMIO | | | | (RW) | + +------------------------------------------+ | | | | + ... | | | | + +------------------------------------------+ | | | | + | Domain X DMA-accessible metadata | | | | (RW) | + | (section .dma_bss) | | | | | + +------------------------------------------+ | | | | + +------------------------------------------+ | | | | + | Domain X metadata (section .meta_bss) | | | | (RW) | + +------------------------------------------+ | | | | + ... | | | | + +------------------------------------------+ | | | | + | Kernel-private data | | RW | | | + | (sections .prot_dom_bss, .gdt_bss, etc.) | | | | | + +------------------------------------------+ | | | | + +------------------------------------------+ | | | | + | System call data (section .syscall_bss) | | RW | R | R | + +------------------------------------------+ | | | | + +------------------------------------------+ | | | | + | Kernel-owned data (section .kern_bss) | | RW | R | R | + +------------------------------------------+ | | | | + +------------------------------------------+ | | | | + | Page-aligned, Kernel-owned data | | RW | R | R | + | (section .page_aligned_kern_bss) | | | | | + +------------------------------------------+ | | | | + +------------------------------------------+ | | | | + | Common data | | RW | RW | RW | + | (sections .data, .rodata*, .bss, etc.) 
| | | | | + +------------------------------------------+ | | | | + (not-present guard band page) | | | | + +------------------------------------------+ | | | | + | Exception stack | | RW | RW | RW | + | (section .exc_stack) | | | | | + +------------------------------------------+ | | | | + +------------------------------------------+ | | | | + | Interrupt stack | | RW | RW | RW | + | (section .int_stack) | | | | | + +------------------------------------------+ | | | | + +------------------------------------------+ | | | | + | Main stack (section .main_stack) | | RW | RW | RW | + +------------------------------------------+ | | | | + (not-present guard band page) | | | | + +------------------------------------------+ | | | | + | Main code (.text) | | RX | RX | RX | + +------------------------------------------+ | | | | + +------------------------------------------+ | | | | + | Bootstrap code (section .boot_text) | | | | | + +------------------------------------------+ | | | | + +------------------------------------------+ | | | | + | Multiboot header | | | | | + +------------------------------------------+ | | | | + ... +``` + +The only protection domain that is permitted to access kernel-owned +data is the kernel protection domain. Some devices can also be +instructed to perform DMA to kernel-owned data, although that is an +incorrect configuration. + +Paging only differentiates between memory accesses from ring 3 (user +level) and those from rings 0-2 (supervisor level). To avoid granting +code running in the preemptive scheduling context supervisory write +access to kernel data structures (including the page tables), those +structures are marked read-only (except when the kernel protection +domain is active) and the Write Protect (WP) bit in Control Register 0 +(CR0) is cleared only when it is necessary to update a write-protected +structure. Only ring 0 is allowed to modify CR0. 
+ +Optional metadata for each protection domain is intended to only be +accessible from the associated protection domain and devices. + +Read accesses to executable code have not been observed to be needed +in at least a limited set of tests, but they are permitted, since +paging does not support an execute-only permission setting. On the +other hand, the Execute-Disable feature is used to prevent execution +of non-code memory regions. All non-startup code is mapped in all +protection domains. Limiting the code that is executable within each +protection domain to just the code that is actually needed within that +protection domain could improve the robustness of the system, but it +is challenging to determine all code that may be needed in a given +protection domain (e.g. all needed library routines). + +Stack accesses to non-stack memory are not needed, but they are +permitted. However, one page of unmapped linear address space is +placed above and below the stacks to detect erroneous stack accesses +to those linear address regions, which are the types of accesses most +likely to occur during a stack overflow or underflow condition. The +main stack is placed just below the interrupt stack, which is just +below the exception stack. Stack overflows are more common than stack +underflows, which motivates arranging the stacks such that an overflow +from a less-critical stack will not affect a more-critical stack. +Furthermore, the main stack is the most likely to overflow, since the +code that uses it is typically the most voluminous and difficult to +characterize. That provides additional motivation for positioning it +such that an overflow results in an immediate page fault. An +alternative design placing each stack on a separate group of +contiguous pages may improve the robustness of the system by +permitting the insertion of unmapped guard pages around them to +generate page faults in the event an overflow or underflow occurs on +any stack. 
However, that would consume additional memory. + +Data in the .rodata sections is marked read/write, even though it may +be possible to improve the robustness of the system by marking that +data as read-only. Doing so would introduce additional complexity +into the system. + +### Pointer Validation + +Primary implementation sources: + - cpu/x86/mm/syscalls.h + +At the beginning of each system call routine, it is necessary to check +that any untrusted pointer that could have been influenced by a caller +(i.e. a stack parameter or global variable) refers to a location above +the return address and to halt otherwise. This is to prevent a +protection domain from calling a different protection domain and +passing a pointer that references a location in the callee's stack +other than its parameters to influence the execution of the callee in +an unintended manner. For example, if an incoming pointer referenced +the return address, it could potentially redirect execution with the +privileges of the callee protection domain. + +It is also necessary to check that the pointer is either within the +stack region or the shared data region (or a guard band region, since +that will generate a fault) to prevent redirection of data accesses to +MMIO or metadata regions. + +The pointer is both validated and copied to a new storage location, +which must be within the callee's local stack region (excluding the +parameter region). This is to mitigate scenarios such as two pointers +being validated and an adversary later inducing a write through one of +the pointers to the other pointer to corrupt the latter pointer before +it is used. + +Any pointer whose value is fixed at link or load time does not need to +be validated prior to use, since no adversary within the defined +threat model is able to influence the link or load process. 
+ +### DMA Restrictions + +Primary implementation sources: + - cpu/x86/drivers/quarkX1000/imr.c + - cpu/x86/drivers/quarkX1000/imr-conf.c + +The CPU is not the only agent with the ability to issue requests to +the interconnect within the SoC. For example, SoC peripherals such as +the Ethernet driver use DMA to efficiently access memory buffers. +This could introduce a risk that DMA could be used to bypass the +memory protections enforced on the CPU by segmentation or paging. For +example, a device driver could instruct a device to access a memory +region to which the kernel has not granted the driver's protection +domain permission to access. + +The Isolated Memory Region (IMR) feature is configured to restrict the +memory that can be accessed by system agents other than the CPU [3]. +It only allows those system agents to access portions of the Contiki +memory space that are specifically intended to be used with DMA. The +source code for each protection domain specifies that its optional +metadata region needs to be accessible from other system agents +besides the CPU by using ATTR_BSS_DMA instead of ATTR_BSS_META when +allocating storage for the metadata. + +Extending the Framework +----------------------- + +### Adding a New Protection Domain + +The following steps are required. See the existing device drivers for +examples of various types of protection domains and how they are +initialized. + + - Allocate storage for the PDCS and the corresponding + client-accessible data structure using the PROT_DOMAINS_ALLOC + macro. + - Apply the ATTR_BSS_META attribute to the metadata structure, if + applicable. Apply the ATTR_BSS_DMA attribute instead if the + metadata structure needs to be DMA-accessible. Pad the metadata + structure to completely fill an integer multiple of the minimum + page size, 4096, when paging-based protection domains are in use. + See the definition of quarkX1000_eth_meta_t for an example. 
+ - Perform the following steps during boot stage 2: + - Initialize the protection domain ID in the client-accessible data + structure using the PROT_DOMAINS_INIT_ID macro. + - Register the domain. See prot-domains.c:prot_domains_init for an + example of registering a non-driver protection domain. See + cpu/x86/drivers/quarkX1000/eth.c:quarkX1000_eth_init for an + example of registering a PCI driver protection domain with an + MMIO region and a metadata region. + +### Adding a New System Call + +The following steps are required: + + - Define the system call procedure using the SYSCALLS_DEFINE or + SYSCALLS_DEFINE_SINGLETON macro. See + cpu/x86/drivers/legacy_pc/uart-16x50.c:uart_16x50_tx for an example + of a non-singleton system call. See + cpu/x86/drivers/quarkX1000/eth.c:quarkX1000_eth_send for an example + of a singleton system call. A singleton system call is one for + which at most one server protection domain will be associated with + it. + - During boot stage 2, associate the system call with one or more + server protection domains using the SYSCALLS_AUTHZ macro. + +Usage +----- + +To enable protection domain support, add +"X86_CONF_PROT_DOMAINS=paging" to the make command line. + +The paging option accepts a sub-option to determine whether the TLB is +fully- or selectively-invalidated during protection domain switches. +By default, full invalidation is selected. Set the +X86_CONF_USE_INVLPG variable to 1 to select selective invalidation. + +References +---------- + +[1] J. H. Saltzer, "Protection and the Control of Information Sharing + in Multics," Commun. ACM, vol. 17, no. 7, pp. 388-402, Jul. 1974. 
+ +[2] https://github.com/contiki-os/contiki/wiki/Processes + +[3] "Intel(R) Quark(TM) SoC X1000 Secure Boot Programmer's Reference + Manual," + http://www.intel.com/support/processors/quark/sb/CS-035228.htm diff --git a/cpu/x86/mm/gdt-layout.h b/cpu/x86/mm/gdt-layout.h new file mode 100644 index 000000000..8a5af6cbf --- /dev/null +++ b/cpu/x86/mm/gdt-layout.h @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2015-2016, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef CPU_X86_MM_GDT_LAYOUT_H_ +#define CPU_X86_MM_GDT_LAYOUT_H_ + +#include "prot-domains.h" + +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__PAGING +/** + * Number of fixed GDT descriptors. Additional descriptors may be defined + * outside of gdt.c. + */ +#define GDT_NUM_FIXED_DESC 7 +#else +#define GDT_NUM_FIXED_DESC 3 +#endif + +#define GDT_IDX_NULL 0 +/** + * Flat code segment, used at boot and also for the rest of the system's + * runtime when protection domains are disabled + */ +#define GDT_IDX_CODE_FLAT 1 +/** + * Flat data segment, used at boot and also for the rest of the system's + * runtime when protection domains are disabled + */ +#define GDT_IDX_DATA_FLAT 2 + +#if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__NONE +/** Default (post-boot) code segment */ +#define GDT_IDX_CODE 3 +/** + * Same bounds and permissions as default code segment, but at the interrupt + * handler privilege level + */ +#define GDT_IDX_CODE_INT 4 +/** Stack segment for interrupt handlers */ +#define GDT_IDX_STK_INT 5 + +#define GDT_IDX_CODE_EXC GDT_IDX_CODE_FLAT +/** Default data segment used by code at all privilege levels */ +#define GDT_IDX_DATA 6 +#define GDT_IDX_STK GDT_IDX_DATA +#define GDT_IDX_STK_EXC GDT_IDX_DATA_FLAT +#else +#define GDT_IDX_CODE GDT_IDX_CODE_FLAT +#define GDT_IDX_CODE_INT GDT_IDX_CODE_FLAT +#define GDT_IDX_CODE_EXC GDT_IDX_CODE_FLAT +#define GDT_IDX_DATA GDT_IDX_DATA_FLAT +#define GDT_IDX_STK GDT_IDX_DATA_FLAT +#define GDT_IDX_STK_INT GDT_IDX_DATA_FLAT +#define GDT_IDX_STK_EXC GDT_IDX_DATA_FLAT +#endif + +#define GDT_SEL(idx, rpl) (((idx) << 3) | (rpl)) + +#define DT_SEL_GET_IDX(sel) ((sel) >> 3) + +#define DT_SEL_GET_RPL(sel) ((sel) & 3) + +#define GDT_SEL_NULL GDT_SEL(GDT_IDX_NULL, 0) +#define GDT_SEL_CODE_FLAT GDT_SEL(GDT_IDX_CODE_FLAT, PRIV_LVL_EXC) +#define GDT_SEL_DATA_FLAT GDT_SEL(GDT_IDX_DATA_FLAT, PRIV_LVL_EXC) + +#define GDT_SEL_CODE GDT_SEL(GDT_IDX_CODE, PRIV_LVL_USER) +#define GDT_SEL_CODE_INT GDT_SEL(GDT_IDX_CODE_INT, 
PRIV_LVL_INT) +#define GDT_SEL_CODE_EXC GDT_SEL(GDT_IDX_CODE_EXC, PRIV_LVL_EXC) + +#define GDT_SEL_DATA GDT_SEL(GDT_IDX_DATA, PRIV_LVL_EXC) + +#define GDT_SEL_STK GDT_SEL(GDT_IDX_STK, PRIV_LVL_USER) +#define GDT_SEL_STK_INT GDT_SEL(GDT_IDX_STK_INT, PRIV_LVL_INT) +#define GDT_SEL_STK_EXC GDT_SEL(GDT_IDX_STK_EXC, PRIV_LVL_EXC) + +#endif /* CPU_X86_MM_GDT_LAYOUT_H_ */ + diff --git a/cpu/x86/mm/paging-prot-domains.c b/cpu/x86/mm/paging-prot-domains.c new file mode 100644 index 000000000..6c28c03e2 --- /dev/null +++ b/cpu/x86/mm/paging-prot-domains.c @@ -0,0 +1,297 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include "dma.h" +#include "gdt.h" +#include "gdt-layout.h" +#include "helpers.h" +#include "idt.h" +#include "paging.h" +#include "prot-domains.h" +#include "segmentation.h" +#include "stacks.h" +#include "syscalls.h" +#include "tss.h" + +/*#define DBG_PAGE_ALLOC*/ + +/* Enable PAE-mode paging */ +#define CR4_PAE BIT(5) + +/* Extended Feature Enables MSR */ +#define MSR_EFER 0xC0000080 + +/* Enable Execute Disable bit support */ +#define EFER_NXE BIT(11) + +/* Root page-directory-pointer table */ +static pdpt_t root_pgtbl __attribute__((aligned(32))) ATTR_BSS_KERN; +/* Although the following page tables must be page-aligned, it is infeasible to + * apply the "aligned(4096)" attribute for the reasons described in the linker + * script. 
+ */ +/* Second-level page directory */ +static page_table_t + second_lvl_pgtbl ATTR_BSS_KERN_PAGE_ALIGNED; +/* Leaf-level page table */ +static page_table_t leaf_pgtbl ATTR_BSS_KERN_PAGE_ALIGNED; + +#define LINEAR_ADDR_BOUND (MIN_PAGE_SIZE * ENTRIES_PER_PAGE_TABLE) + +/*---------------------------------------------------------------------------*/ +void +prot_domains_reg(dom_client_data_t *dcd, + uintptr_t mmio, + size_t mmio_sz, + uintptr_t meta, + size_t meta_sz, + bool pio) +{ + dom_id_t dom_id = dcd->dom_id; + volatile struct dom_kern_data *dkd = + prot_domains_kern_data + dom_id; + + /* All addresses and sizes must be page-aligned */ + if((PROT_DOMAINS_ACTUAL_CNT <= dom_id) || + ((mmio & (MIN_PAGE_SIZE - 1)) != 0) || + ((mmio_sz & (MIN_PAGE_SIZE - 1)) != 0) || + ((meta & (MIN_PAGE_SIZE - 1)) != 0) || + ((meta_sz & (MIN_PAGE_SIZE - 1)) != 0) || + (PROT_DOMAINS_MAX_MMIO_SZ < mmio_sz) || + (LINEAR_ADDR_BOUND < (PROT_DOMAINS_META_LINEAR_BASE + meta_sz))) { + halt(); + } + + if((dkd->flags & PROT_DOMAINS_FLAG_INITED) == PROT_DOMAINS_FLAG_INITED) { + halt(); + } + + dkd->mmio = mmio; + dkd->mmio_sz = mmio_sz; + dkd->meta = meta; + dkd->meta_sz = meta_sz; + dkd->flags = PROT_DOMAINS_FLAG_INITED; + if(pio) { + dkd->flags |= PROT_DOMAINS_FLAG_PIO; + } +} +/*---------------------------------------------------------------------------*/ +static void __attribute__((regparm(3))) +set_ptes(uintptr_t start_la, uintptr_t start_pa, uintptr_t end_pa, + pte_t template) +{ +#ifdef DBG_PAGE_ALLOC +#warning Checking page allocations at runtime. 
+ + if(((start_la & (MIN_PAGE_SIZE - 1)) != 0) || + ((start_pa & (MIN_PAGE_SIZE - 1)) != 0) || + ((start_la & (MIN_PAGE_SIZE - 1)) != 0) || + ((end_pa & (MIN_PAGE_SIZE - 1)) != 0) || + (LINEAR_ADDR_BOUND <= (start_la + (end_pa - start_pa)))) { + halt(); + } +#endif + + while(start_pa < end_pa) { + template.addr = start_pa >> 12; + + leaf_pgtbl[start_la >> MIN_PAGE_SIZE_SHAMT] = template; + +#ifdef X86_CONF_USE_INVLPG + __asm__("invlpg %0" :: "m" (*(uint8_t *)start_la)); +#endif + + start_la += MIN_PAGE_SIZE; + start_pa += MIN_PAGE_SIZE; + } +} +/*---------------------------------------------------------------------------*/ +static void __attribute__((fastcall)) +set_ptes_identity_map(uintptr_t start_pa, uintptr_t end_pa, pte_t template) +{ + set_ptes(start_pa, start_pa, end_pa, template); +} +/*---------------------------------------------------------------------------*/ +static inline uint32_t __attribute__((always_inline)) +prot_domains_switch(dom_id_t from_id, dom_id_t to_id, + interrupt_stack_t *intr_stk) +{ + volatile dom_kern_data_t *from, *to; + + from = prot_domains_kern_data + from_id; + to = prot_domains_kern_data + to_id; + + if((from_id == DOM_ID_kern) || + (to_id == DOM_ID_kern)) { + pte_t to_kern_data_pte = { .raw = 0 }; + to_kern_data_pte.present = 1; + to_kern_data_pte.exec_disable = 1; + /* The kernel data region should always be accessible to supervisory code, + * but it is only accessible to user mode in the kernel protection domain. 
+ */ + to_kern_data_pte.user_accessible = 1; + if(to_id == DOM_ID_kern) { + to_kern_data_pte.writable = 1; + } + + set_ptes_identity_map((uintptr_t)&_sbss_kern_addr, + (uintptr_t)&_ebss_syscall_addr, + to_kern_data_pte); + + if(to_id != DOM_ID_kern) { + to_kern_data_pte.user_accessible = 0; + to_kern_data_pte.writable = 0; + } + + set_ptes_identity_map((uintptr_t)&_ebss_syscall_addr, + (uintptr_t)&_ebss_kern_addr, + to_kern_data_pte); + } + + if(to->mmio_sz != 0) { + pte_t pte = { .raw = 0 }; + pte.present = 1; + pte.exec_disable = 1; + pte.user_accessible = 1; + pte.writable = 1; + /* disable caching of MMIO accesses */ + pte.pcd = 1; + + set_ptes(PROT_DOMAINS_MMIO_LINEAR_BASE, + to->mmio, + to->mmio + to->mmio_sz, + pte); + } + if(to->mmio_sz < from->mmio_sz) { + pte_t pte = { .raw = 0 }; + /* Unmap the part of the old MMIO window not covered by the new one */ + set_ptes_identity_map(PROT_DOMAINS_MMIO_LINEAR_BASE + to->mmio_sz, + PROT_DOMAINS_MMIO_LINEAR_BASE + from->mmio_sz, + pte); + } + + if(to->meta_sz != 0) { + pte_t pte = { .raw = 0 }; + pte.present = 1; + pte.exec_disable = 1; + pte.user_accessible = 1; + pte.writable = 1; + + set_ptes(PROT_DOMAINS_META_LINEAR_BASE, + to->meta, + to->meta + to->meta_sz, + pte); + } + if(to->meta_sz < from->meta_sz) { + pte_t pte = { .raw = 0 }; + /* Unmap the part of the old metadata window not covered by the new one */ + set_ptes_identity_map(PROT_DOMAINS_META_LINEAR_BASE + to->meta_sz, + PROT_DOMAINS_META_LINEAR_BASE + from->meta_sz, + pte); + } + +#ifndef X86_CONF_USE_INVLPG + __asm__ __volatile__ ("mov %%cr3, %%eax\n\t" + "mov %%eax, %%cr3\n\t" ::: "eax"); +#endif + + return 0; +} +/*---------------------------------------------------------------------------*/ +void +prot_domains_gdt_init(void) +{ + gdt_copy_desc_change_dpl(GDT_IDX_DATA, GDT_IDX_DATA_FLAT, PRIV_LVL_USER); + gdt_copy_desc_change_dpl(GDT_IDX_STK_INT, GDT_IDX_STK_EXC, PRIV_LVL_INT); +} +/*---------------------------------------------------------------------------*/ +void +prot_domains_impl_init(void) +{ + pte_t pte = { .raw = 0 }; + + syscalls_int_init(); + + /* Initialize page table: */ + + 
pte.present = 1; + pte.addr = ((uint32_t)second_lvl_pgtbl) >> MIN_PAGE_SIZE_SHAMT; + + root_pgtbl[0] = pte; + + pte.writable = 1; + pte.user_accessible = 1; + pte.addr = ((uint32_t)leaf_pgtbl) >> MIN_PAGE_SIZE_SHAMT; + + second_lvl_pgtbl[0] = pte; + + /* Map code sections: */ + + pte.writable = 0; + set_ptes_identity_map((uintptr_t)&_stext_addr, (uintptr_t)&_etext_addr, pte); + + /* Map data sections: */ + + pte.writable = 1; + pte.exec_disable = 1; + set_ptes_identity_map((uintptr_t)stacks_main, + (uintptr_t)stacks_main + + STACKS_SIZE_MAIN + + STACKS_SIZE_EXC + + STACKS_SIZE_INT, + pte); + set_ptes_identity_map((uintptr_t)&_sdata_addr, (uintptr_t)&_edata_addr, pte); + + /* Enable XD bit support */ + __asm__ __volatile__ ("wrmsr" :: "c" (MSR_EFER), "a" (EFER_NXE), "d" (0)); + + /* Enable PAE */ + __asm__ __volatile__ ("mov %%cr4, %%eax\n\t" + "or %0, %%eax\n\t" + "mov %%eax, %%cr4\n\t" + : + : "r" (CR4_PAE) + : "eax"); + + /* Load CR3 */ + __asm__ __volatile__ ("mov %0, %%cr3" :: "r" (root_pgtbl)); +} +/*---------------------------------------------------------------------------*/ +uintptr_t +prot_domains_lookup_meta_phys_base(dom_client_data_t *drv) +{ + return prot_domains_kern_data[drv->dom_id].meta; +} +/*---------------------------------------------------------------------------*/ + +/* Enable inter-procedural optimization with procedures in the following file: + */ +#include "syscalls-int.c" diff --git a/cpu/x86/mm/paging-prot-domains.h b/cpu/x86/mm/paging-prot-domains.h new file mode 100644 index 000000000..0f7f54ea3 --- /dev/null +++ b/cpu/x86/mm/paging-prot-domains.h @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CPU_X86_MM_PAGING_PROT_DOMAINS_H_ +#define CPU_X86_MM_PAGING_PROT_DOMAINS_H_ + +#include <stdint.h> +#include <stdbool.h> +#include <stddef.h> +#include "dma.h" +#include "helpers.h" +#include "paging.h" +#include "syscalls-int.h" + +struct dom_kern_data { + /** Base physical address of optional MMIO region */ + uintptr_t mmio; + /** Size in bytes of MMIO region (must be a multiple of the page size) */ + size_t mmio_sz; + /** Base physical address of optional metadata region */ + uintptr_t meta; + /** Size in bytes of metadata region (must be a multiple of the page size) */ + size_t meta_sz; + /** Flags are defined with the prefix PROT_DOMAINS_FLAG in prot-domains.h */ + uint32_t flags; + /** + * Original return address from call stack when this protection domain + * invoked some other protection domain. 
This serves to control the return + * entrypoint. The callee is not permitted to modify this value (unless the + * callee is the kernel protection domain). + */ + uintptr_t orig_ret_addr; + + /* align to next-larger power of 2 to enable usage of shifting instead of + * multiplication to index an array of these structures. + */ +} __attribute__((aligned(32))); + +/** Linear base address at which to map the MMIO region. */ +#define PROT_DOMAINS_MMIO_LINEAR_BASE (MIN_PAGE_SIZE + (uintptr_t)&_ebss_kern_addr) + +/** Maximum supported size of MMIO region */ +#define PROT_DOMAINS_MAX_MMIO_SZ 0x4000 + +/** Linear base address at which to map the metadata region */ +#define PROT_DOMAINS_META_LINEAR_BASE \ + (MIN_PAGE_SIZE + (PROT_DOMAINS_MMIO_LINEAR_BASE + PROT_DOMAINS_MAX_MMIO_SZ)) + +#define PROT_DOMAINS_META_OFF_TO_PHYS(off, meta_phys_base) \ + ((meta_phys_base) + ((off) - PROT_DOMAINS_META_LINEAR_BASE)) + +/** Any MMIO region mapping always starts at a particular linear address. */ +#define PROT_DOMAINS_MMIO(dcd) PROT_DOMAINS_MMIO_LINEAR_BASE +/** + * Any metadata region mapping always starts at a particular linear address. + */ +#define PROT_DOMAINS_META(dcd) PROT_DOMAINS_META_LINEAR_BASE + +#define PROT_DOMAINS_ENTER_ISR(exc) \ + PROT_DOMAINS_ENTER_ISR_COMMON(exc) +#define PROT_DOMAINS_LEAVE_ISR(exc) PROT_DOMAINS_LEAVE_ISR_COMMON(exc) + +/* Enable paging */ +#define CR0_PG BIT(31) +/* Enable write protection in supervisor mode */ +#define CR0_WP BIT(16) +/* Enable protected mode */ +#define CR0_PE BIT(0) + +/** + * \brief Enable or disable write protection enforcement in supervisor mode. + * When disabled, supervisory code (i.e. code running at ring levels + * 0-2) is permitted to write to pages that are marked read-only in + * page tables. + * + * \param en Set to true to enable write protection enforcement. 
+ */ +static inline void prot_domains_set_wp(bool en) +{ + uint32_t cr0_val = CR0_PG | CR0_PE; + if(en) { + cr0_val |= CR0_WP; + } + __asm__ __volatile__ ("mov %0, %%cr0" :: "r"(cr0_val)); +} + +#endif /* CPU_X86_MM_PAGING_PROT_DOMAINS_H_ */ diff --git a/cpu/x86/mm/paging.h b/cpu/x86/mm/paging.h new file mode 100644 index 000000000..7882ceab2 --- /dev/null +++ b/cpu/x86/mm/paging.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef CPU_X86_MM_PAGING_H_ +#define CPU_X86_MM_PAGING_H_ + +#include + +/** + * Page table entry format for PAE mode page table. See Intel Combined Manual, + * Vol. 3, Section 4.4 for more details. + */ +typedef union pte { + struct { + uint64_t present : 1; + uint64_t writable : 1; + uint64_t user_accessible : 1; + uint64_t pwt : 1; /**< Specify write-through cache policy */ + uint64_t pcd : 1; /**< Disable caching */ + uint64_t accessed : 1; + uint64_t dirty : 1; + uint64_t : 5; + uint64_t addr : 51; + uint64_t exec_disable : 1; + }; + uint64_t raw; +} pte_t; + +#define ENTRIES_PER_PDPT 4 +#define ENTRIES_PER_PAGE_TABLE 512 + +typedef pte_t pdpt_t[ENTRIES_PER_PDPT]; +typedef pte_t page_table_t[ENTRIES_PER_PAGE_TABLE]; + +#define MIN_PAGE_SIZE_SHAMT 12 +#define MIN_PAGE_SIZE (1 << MIN_PAGE_SIZE_SHAMT) + +#endif /* CPU_X86_MM_PAGING_H_ */ diff --git a/cpu/x86/mm/prot-domains.c b/cpu/x86/mm/prot-domains.c new file mode 100644 index 000000000..593da98e2 --- /dev/null +++ b/cpu/x86/mm/prot-domains.c @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "prot-domains.h"

/* NOTE(review): three of the #include lines below have no header name.  They
 * look like angle-bracket includes whose targets were lost when this text was
 * extracted; restore them from the upstream file before building.
 */
#include "gdt.h"
#include
#include "interrupt.h"
#include
#include
#include "syscalls.h"
#include "stacks.h"

/* Kernel-side and client-side records for the two built-in domains.  The
 * kernel-side records are placed in dedicated sections; presumably the linker
 * script positions them so that they take the DOM_ID_kern and DOM_ID_app
 * slots of prot_domains_kern_data -- TODO confirm against the linker script.
 */
static dom_kern_data_t __attribute__((section(".kern_prot_dom_bss")))
  PROT_DOMAINS_PDCS_NM(kern_dcd);
static dom_client_data_t ATTR_BSS_KERN kern_dcd;
static dom_kern_data_t __attribute__((section(".app_prot_dom_bss")))
  PROT_DOMAINS_PDCS_NM(app_dcd);
static dom_client_data_t ATTR_BSS_KERN app_dcd;

/*---------------------------------------------------------------------------*/
/**
 * \brief Set up code segments for the lower privilege levels and register the
 *        kernel and application protection domains.
 *
 *        The descriptor at GDT_IDX_CODE_EXC is copied twice with only its DPL
 *        changed: once with PRIV_LVL_INT into GDT_IDX_CODE_INT and once with
 *        PRIV_LVL_USER into GDT_IDX_CODE.  Both domains are registered with
 *        no MMIO or metadata region; only the kernel domain asks for port
 *        I/O access.  Implementation-specific setup is then delegated to
 *        prot_domains_impl_init().
 */
void
prot_domains_init(void)
{
  segment_desc_t desc;

  gdt_lookup(GDT_IDX_CODE_EXC, &desc);

  SEG_SET_FLAG(desc, DPL, PRIV_LVL_INT);
  gdt_insert(GDT_IDX_CODE_INT, desc);

  SEG_SET_FLAG(desc, DPL, PRIV_LVL_USER);
  gdt_insert(GDT_IDX_CODE, desc);

  /* Each ID is derived from the position of the kernel-side record, so it
   * has to be filled in before the client record is handed to
   * prot_domains_reg().
   */
  PROT_DOMAINS_INIT_ID(kern_dcd);
  prot_domains_reg(&kern_dcd, 0, 0, 0, 0, true);
  PROT_DOMAINS_INIT_ID(app_dcd);
  prot_domains_reg(&app_dcd, 0, 0, 0, 0, false);

  prot_domains_impl_init();
}
/*---------------------------------------------------------------------------*/
diff --git a/cpu/x86/mm/prot-domains.h b/cpu/x86/mm/prot-domains.h
new file mode 100644
index
000000000..f7dc84e3c --- /dev/null +++ b/cpu/x86/mm/prot-domains.h @@ -0,0 +1,275 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef CPU_X86_MM_PROT_DOMAINS_H_ +#define CPU_X86_MM_PROT_DOMAINS_H_ + +#if !__ASSEMBLER__ +#include +#include +#include +#include "helpers.h" +#endif + +#define X86_CONF_PROT_DOMAINS__NONE 0 +#define X86_CONF_PROT_DOMAINS__PAGING 1 + +/** Privilege level (ring) for exception handlers and other supervisory code */ +#define PRIV_LVL_EXC 0 +#if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__NONE +/** Privilege level for interrupt handlers */ +#define PRIV_LVL_INT 2 +/** Default privilege level */ +#define PRIV_LVL_USER 3 +#else +#define PRIV_LVL_INT PRIV_LVL_EXC +#define PRIV_LVL_USER PRIV_LVL_EXC +#endif + +#define DOM_ID_kern 0 +#define DOM_ID_app 1 + +/** I/O Privilege Level */ +#define EFLAGS_IOPL(pl) ((pl) << 12) +/** Interrupt Enable Flag */ +#define EFLAGS_IF (1u << 9) + +#if !__ASSEMBLER__ + +/** Protection domain ID */ +typedef uint32_t dom_id_t; + +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__PAGING +#include "paging-prot-domains.h" +#endif + +/* The following symbols are defined in the linker script */ +/** Bounds for .text section */ +extern uint32_t _stext_addr, _etext_addr; + +#if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__NONE + +/** Metadata that should not be DMA-accessible */ +#define ATTR_BSS_META __attribute__((section(".meta_bss"))) +/** Kernel-owned data */ +#define ATTR_BSS_KERN __attribute__((section(".kern_bss"))) +/** Code that should only be executable during bootup */ +#define ATTR_CODE_BOOT __attribute__((section(".boot_text"))) + +/** + * Domain-defined metadata must be page-aligned, which is implemented by the + * linker script for variables with this attribute. 
+ */ +#define ATTR_BSS_KERN_PAGE_ALIGNED \ + __attribute__((section(".page_aligned_kern_bss"))) + +/** Bounds for .kern_data, .syscall_data, and .prot_dom_data sections */ +extern uint32_t _sbss_kern_addr, _ebss_kern_addr; +/** End of .syscall_data section */ +extern uint32_t _ebss_syscall_addr; +/** Bounds for other data sections */ +extern uint32_t _sdata_addr, _edata_addr; + +/** + * If set, this protection domain is already in the call stack and is not + * available for nested invocations. + */ +#define PROT_DOMAINS_FLAG_BUSY BIT(0) +/** If set, this protection domain requires port I/O access. */ +#define PROT_DOMAINS_FLAG_PIO BIT(1) +/** If set, this protection domain is initialized. */ +#define PROT_DOMAINS_FLAG_INITED BIT(2) + +/** + * Data associated with each protection domain that should be fully accessible + * only to the kernel, with limited accesses and modifications permitted from + * other domains. Includes storage for system data structures. + */ +typedef struct dom_kern_data dom_kern_data_t; + +extern volatile dom_kern_data_t prot_domains_kern_data[]; +extern volatile dom_kern_data_t prot_domains_kern_data_end[]; + +#define PROT_DOMAINS_ACTUAL_CNT \ + (prot_domains_kern_data_end - prot_domains_kern_data) + +#define PROT_DOMAINS_GET_DOM_ID(dkd) \ + ((dom_id_t)((dkd) - prot_domains_kern_data)) + +void prot_domains_syscall_dispatcher(void); + +/** + * Data associated with each protection domain that is owned by clients of that + * domain and used to identify the domain. + */ +struct dom_client_data { + dom_id_t dom_id; +} __attribute__((packed)); + +/** Allocate the client-owned protection domain data structure. 
*/ +#define PROT_DOMAINS_PDCS_NM(nm) _pdcs_##nm +#define PROT_DOMAINS_ALLOC(typ, nm) \ + static dom_kern_data_t __attribute__((section(".prot_dom_bss"))) \ + PROT_DOMAINS_PDCS_NM(nm); \ + static typ ATTR_BSS_KERN nm +#define PROT_DOMAINS_INIT_ID(nm) \ + (nm).dom_id = PROT_DOMAINS_GET_DOM_ID(&PROT_DOMAINS_PDCS_NM(nm)) + +/** + * Perform early initialization during boot stage 0 to prepare for boot stage 1 + */ +void prot_domains_gdt_init() ATTR_CODE_BOOT; +/** + * Perform initialization during boot stage 1 to prepare for kernel launch + */ +void prot_domains_init(); +void prot_domains_impl_init(); + +/* Return from cpu_boot_stage1 will invoke prot_domains_launch_kernel due to + * that return address being pushed on the stack by cpu_boot_stage0. + */ +#define prot_domains_leave_boot_stage1() + +/* Return from main will invoke prot_domains_launch_app due to that return + * address being pushed on the stack by cpu_boot_stage0. + */ +#define prot_domains_leave_main() + +void prot_domains_launch_kernel(void); + +/* Whenever changing this, update syscalls-int-asm.S:prot_domains_launch_kernel + * to match: + */ +#define PROT_DOMAINS_INIT_RET_ADDR_CNT 2 + +void app_main(void); +#define prot_domains_launch_app app_main + +#else + +#define ATTR_BSS_META +#define ATTR_BSS_KERN +#define ATTR_CODE_BOOT + +struct dom_client_data { + uintptr_t mmio; /**< MMIO range base address */ + uintptr_t meta; /**< Domain-defined metadata base address */ +}; + +/** Retrieve the MMIO base address for the specified protection domain. */ +#define PROT_DOMAINS_MMIO(dcd) ((dcd).mmio) + +/** Retrieve the metadata base address for the specified protection domain. 
*/ +#define PROT_DOMAINS_META(dcd) ((dcd).meta) + +#define PROT_DOMAINS_ALLOC(typ, nm) static typ nm +#define PROT_DOMAINS_INIT_ID(nm) + +#define prot_domains_gdt_init() + +#define prot_domains_init() + +int main(void); +#define prot_domains_leave_boot_stage1 main +#define prot_domains_leave_main ENABLE_IRQ(); app_main + +#define PROT_DOMAINS_INIT_RET_ADDR_CNT 0 + +#endif + +/** + * Protection domain data readable by the client. It is used to control + * execution, so it should be protected from modifications by clients. + * Otherwise, there is a risk that one client could modify one of these + * structures used by another client to issue a system call, which could then + * cause the latter client to perform an unintended system call. + */ +typedef struct dom_client_data dom_client_data_t; + +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__NONE +#define prot_domains_reg(dcd, mmio_, mmio_sz, meta_, meta_sz, pio) \ + (dcd)->mmio = (mmio_); \ + (dcd)->meta = (meta_) +#else +/** + * \brief Register a protection domain, which involves creating the + * necessary system data structures for it. 
+ * + * \param dcd Client-accessible domain information + * \param mmio Optional base address for per-domain memory-mapped IO region + * \param mmio_sz Size of MMIO region + * \param meta Optional base address for per-domain metadata + * \param meta_sz Size of metadata + * \param pio Set to true if protection domain requires port IO access + */ +void prot_domains_reg(dom_client_data_t *dcd, + uintptr_t mmio, + size_t mmio_sz, + uintptr_t meta, + size_t meta_sz, + bool pio); +#endif + +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__NONE +#define prot_domains_lookup_meta_phys_base(drv) 0 +#else +/** Lookup base physical address of metadata region for specified domain */ +uintptr_t prot_domains_lookup_meta_phys_base(dom_client_data_t *drv); +#endif + +#if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__PAGING +#define PROT_DOMAINS_META_OFF_TO_PHYS(off, meta_phys_base) \ + ((meta_phys_base) + (off)) +#endif + +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__NONE +#define PROT_DOMAINS_ENTER_ISR(...) +#define PROT_DOMAINS_LEAVE_ISR(...) +#else +#define PROT_DOMAINS_ENTER_ISR_COMMON(exc) \ + ".if !" #exc "\n\t" \ + /* Save the current stack pointer into a callee-saved register. */ \ + "mov %%esp, %%ebx\n\t" \ + /* Pivot to the main stack of the interrupted context. */ \ + /* Interrupts never have an error code, so the offset is always 44. */ \ + /* No interrupt handlers use anything from the original interrupt stack, */ \ + /* so there is no need to copy anything from it to the main stack. */ \ + "mov 44(%%esp), %%esp\n\t" \ + ".endif\n\t" +#define PROT_DOMAINS_LEAVE_ISR_COMMON(exc) \ + /* Restore the interrupt/exception stack pointer. */ \ + ".if !" 
#exc "\n\t" \ + "mov %%ebx, %%esp\n\t" \ + ".endif\n\t" +#endif + +#endif /* !__ASSEMBLER__ */ + +#endif /* CPU_X86_MM_PROT_DOMAINS_H_ */ diff --git a/cpu/x86/mm/segmentation.h b/cpu/x86/mm/segmentation.h new file mode 100644 index 000000000..57b1b8aea --- /dev/null +++ b/cpu/x86/mm/segmentation.h @@ -0,0 +1,131 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef CPU_X86_MM_SEGMENTATION_H_ +#define CPU_X86_MM_SEGMENTATION_H_ + +#include + +#define SEG_FLAG(lbl, val) \ + (((val) & (~0u >> (32 - SEG_WIDTH_##lbl))) << SEG_SHAMT_##lbl) + +#define SEG_SET_FLAG(desc, lbl, val) \ + (desc).flags = ((desc).flags & ~SEG_FLAG(lbl, ~0u)) | SEG_FLAG(lbl, val) + +#define SEG_WIDTH_TYPE 4 +#define SEG_SHAMT_TYPE 0 +#define SEG_WIDTH_DESCTYPE 1 +#define SEG_SHAMT_DESCTYPE 4 +#define SEG_WIDTH_DPL 2 +#define SEG_SHAMT_DPL 5 +#define SEG_WIDTH_PRESENT 1 +#define SEG_SHAMT_PRESENT 7 +#define SEG_WIDTH_LIMIT_HI 4 +#define SEG_SHAMT_LIMIT_HI 8 +#define SEG_WIDTH_AVL 1 +#define SEG_SHAMT_AVL 12 +#define SEG_WIDTH_LONG_MODE 1 +#define SEG_SHAMT_LONG_MODE 13 +/* also used to indicate default operand and address size */ +#define SEG_WIDTH_DIRECTION 1 +#define SEG_SHAMT_DIRECTION 14 +#define SEG_WIDTH_GRAN 1 +#define SEG_SHAMT_GRAN 15 + +#define SEG_TYPE_DATA_RDWR SEG_FLAG(TYPE, 0x02) /* Read/Write */ +#define SEG_TYPE_CODE_EXRD SEG_FLAG(TYPE, 0x0A) /* Execute/Read */ +#define SEG_TYPE_TSS32_AVAIL SEG_FLAG(TYPE, 0x09) + +#define SEG_DESCTYPE_SYS SEG_FLAG(DESCTYPE, 0) +#define SEG_DESCTYPE_NSYS SEG_FLAG(DESCTYPE, 1) + +#define SEG_PRESENT SEG_FLAG(PRESENT, 1) + +#define SEG_DEFL_OPSZ_32BIT SEG_FLAG(DIRECTION, 1) + +#define SEG_GRAN_BYTE SEG_FLAG(GRAN, 0) +#define SEG_GRAN_PAGE SEG_FLAG(GRAN, 1) + +/** + * Segment descriptor. See Intel Combined Manual, + * Vol. 3, Section 3.4.5 for more details. + */ +typedef union segment_desc { + struct { + uint32_t lim_lo : 16; + uint32_t base_lo : 16; + uint32_t base_mid : 8; + uint32_t flags : 16; + uint32_t base_hi : 8; + }; + struct { + uint32_t raw_lo, raw_hi; + }; + uint64_t raw; +} segment_desc_t; + +static inline void +segment_desc_set_limit(segment_desc_t *c_this, uint32_t len) +{ + uint32_t limit = len - 1; + + SEG_SET_FLAG(*c_this, LIMIT_HI, limit >> 16); /* set limit bits 19:16 */ + c_this->lim_lo = limit; /* set limit bits 15:0 */ +} +/** + * \brief Initialize a segment descriptor. 
+ * \param c_this Segment descriptor to be initialized. + * \param base Base address of region to be covered by segment descriptor. + * \param len Length to be specified by segment descriptor. The units may + * be bytes or pages, depending on the flags. + * \param flags Flags to be added to the default flags: present, default + * operand size of 32 bits, and high limit bits. + */ +static inline void +segment_desc_init(segment_desc_t *c_this, + uint32_t base, uint32_t len, uint16_t flags) +{ + c_this->raw = 0; + + /* Create the high 32 bit segment */ + c_this->base_mid = base >> 16; /* set base bits 23:16 */ + c_this->base_hi = base >> 24; /* set base bits 31:24 */ + + /* Create the low 32 bit segment */ + c_this->base_lo = base; /* set base bits 15:0 */ + + c_this->flags = SEG_FLAG(PRESENT, 1) | SEG_DEFL_OPSZ_32BIT | flags; + + /* This must be done after setting the other flags, or else it + * would be partially overridden. + */ + segment_desc_set_limit(c_this, len); +} +#endif /* CPU_X86_MM_SEGMENTATION_H_ */ diff --git a/cpu/x86/mm/stacks.c b/cpu/x86/mm/stacks.c new file mode 100644 index 000000000..60ccb0ebc --- /dev/null +++ b/cpu/x86/mm/stacks.c @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "stacks.h"

/* Storage for the stacks.  Each array goes into its own named section and is
 * 4-byte aligned.
 */
uint8_t stacks_main[STACKS_SIZE_MAIN]
  __attribute__((section(".main_stack"), aligned(4)));
/* The interrupt and exception stacks only exist when protection domains are
 * enabled.
 */
#if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__NONE
uint8_t stacks_int[STACKS_SIZE_INT]
  __attribute__((section(".int_stack"), aligned(4)));
uint8_t stacks_exc[STACKS_SIZE_EXC]
  __attribute__((section(".exc_stack"), aligned(4)));
#endif
diff --git a/cpu/x86/mm/stacks.h b/cpu/x86/mm/stacks.h
new file mode 100644
index 000000000..a1005d8e0
--- /dev/null
+++ b/cpu/x86/mm/stacks.h
@@ -0,0 +1,92 @@
/*
 * Copyright (C) 2015, Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3.
Neither the name of the copyright holder nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef CPU_X86_MM_STACKS_H_
#define CPU_X86_MM_STACKS_H_

#include "prot-domains.h"

#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__NONE
/* No separate interrupt stack without protection domains. */
#define STACKS_SIZE_INT 0
#else
/**
 * The necessary amount of space for the interrupt and exception stacks is
 * determined by the amount of data pushed on the stack by the CPU when
 * delivering an interrupt or exception, and by the additional data pushed
 * on the stack by the interrupt dispatcher. See interrupt.h for more details.
 */
#define STACKS_SIZE_INT (14 * 4)
#endif

#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__PAGING
/**
 * The system call and return dispatchers use this stack, so its size was
 * determined by observing their behavior. It is possible that the dispatchers
 * could overflow the stack and overwrite data on the other stacks. An
 * alternative design that would facilitate detection of such overflows would
 * place the exception handler stack on a separate page surrounded by guard
 * bands, but that would consume a substantial amount of additional memory.
 *
 * All stack sizes should be a multiple of 4 to accommodate a 4-byte alignment.
 */
#ifdef __clang__
#define STACKS_SIZE_EXC 512
#else
#define STACKS_SIZE_EXC 256
#endif
#else
/* Outside the paging configuration the exception stack is sized like the
 * interrupt stack.
 */
#define STACKS_SIZE_EXC STACKS_SIZE_INT
#endif
/**
 * The combined size of the stacks should be an even multiple of the 4K page
 * size so that they precisely fill some number of pages when paging-based
 * protection domains are in use. The stacks are arranged contiguously by
 * the linker scripts. See those and README.md for more details.
 *
 * The main stack receives whatever is left of an 8192-byte budget after the
 * interrupt and exception stacks have been subtracted.
 */
#define STACKS_SIZE_MAIN (8192 - (STACKS_SIZE_INT + STACKS_SIZE_EXC))

#if !__ASSEMBLER__
/**
 * Stack for exception handlers. Also used for system call and return
 * dispatchers when paging-based protection domains are enabled.
 */
extern uint8_t stacks_exc[STACKS_SIZE_EXC];
/** Stack for interrupt handlers. */
extern uint8_t stacks_int[STACKS_SIZE_INT];
/** Main C stack. */
extern uint8_t stacks_main[STACKS_SIZE_MAIN];

/**
 * Initial top of the main stack: the end of stacks_main, less room for
 * PROT_DOMAINS_INIT_RET_ADDR_CNT pointer-sized return addresses.
 */
#define STACKS_INIT_TOP \
  ((uintptr_t)stacks_main + STACKS_SIZE_MAIN - \
   (PROT_DOMAINS_INIT_RET_ADDR_CNT * sizeof(uintptr_t)))

#endif

#endif /* CPU_X86_MM_STACKS_H_ */
diff --git a/cpu/x86/mm/syscalls-int-asm.S b/cpu/x86/mm/syscalls-int-asm.S
new file mode 100644
index 000000000..1fe80310f
--- /dev/null
+++ b/cpu/x86/mm/syscalls-int-asm.S
@@ -0,0 +1,87 @@
/*
 * Copyright (C) 2015, Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include "syscalls-int.h"
#include "prot-domains.h"
#include "gdt-layout.h"
#include "stacks.h"

.text

/* Invoke the system call return dispatcher from the default privilege
 * level
 */
.global prot_domains_sysret_stub
prot_domains_sysret_stub:
  int $PROT_DOMAINS_SYSRET_DISPATCH_INT

/* Invoke the system call dispatcher C routine.
 * The C routine uses the fastcall convention: ECX and EDX carry the first
 * two arguments and the third is passed on the stack.
 */
.global prot_domains_syscall_dispatcher
prot_domains_syscall_dispatcher:
  mov %esp, %ecx /*< first argument: interrupt_stack_t *intr_stk */
  /* EDX already set by the syscall stub; it is the second argument, declared
   * as "dom_id_t to_id" in prot_domains_syscall_dispatcher_impl.
   */
  push %eax /*< third argument ("syscall" in the C prototype), on the stack */
  call prot_domains_syscall_dispatcher_impl
  /* fastcall convention, so callee pops arguments */
  iret

/* Invoke the system call return dispatcher C routine */
.global prot_domains_sysret_dispatcher
prot_domains_sysret_dispatcher:
  mov %esp, %ecx /*< interrupt_stack_t *intr_stk */
  call prot_domains_sysret_dispatcher_impl
  /* Zero caller-saved registers in case they contain secrets. The system call
   * handlers and dispatchers need to preserve the callee-saved registers.
   */
  xor %eax, %eax
  xor %ecx, %ecx
  xor %edx, %edx
  iret

/* Load the data segment registers, build an interrupt return frame on the
 * current stack, let the C routine fill it in, and then IRET through it.
 */
.global prot_domains_launch_kernel
prot_domains_launch_kernel:
  mov $GDT_SEL_DATA, %eax
  mov %eax, %ds
  mov %eax, %es
  mov %eax, %fs
  mov %eax, %gs
  /* init interrupt return stack: */
  pushl $GDT_SEL_STK
  lea stacks_main, %eax
  /* matches STACKS_INIT_TOP, plus 4 since an address has been consumed: */
  add $(STACKS_SIZE_MAIN - 4), %eax
  pushl %eax
  pushl $EFLAGS_IOPL(PRIV_LVL_INT)
  pushl $GDT_SEL_CODE
  pushl $0 /* will be overwritten by syscall_dispatcher_impl */
  /* fastcall convention: */
  mov %esp, %ecx
  call prot_domains_launch_kernel_impl
  iretl
diff --git a/cpu/x86/mm/syscalls-int.c b/cpu/x86/mm/syscalls-int.c
new file mode 100644
index 000000000..1d1c77efb
--- /dev/null
+++ b/cpu/x86/mm/syscalls-int.c
@@ -0,0 +1,298 @@
/*
 * Copyright (C) 2015, Intel Corporation. All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "prot-domains.h" +#include "tss.h" +#include "helpers.h" +#include "stacks.h" +#include "idt.h" +#include "syscalls.h" +#include "gdt.h" +#include "gdt-layout.h" +#include "interrupt.h" + +/** + * Current protection domain. Not protected, since it is just a convenience + * variable to avoid unneeded protection domain switches. 
+ */ +dom_id_t cur_dom = DOM_ID_app; + +/* defined in syscalls-int-asm.S */ +void prot_domains_sysret_dispatcher(void); + +/* Maximum depth of inter-domain call stack */ +#define MAX_INTER_DOM_CALL_STK_SZ 4 + +/* Protected call stack for inter-domain system calls. The stack grows up. */ +static volatile dom_id_t ATTR_BSS_KERN + inter_dom_call_stk[MAX_INTER_DOM_CALL_STK_SZ]; + +/* Index of the next (free) slot in the inter-domain call stack */ +static int ATTR_BSS_KERN inter_dom_call_stk_ptr; + +/*---------------------------------------------------------------------------*/ +static inline void __attribute__((always_inline)) +update_eflags(dom_id_t from_id, dom_id_t to_id, interrupt_stack_t *intr_stk) +{ + if((to_id == DOM_ID_app) && + (DT_SEL_GET_RPL(intr_stk->cs) == PRIV_LVL_USER)) { + /* Only enable interrupts in the application protection domain cooperative + * scheduling context. + */ + intr_stk->eflags |= EFLAGS_IF; + } else { + intr_stk->eflags &= ~EFLAGS_IF; + } +} +/*---------------------------------------------------------------------------*/ +static inline void __attribute__((always_inline)) +dispatcher_tail(dom_id_t from_id, dom_id_t to_id, interrupt_stack_t *intr_stk) +{ + cur_dom = to_id; /* compared against by the syscall stubs to bypass the dispatcher for same-domain calls */ + + prot_domains_switch(from_id, to_id, intr_stk); + + prot_domains_set_wp(true); + + update_eflags(from_id, to_id, intr_stk); +} +/*---------------------------------------------------------------------------*/ +int main(void); +static inline void __attribute__((always_inline)) +syscall_dispatcher_tail(interrupt_stack_t *intr_stk, + dom_id_t to_id, + uint32_t syscall_eip) +{ + dom_id_t from_id; + volatile dom_kern_data_t *from_dkd, *to_dkd; + + to_dkd = prot_domains_kern_data + to_id; + + /* This implementation of protection domains is non-reentrant. For example, + * it takes the return address from the stack of a caller domain + * while dispatching a system call and stores it in a single field in the + * kernel data associated with that protection domain.
That model does not + * permit reentrancy. + */ + if((to_dkd->flags & PROT_DOMAINS_FLAG_BUSY) == PROT_DOMAINS_FLAG_BUSY) { + halt(); + } + to_dkd->flags |= PROT_DOMAINS_FLAG_BUSY; + + /* Update the interrupt stack so that the IRET instruction will return to the + * system call entrypoint. + */ + intr_stk->eip = syscall_eip; + + /* Look up the caller, i.e. the domain on top of the inter-domain call stack */ + from_id = inter_dom_call_stk[inter_dom_call_stk_ptr - 1]; + from_dkd = prot_domains_kern_data + from_id; + + /* Save the current return address from the unprivileged stack to a protected + * location in the kernel-owned data structure. This enforces return + * entrypoint control. + */ + from_dkd->orig_ret_addr = *(uintptr_t *)intr_stk->esp; + /* Update the unprivileged stack so that when the system call body is + * complete, it will invoke the system call return stub. + */ + *((uintptr_t *)intr_stk->esp) = (uintptr_t)prot_domains_sysret_stub; + + if(MAX_INTER_DOM_CALL_STK_SZ <= inter_dom_call_stk_ptr) { /* inter-domain call stack is full */ + halt(); + } + inter_dom_call_stk[inter_dom_call_stk_ptr] = to_id; + + inter_dom_call_stk_ptr++; + + dispatcher_tail(from_id, to_id, intr_stk); +} +/*---------------------------------------------------------------------------*/ +void __attribute__((fastcall)) +prot_domains_syscall_dispatcher_impl(interrupt_stack_t *intr_stk, + dom_id_t to_id, + syscalls_entrypoint_t *syscall) +{ + uint32_t syscall_eip; + + if(PROT_DOMAINS_ACTUAL_CNT <= to_id) { + halt(); + } + + /* Get the approved entrypoint for the system call being invoked; syscall must point at an element of the syscalls_entrypoints table */ + + if(!((((uintptr_t)syscalls_entrypoints) <= (uintptr_t)syscall) && + (((uintptr_t)syscall) < (uintptr_t)syscalls_entrypoints_end) && + (((((uintptr_t)syscall) - (uintptr_t)syscalls_entrypoints) + % sizeof(syscalls_entrypoint_t)) == 0))) { + /* Assert is not usable when switching protection domains */ + halt(); + } + + if((BIT(to_id) & syscall->doms) == 0) { /* to_id has not been authorized for this system call */ + halt(); + } + + syscall_eip = syscall->entrypoint; + + prot_domains_set_wp(false); + +
syscall_dispatcher_tail(intr_stk, to_id, syscall_eip); +} +/*---------------------------------------------------------------------------*/ +int main(void); +void __attribute__((fastcall)) +prot_domains_launch_kernel_impl(interrupt_stack_t *intr_stk) +{ + inter_dom_call_stk[0] = DOM_ID_app; + + inter_dom_call_stk_ptr = 1; + + syscall_dispatcher_tail(intr_stk, DOM_ID_kern, (uint32_t)main); +} +/*---------------------------------------------------------------------------*/ +void __attribute__((fastcall)) +prot_domains_sysret_dispatcher_impl(interrupt_stack_t *intr_stk) +{ + dom_id_t from_id, to_id; + if(inter_dom_call_stk_ptr <= 1) { + halt(); + } + + from_id = inter_dom_call_stk[inter_dom_call_stk_ptr - 1]; + to_id = inter_dom_call_stk[inter_dom_call_stk_ptr - 2]; + + intr_stk->eip = prot_domains_kern_data[to_id].orig_ret_addr; + + prot_domains_set_wp(false); + + prot_domains_kern_data[from_id].flags &= ~PROT_DOMAINS_FLAG_BUSY; + + inter_dom_call_stk_ptr--; + + dispatcher_tail(from_id, to_id, intr_stk); +} +/*---------------------------------------------------------------------------*/ +/** + * \brief Lookup the current protection domain. + * \return Kernel data structure for the current protection domain. + */ +static volatile dom_kern_data_t * +get_current_domain(void) +{ + dom_id_t id; + id = inter_dom_call_stk[inter_dom_call_stk_ptr - 1]; + return prot_domains_kern_data + id; +} +/*---------------------------------------------------------------------------*/ +/** + * \brief Check whether the protection domain is authorized to perform port + * I/O from the cooperative scheduling context. 
+ * \param dkd Protection domain to check + * \return Result of the check as a Boolean value + */ +static bool +needs_port_io(volatile dom_kern_data_t *dkd) +{ + return (dkd->flags & PROT_DOMAINS_FLAG_PIO) == PROT_DOMAINS_FLAG_PIO; +} +/*---------------------------------------------------------------------------*/ +/* Mark the context parameter as volatile so that writes to it will not get + * optimized out. This parameter is not handled like ordinary function + * parameters. It actually partially includes the contents of the exception + * stack, so updates to those locations can affect the operation of the + * subsequent interrupt return. + */ +static void +gp_fault_handler(volatile struct interrupt_context context) +{ + uint32_t cs_lim; + uint8_t opcode; + + volatile dom_kern_data_t *dkd = get_current_domain(); + if (needs_port_io(dkd)) { + __asm__ __volatile__ ( + "mov %%cs, %0\n\t" + "lsl %0, %0\n\t" + : "=r"(cs_lim)); + + if (cs_lim < context.eip) { + halt(); + } + + /* Load first byte of faulting instruction */ + __asm__ __volatile__ ( + "movb %%cs:%1, %0" + : "=q"(opcode) + : "m"(*(uint8_t *)context.eip)); + + switch (opcode) { + case 0xEC: /* inb */ + context.eax = (context.eax & ~0xFF) | inb((uint16_t)context.edx); + break; + case 0xED: /* inl */ + context.eax = inl((uint16_t)context.edx); + break; + case 0xEE: /* outb */ + outb((uint16_t)context.edx, (uint8_t)context.eax); + break; + case 0xEF: /* outl */ + outl((uint16_t)context.edx, context.eax); + break; + default: + halt(); + } + + /* Skip the faulting port I/O instruction that was emulated. 
*/ + context.eip++; + } else { + halt(); + } +} +/*---------------------------------------------------------------------------*/ +void +syscalls_int_init(void) +{ + tss_init(); + + SET_EXCEPTION_HANDLER(13, 1, gp_fault_handler); + + /* Register system call dispatchers: */ + + idt_set_intr_gate_desc(PROT_DOMAINS_SYSCALL_DISPATCH_INT, + (uint32_t)prot_domains_syscall_dispatcher, + GDT_SEL_CODE_EXC, + PRIV_LVL_USER); + idt_set_intr_gate_desc(PROT_DOMAINS_SYSRET_DISPATCH_INT, + (uint32_t)prot_domains_sysret_dispatcher, + GDT_SEL_CODE_EXC, + PRIV_LVL_USER); +} +/*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/mm/syscalls-int.h b/cpu/x86/mm/syscalls-int.h new file mode 100644 index 000000000..7ee4bcb36 --- /dev/null +++ b/cpu/x86/mm/syscalls-int.h @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CPU_X86_MM_SYSCALLS_INT_H_ +#define CPU_X86_MM_SYSCALLS_INT_H_ + +/** Software interrupt number for dispatching a system call */ +#define PROT_DOMAINS_SYSCALL_DISPATCH_INT 100 +/** Software interrupt number for returning from a system call */ +#define PROT_DOMAINS_SYSRET_DISPATCH_INT 101 + +#if !__ASSEMBLER__ + +#include <stdint.h> + +extern dom_id_t cur_dom; + +#define SYSCALLS_STUB_EPILOGUE(nm) \ + /* Load the system call identifier into EAX, as required by */ \ + /* prot_domains_syscall_dispatcher: */ \ + " mov $" EXP_STRINGIFY(_syscall_ent_##nm) ", %eax\n\t" \ + /* Check whether the server protection domain is already active: */ \ + " cmp %edx, cur_dom\n\t" \ + /* If so, skip the system call dispatcher and directly invoke the */ \ + /* system call body: */ \ + " je _syscall_" #nm "\n\t" \ + " int $" EXP_STRINGIFY(PROT_DOMAINS_SYSCALL_DISPATCH_INT) "\n\t" + +#define SYSCALLS_STUB(nm) \ + SYSCALLS_ALLOC_ENTRYPOINT(nm); \ + asm ( \ + ".text\n\t" \ + ".global " #nm "\n\t" \ + #nm ":\n\t" \ + /* First, load server protection domain ID into EDX, as required by */ \ + /* prot_domains_syscall_dispatcher: */ \ + /* Skip past return address on stack to obtain address of protection */ \ + /* domain ID parameter: */ \ + " mov 4(%esp), %edx\n\t" \ + SYSCALLS_STUB_EPILOGUE(nm)) + +#define SYSCALLS_STUB_SINGLETON(nm, dcd) \ + SYSCALLS_ALLOC_ENTRYPOINT(nm); \ + asm ( \ + ".text\n\t" \ + ".global " #nm "\n\t" \ + #nm ":\n\t" \ + /*
First, load server protection domain ID into EDX, as required by */ \ + /* prot_domains_syscall_dispatcher: */ \ + " mov " #dcd ", %edx\n\t" \ + SYSCALLS_STUB_EPILOGUE(nm)) + +void syscalls_int_init(void); + +void prot_domains_sysret_stub(void); + +/* Inter-privilege level interrupt stack with no error code. */ +typedef struct interrupt_stack { + uint32_t eip; + uint32_t cs; + uint32_t eflags; + uint32_t esp; + uint32_t ss; +} interrupt_stack_t; + +#if 0 +/* Declaration only included for documentation purposes: */ +/** + * \brief Switch to a different protection domain. + * \param from_id Origin protection domain. + * \param to_id Destination protection domain. + * \return Segment selector for kernel data access (only used for + * multi-segment implementations). + */ +uint32_t prot_domains_switch(dom_id_t from_id, + dom_id_t to_id, + interrupt_stack_t *intr_stk); +#endif + +#endif + +#endif /* CPU_X86_MM_SYSCALLS_INT_H_ */ diff --git a/cpu/x86/mm/syscalls.h b/cpu/x86/mm/syscalls.h new file mode 100644 index 000000000..83be7a47e --- /dev/null +++ b/cpu/x86/mm/syscalls.h @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2015-2016, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CPU_X86_MM_SYSCALLS_H_ +#define CPU_X86_MM_SYSCALLS_H_ + +#include "helpers.h" +#include "prot-domains.h" + +typedef uint32_t dom_id_bitmap_t; + +typedef struct syscalls_entrypoint { + uintptr_t entrypoint; + dom_id_bitmap_t doms; +} syscalls_entrypoint_t; +extern syscalls_entrypoint_t syscalls_entrypoints[]; +extern syscalls_entrypoint_t syscalls_entrypoints_end[]; + +#define SYSCALLS_ACTUAL_CNT (syscalls_entrypoints_end - syscalls_entrypoints) + +#if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__NONE + +#define SYSCALLS_ALLOC_ENTRYPOINT(nm) \ + syscalls_entrypoint_t __attribute__((section(".syscall_bss"))) \ + _syscall_ent_##nm + +#define SYSCALLS_INIT(nm) \ + _syscall_ent_##nm.entrypoint = (uintptr_t)_syscall_##nm; \ + _syscall_ent_##nm.doms = 0 + +#define SYSCALLS_DEFINE(nm, ...) \ + void _syscall_##nm(__VA_ARGS__); \ + SYSCALLS_STUB(nm); \ + void _syscall_##nm(__VA_ARGS__) + +#define SYSCALLS_DEFINE_SINGLETON(nm, dcd, ...) 
\ + void _syscall_##nm(__VA_ARGS__); \ + SYSCALLS_STUB_SINGLETON(nm, dcd); \ + void _syscall_##nm(__VA_ARGS__) + +#define SYSCALLS_AUTHZ(nm, drv) _syscall_ent_##nm.doms |= BIT((drv).dom_id) +#define SYSCALLS_DEAUTHZ(nm, drv) _syscall_ent_##nm.doms &= ~BIT((drv).dom_id) + +/** + * Check that any untrusted pointer that could have been influenced by a caller + * (i.e. a stack parameter or global variable) refers to a location at or above + * a certain stack boundary and halt otherwise. This is used to prevent a + * protection domain from calling a different protection domain and passing a + * pointer that references a location in the callee's stack other than its + * parameters. + * + * This also checks that the pointer is either within the stack region or the + * shared data region, which is important for preventing redirection of data + * accesses to MMIO or metadata regions. + * + * The pointer is both validated and copied to a new storage location, which + * must be within the callee's local stack region (excluding the parameter + * region). This is to mitigate scenarios such as two pointers being validated + * and an adversary later inducing a write through one of the pointers to the + * other pointer to corrupt the latter pointer before it is used. + * + * The frame address is adjusted to account for the first word pushed on the + * local frame and the return address, since neither of those should ever be + * referenced by an incoming pointer. In particular, if an incoming pointer + * references the return address, it could potentially redirect execution with + * the privileges of the callee protection domain. 
+ */ +#define PROT_DOMAINS_VALIDATE_PTR(validated, untrusted, sz) \ + validated = untrusted; \ + if((((uintptr_t)(validated)) < \ + ((2 * sizeof(uintptr_t)) + (uintptr_t)__builtin_frame_address(0))) || \ + /* reject a range whose end address wraps around, then check the upper bound: */ ((((uintptr_t)(validated)) + (sz)) < ((uintptr_t)(validated))) || (((uintptr_t)&_edata_addr) <= (((uintptr_t)(validated)) + (sz)))) { \ + halt(); \ + } + +#else + +#define SYSCALLS_ALLOC_ENTRYPOINT(nm) +#define SYSCALLS_INIT(nm) +#define SYSCALLS_DEFINE(nm, ...) void nm(__VA_ARGS__) +#define SYSCALLS_DEFINE_SINGLETON(nm, dcd, ...) void nm(__VA_ARGS__) +#define SYSCALLS_AUTHZ(nm, drv) +#define SYSCALLS_DEAUTHZ(nm, drv) +#define PROT_DOMAINS_VALIDATE_PTR(validated, untrusted, sz) validated = untrusted + +#endif + +#endif /* CPU_X86_MM_SYSCALLS_H_ */ diff --git a/cpu/x86/mm/tss.c b/cpu/x86/mm/tss.c new file mode 100644 index 000000000..c3628fa8a --- /dev/null +++ b/cpu/x86/mm/tss.c @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2015-2016, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "gdt.h" +#include "gdt-layout.h" +#include "prot-domains.h" +#include "segmentation.h" +#include "stacks.h" +#include "tss.h" + +/* System-wide TSS */ +tss_t ATTR_BSS_KERN sys_tss; + +static segment_desc_t ATTR_BSS_GDT sys_tss_desc; + +/*---------------------------------------------------------------------------*/ +/** + * \brief Initialize system-wide TSS. + */ +void +tss_init(void) +{ + sys_tss.iomap_base = sizeof(sys_tss); + sys_tss.esp2 = ((uint32_t)stacks_int) + STACKS_SIZE_INT; + sys_tss.ss2 = GDT_SEL_STK_INT; + sys_tss.esp0 = ((uint32_t)stacks_exc) + STACKS_SIZE_EXC; + sys_tss.ss0 = GDT_SEL_STK_EXC; + + segment_desc_init(&sys_tss_desc, (uint32_t)&sys_tss, sizeof(sys_tss), + SEG_FLAG(DPL, PRIV_LVL_EXC) | + SEG_DESCTYPE_SYS | SEG_TYPE_TSS32_AVAIL); + + __asm__ __volatile__ ( + "ltr %0" + : + : "r" ((uint16_t)GDT_SEL_OF_DESC(&sys_tss_desc, 0))); +} +/*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/mm/tss.h b/cpu/x86/mm/tss.h new file mode 100644 index 000000000..e8431d388 --- /dev/null +++ b/cpu/x86/mm/tss.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CPU_X86_MM_TSS_H_ +#define CPU_X86_MM_TSS_H_ + +#include <stdint.h> + +/** + * Task State Segment. Used by the CPU to manage switching between + * different protection domains (tasks). The current task is referenced + * by the Task Register. When the CPU switches away from a task due to + * a far call, etc., it updates the associated in-memory TSS with the + * current state of the task. It then loads CPU state from the TSS for + * the new task. See Intel Combined Manual, Vol. 3, Chapter 7 for more + * details.
+ */ +typedef struct tss { + uint32_t prev_tsk; /**< The selector of the task that called this one, if applicable */ + uint32_t esp0; /**< Stack pointer for ring 0 code in this task */ + uint32_t ss0; /**< Stack segment selector for ring 0 code in this task */ + uint32_t esp1; /**< Stack pointer for ring 1 code in this task */ + uint32_t ss1; /**< Stack segment selector for ring 1 code in this task */ + uint32_t esp2; /**< Stack pointer for ring 2 code in this task */ + uint32_t ss2; /**< Stack segment selector for ring 2 code in this task */ + uint32_t cr3; /**< CR3 for this task when paging is enabled */ + uint32_t eip; /**< Stored instruction pointer value */ + uint32_t eflags; /**< Settings for EFLAGS register */ + /** General purpose register values */ + uint32_t eax, ecx, edx, ebx, esp, ebp, esi, edi; + /** Segment register selector values */ + uint32_t es, cs, ss, ds, fs, gs; + /** Selector for Local Descriptor Table */ + uint32_t ldt; + /** Debug-related flag */ + uint16_t t; + /** Offset from base of TSS to base of IO permission bitmap, if one is installed */ + uint16_t iomap_base; +} tss_t; + +void tss_init(void); + +#endif /* CPU_X86_MM_TSS_H_ */ diff --git a/cpu/x86/quarkX1000.ld b/cpu/x86/quarkX1000.ld index a7f2c2555..2f90b7c70 100644 --- a/cpu/x86/quarkX1000.ld +++ b/cpu/x86/quarkX1000.ld @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015, Intel Corporation. All rights reserved. + * Copyright (C) 2015-2016, Intel Corporation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -55,6 +55,7 @@ SECTIONS { .text ALIGN (32) : { KEEP(*(.multiboot)) + *(.boot_text) *(.text*) } @@ -75,6 +76,15 @@ SECTIONS { .bss ALIGN (32) : { *(COMMON) + *(.main_stack) *(.bss*) + + *(.gdt_bss_start) + /* + The other GDT-related sections defined in gdt.h are only used when + protection domain support is enabled. Thus, they do not need to be + included here.
+ */ + _ebss_gdt_addr = .; } } diff --git a/cpu/x86/quarkX1000_dma.ld b/cpu/x86/quarkX1000_dma.ld index 71ebd04b1..fe3b79861 100644 --- a/cpu/x86/quarkX1000_dma.ld +++ b/cpu/x86/quarkX1000_dma.ld @@ -37,8 +37,12 @@ SECTIONS { Using 1K-alignment perturbs the symbols, hindering debugging. Thus, this section is simply padded out to the desired alignment and declared to have a section alignment of only 32 bytes. + + The alignment directives used here suffice even when paging is in use, + because this is the last section and directly follows one (.bss.meta) + that is 4K-aligned. */ - .bss.dma ALIGN (32) (NOLOAD) : + .bss.dma (NOLOAD) : ALIGN (32) { /* The IMR feature operates at 1K granularity. */ . = ALIGN(1K); diff --git a/cpu/x86/quarkX1000_paging.ld b/cpu/x86/quarkX1000_paging.ld new file mode 100644 index 000000000..0352cbf64 --- /dev/null +++ b/cpu/x86/quarkX1000_paging.ld @@ -0,0 +1,204 @@ +/* + * Copyright (C) 2015-2016, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +OUTPUT_FORMAT("elf32-i386") + +ENTRY(start) + +SECTIONS { + /* + OS-Dev Wiki says it is common for kernels to start at 1M. Addresses before that + are used by BIOS/EFI, the bootloader and memory-mapped I/O. + + The UEFI GenFw program inserts a 0x220-byte offset between the image base and + the .text section. We add that same offset here to align the symbols in the + UEFI DLL with those in the final UEFI binary to make debugging easier. + */ + . = 1M + 0x220; + + .text.boot : ALIGN (32) + { + *(.multiboot) + *(.boot_text) + + /* + Fill out the section to the next 4K boundary so that the UEFI GenFw + program does not shift the following .text section forward into the + gap and perturb the symbols. This only works if the size of this + section is less than 4K - 0x220 bytes. + */ + . = 4K - 0x220; + } + + /* + It is actually desired that each of the following sections be page- + aligned. However, the UEFI GenFw program ratchets up its alignment + granularity to the maximum granularity discovered in its input file. + Using page-alignment perturbs the symbols, hindering debugging. Thus, + this file simply pads each section out to the desired page alignment and + declares a section alignment granularity of 32 bytes. + */ + + .text : ALIGN (32) + { + *(.text*) + + . 
= ALIGN(4K); + } + + _stext_addr = ADDR(.text); + _etext_addr = ADDR(.text) + SIZEOF(.text); + + .data.stack : ALIGN (32) + { + /* + Introduce a guard band page before the stacks to facilitate stack + overflow detection. This approach wastes a page of memory for each + guard band, but has the advantage of enabling an identity mapping + for all linear to physical addresses except those in the MMIO + regions. The guard bands are marked not-present in the page tables + to facilitate stack overflow detection. + + This padding must be placed inside of the section, or else it will + get dropped when the UEFI GenFw program generates the UEFI binary. + */ + . += 4K; + + /* + Place the main stack first so that an overflow is detected and does + not overwrite the interrupt or supervisor stacks. Usage of the + interrupt and supervisor stacks is predictable, since they are only used by short + trampoline code sequences that quickly pivot to the main stack. + */ + *(.main_stack) + *(.int_stack) + *(.exc_stack) + + /* + The combined size of the stacks is an even multiple of 4K, so there + is no need to align the location counter here. + */ + + /* + Introduce a guard band page after the stacks to detect stack underflow. + Note that an underflow that only affects the interrupt and supervisor + stacks will not generate a page fault. Detecting such conditions by + placing the interrupt and supervisor stacks on separate pages would + substantially increase memory usage. + */ + . += 4K; + } + + .data : ALIGN (32) + { + /* + The UEFI GenFw program treats all sections that are alloc and read- + only as code sections. By that criterion, .rodata would be a code + section, but making such data executable is undesirable. Thus, this + script lumps in .rodata with other data. It may be desirable in the + future to actually write-protect this data. + */ + *(.rodata*) + *(.data*) + + /* + These could also be treated as read-only data to prevent tampering + from the user privilege level.
+ */ + _sdata_shared_isr = .; + KEEP(*(.shared_isr_data*)) + _edata_shared_isr = .; + + . = ALIGN(4K); + } + + .bss : ALIGN (32) + { + *(COMMON) + *(.bss*) + + . = ALIGN(4K); + } + + _sdata_addr = ADDR(.data); + _edata_addr = ADDR(.bss) + SIZEOF(.bss); + + .bss.kern (NOLOAD) : ALIGN (32) + { + /* + Page-aligned data is output first. + It is infeasible to apply a page-alignment attribute to them in the + source code, because that increases the alignment of this section to + be page-aligned, which causes problems when generating a UEFI binary + as described above. + */ + *(.page_aligned_kern_bss) + *(.kern_bss) + + syscalls_entrypoints = .; + *(.syscall_bss) + syscalls_entrypoints_end = .; + + . = ALIGN(4K); + } + + _ebss_syscall_addr = ADDR(.bss.kern) + SIZEOF(.bss.kern); + + .bss.kern_priv (NOLOAD) : ALIGN (32) + { + prot_domains_kern_data = .; + /* + The kernel and app protection domain control structures must always + be placed in the first two slots in this order, so that they have + well-known protection domain IDs: + */ + *(.kern_prot_dom_bss) + *(.app_prot_dom_bss) + *(.prot_dom_bss) + prot_domains_kern_data_end = .; + + *(.gdt_bss_start) + *(.gdt_bss_mid) + *(.gdt_bss) + _ebss_gdt_addr = .; + + . = ALIGN(4K); + } + + _sbss_kern_addr = ADDR(.bss.kern); + _ebss_kern_addr = ADDR(.bss.kern_priv) + SIZEOF(.bss.kern_priv); + + .bss.meta (NOLOAD) : ALIGN (32) + { + *(.meta_bss) + + . = ALIGN(4K); + } +} diff --git a/cpu/x86/uefi/bootstrap_uefi.c b/cpu/x86/uefi/bootstrap_uefi.c index f6981eb96..5ef778cca 100644 --- a/cpu/x86/uefi/bootstrap_uefi.c +++ b/cpu/x86/uefi/bootstrap_uefi.c @@ -35,7 +35,12 @@ void start(void); -EFI_STATUS EFIAPI +/* The section attribute below is copied from ATTR_BOOT_CODE in prot-domains.h. + * prot-domains.h includes stdlib.h which defines NULL. The UEFI headers also + * define NULL, which induces a warning when the compiler detects the conflict. + * To avoid that, we avoid including prot-domains.h from this file. 
+ */ +EFI_STATUS EFIAPI __attribute__((section(".boot_text"))) uefi_start(IN EFI_HANDLE ImageHandle, IN EFI_SYSTEM_TABLE *SystemTable) { EFI_MEMORY_DESCRIPTOR mem_map[MAX_MEM_DESC]; diff --git a/examples/galileo/Makefile b/examples/galileo/Makefile index 94a327bfd..bc7b071ff 100644 --- a/examples/galileo/Makefile +++ b/examples/galileo/Makefile @@ -8,6 +8,10 @@ ifeq ($(filter $(EXAMPLE),$(KNOWN_EXAMPLES)),) $(error Unable to proceed) endif +ifeq ($(EXAMPLE),print-imr) + CFLAGS += -DDBG_IMRS +endif + CONTIKI_PROJECT = $(EXAMPLE) all: $(CONTIKI_PROJECT) diff --git a/platform/galileo/README.md b/platform/galileo/README.md index 89e6f5711..e441876db 100644 --- a/platform/galileo/README.md +++ b/platform/galileo/README.md @@ -44,6 +44,9 @@ Standard APIs: * Stdio library (stdout and stderr only). Console output through UART 1 device (connected to Galileo Gen2 FTDI header) +Optional support for protection domains is also implemented and is +described in cpu/x86/mm/README.md. + Building -------- diff --git a/platform/galileo/contiki-main.c b/platform/galileo/contiki-main.c index 42568d90a..7b31a9961 100644 --- a/platform/galileo/contiki-main.c +++ b/platform/galileo/contiki-main.c @@ -33,12 +33,17 @@ #include "contiki.h" #include "contiki-net.h" #include "cpu.h" +#include "eth.h" #include "eth-conf.h" #include "galileo-pinmux.h" #include "gpio.h" +#include "helpers.h" #include "i2c.h" #include "imr-conf.h" #include "interrupt.h" +#include "irq.h" +#include "pci.h" +#include "prot-domains.h" #include "shared-isr.h" #include "uart.h" @@ -49,31 +54,12 @@ PROCINIT( &etimer_process #endif ); -int -main(void) +/*---------------------------------------------------------------------------*/ +void +app_main(void) { - cpu_init(); -#ifdef X86_CONF_RESTRICT_DMA - quarkX1000_imr_conf(); -#endif - /* Initialize UART connected to Galileo Gen2 FTDI header */ - quarkX1000_uart_init(QUARK_X1000_UART_1); - clock_init(); - rtimer_init(); - printf("Starting Contiki\n"); - 
quarkX1000_i2c_init(); - quarkX1000_i2c_configure(QUARKX1000_I2C_SPEED_STANDARD, - QUARKX1000_I2C_ADDR_MODE_7BIT); - /* use default pinmux configuration */ - if(galileo_pinmux_initialize() < 0) { - fprintf(stderr, "Failed to initialize pinmux\n"); - } - quarkX1000_gpio_init(); - - ENABLE_IRQ(); - process_init(); procinit_init(); ctimer_init(); @@ -81,11 +67,45 @@ main(void) eth_init(); - shared_isr_init(); - while(1) { process_run(); } + halt(); +} +/*---------------------------------------------------------------------------*/ +/* Kernel entrypoint */ +int +main(void) +{ +#ifdef X86_CONF_RESTRICT_DMA + quarkX1000_imr_conf(); +#endif + irq_init(); + /* Initialize UART connected to Galileo Gen2 FTDI header */ + quarkX1000_uart_init(QUARK_X1000_UART_1); + clock_init(); + rtimer_init(); + + pci_root_complex_init(); + quarkX1000_eth_init(); + quarkX1000_i2c_init(); + quarkX1000_i2c_configure(QUARKX1000_I2C_SPEED_STANDARD, + QUARKX1000_I2C_ADDR_MODE_7BIT); + /* use default pinmux configuration */ + if(galileo_pinmux_initialize() < 0) { + fprintf(stderr, "Failed to initialize pinmux\n"); + } + quarkX1000_gpio_init(); + shared_isr_init(); + + /* The ability to remap interrupts is not needed after this point and should + * thus be disabled according to the principle of least privilege. + */ + pci_root_complex_lock(); + + prot_domains_leave_main(); + return 0; } +/*---------------------------------------------------------------------------*/ diff --git a/platform/galileo/net/eth-conf.c b/platform/galileo/net/eth-conf.c index ff3a771bf..061e0aae7 100644 --- a/platform/galileo/net/eth-conf.c +++ b/platform/galileo/net/eth-conf.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015, Intel Corporation. All rights reserved. + * Copyright (C) 2015-2016, Intel Corporation. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -29,7 +29,6 @@ */ #include "eth-conf.h" -#include "eth.h" #include "net/eth-proc.h" #include "contiki-net.h" #include "net/linkaddr.h" @@ -45,6 +44,7 @@ const linkaddr_t linkaddr_null = { { 0, 0, 0, 0, 0, 0 } }; #define NAMESERVER_IP GATEWAY_IP #endif +/*---------------------------------------------------------------------------*/ void eth_init(void) { @@ -69,7 +69,6 @@ eth_init(void) #endif #endif - quarkX1000_eth_init(); - process_start(&eth_process, NULL); } +/*---------------------------------------------------------------------------*/