#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <inttypes.h>
#include <ctype.h>
#include <fcntl.h>
#include <sys/stat.h>

#include <xenctrl.h>
#include "xen/xc_dom.h"

#include "msr-index.h"
#include "xenner.h"
#include "mm.h"

/* ------------------------------------------------------------------ */

/*
 * Initial segment register contents handed to KVM for the boot vcpu.
 * All descriptors are flat (base 0, limit 0xffffffff with page
 * granularity), present, and at privilege level 0.
 */

/* 32-bit code segment: type 0xb, default operand size 32 bit (.db). */
static const struct kvm_segment xen32_cs0 = {
    .base     = 0,
    .limit    = 0xffffffff,
    .selector = 0xe008,
    .dpl      = 0,
    .type     = 0xb,
    .present  = 1,  .db = 1,  .s = 1,  .g = 1,
};
/* 32-bit data segment: type 0x3; also used for es/fs/gs/ss in 32-bit mode. */
static const struct kvm_segment xen32_ds0 = {
    .base     = 0,
    .limit    = 0xffffffff,
    .selector = 0xe010,
    .dpl      = 0,
    .type     = 0x3,
    .present  = 1,  .db = 1,  .s = 1,  .g = 1,
};

/* 64-bit code segment: same as the 32-bit one but with .l set instead of .db. */
static const struct kvm_segment xen64_cs0_64 = {
    .base     = 0,
    .limit    = 0xffffffff,
    .selector = 0xe008,
    .dpl      = 0,
    .type     = 0xb,
    .present  = 1,  .l = 1,  .s = 1,  .g = 1,
};
/* Data segment used in 64-bit mode; field-for-field equal to xen32_ds0. */
static const struct kvm_segment xen64_ds0_32 = {
    .base     = 0,
    .limit    = 0xffffffff,
    .selector = 0xe010,
    .dpl      = 0,
    .type     = 0x3,
    .present  = 1,  .db = 1,  .s = 1,  .g = 1,
};

/* Marks a segment register as not in use (assigned to the LDT below). */
static const struct kvm_segment noseg = {
    .unusable = 1,
};

/* ------------------------------------------------------------------ */

/*
 * Register state common to every guest mode.
 *
 * Points rip at the entry address of the emu ELF image, passes the vcpu
 * id in rbx, disables the LDT, loads cr3 with the emu page directory
 * frame and switches on protected mode plus paging.
 */
static void setup_regs_all(struct xenvcpu *vcpu)
{
    struct xenvm *vm = vcpu->vm;
    pfn_t pgd_frame = vm->emu_pgd_mfn;

    /* general purpose registers */
    vcpu->regs.rip = elf_uval(vm->emu, vm->emu->ehdr, e_entry);
    vcpu->regs.rbx = vcpu->id;

    /* no local descriptor table */
    vcpu->sregs.ldt = noseg;

    /* page table root */
    vcpu->sregs.cr3 = frame_to_addr(pgd_frame);

    /* protected mode, paging, write protect */
    vcpu->sregs.cr0 |= X86_CR0_PE | X86_CR0_PG | X86_CR0_WP;

    /* large pages, global pages */
    vcpu->sregs.cr4 |= X86_CR4_PSE | X86_CR4_PGE;
}

/*
 * Segment and paging state for 32-bit guests (with or without PAE).
 *
 * cs gets the 32-bit code descriptor, every other segment register gets
 * the 32-bit data descriptor.  CR4.PAE is set only for XENMODE_PAE.
 */
static void setup_regs_32(struct xenvcpu *vcpu)
{
    /* code segment */
    vcpu->sregs.cs = xen32_cs0;

    /* data and stack segments all share one descriptor */
    vcpu->sregs.ds = xen32_ds0;
    vcpu->sregs.es = xen32_ds0;
    vcpu->sregs.fs = xen32_ds0;
    vcpu->sregs.gs = xen32_ds0;
    vcpu->sregs.ss = xen32_ds0;

    /* paging */
    if (XENMODE_PAE == vcpu->vm->mode) {
        vcpu->sregs.cr4 |= X86_CR4_PAE;
    }
}

/*
 * Segment, EFER and paging state for 64-bit guests.
 *
 * cs gets the 64-bit code descriptor, every other segment register gets
 * the data descriptor.  EFER has syscall and long mode (enable + active)
 * switched on, and CR4.PAE is set.
 */
static void setup_regs_64(struct xenvcpu *vcpu)
{
    /* code segment */
    vcpu->sregs.cs = xen64_cs0_64;

    /* data and stack segments all share one descriptor */
    vcpu->sregs.ds = xen64_ds0_32;
    vcpu->sregs.es = xen64_ds0_32;
    vcpu->sregs.fs = xen64_ds0_32;
    vcpu->sregs.gs = xen64_ds0_32;
    vcpu->sregs.ss = xen64_ds0_32;

    /* syscall enable, long mode enable, long mode active */
    vcpu->sregs.efer |= EFER_SCE | EFER_LME | EFER_LMA;

    /* paging */
    vcpu->sregs.cr4 |= X86_CR4_PAE;
}

/*
 * Fill in the initial register state of a vcpu according to the guest
 * mode of its VM.  The common setup runs first, then the 32-bit or
 * 64-bit specific part.  Nothing is touched for any other mode value.
 */
void setup_regs(struct xenvcpu *vcpu)
{
    struct xenvm *vm = vcpu->vm;

    if (vm->mode == XENMODE_32 || vm->mode == XENMODE_PAE) {
        d1printf("%s: 32bit\n", __FUNCTION__);
        setup_regs_all(vcpu);
        setup_regs_32(vcpu);
    } else if (vm->mode == XENMODE_64) {
        d1printf("%s: 64bit\n", __FUNCTION__);
        setup_regs_all(vcpu);
        setup_regs_64(vcpu);
    }
}

/* ------------------------------------------------------------------ */

static enum xenmode xen_guest_mode(struct xc_dom_image *dom)
{
    if (0 == strcmp(dom->guest_type, "xen-3.0-x86_32")) {
	return XENMODE_32;
    } else if (0 == strcmp(dom->guest_type, "xen-3.0-x86_32p")) {
	return XENMODE_PAE;
    } else if (0 == strcmp(dom->guest_type, "xen-3.0-x86_64")) {
	return XENMODE_64;
    } else {
	return 0;
    }
}

/*
 * Write the 32-bit hypercall stubs into the guest's hypercall page.
 *
 * The page is treated as PAGE_SIZE / 32 slots of 32 bytes each; slot <i>
 * receives "mov $<i>,%eax; int $0x82; ret".  Only the leading bytes of
 * every slot are written, the remainder is left as found.
 *
 * Multi-byte fields sit at odd offsets inside a slot, so they are stored
 * with memcpy() instead of through cast pointers: that avoids misaligned
 * and type-punned stores while producing the same bytes in memory.
 *
 * hypercall_page: start of the (writable) hypercall page.
 */
static void xen_guest_hypercall_page_init_32(void *hypercall_page)
{
    const uint16_t int_0x82 = 0x82cd;   /* int  $0x82 */
    uint8_t *page = hypercall_page;
    uint8_t *p;
    uint32_t nr;
    int i;

    /* Fill in all the transfer points with template machine code. */
    for (i = 0; i < (PAGE_SIZE / 32); i++) {
        p  = page + (i * 32);
        nr = i;
        p[0] = 0xb8;                                /* mov  $<i>,%eax */
        memcpy(p + 1, &nr, sizeof(nr));
        memcpy(p + 5, &int_0x82, sizeof(int_0x82)); /* int  $0x82 */
        p[7] = 0xc3;                                /* ret */
    }

    /*
     * HYPERVISOR_iret is special because it doesn't return and expects a
     * special stack frame. Guests jump at this transfer point instead of
     * calling it.
     */
    p  = page + (__HYPERVISOR_iret * 32);
    nr = __HYPERVISOR_iret;
    p[0] = 0x50;                                    /* push %eax */
    p[1] = 0xb8;                                    /* mov  $__HYPERVISOR_iret,%eax */
    memcpy(p + 2, &nr, sizeof(nr));
    memcpy(p + 6, &int_0x82, sizeof(int_0x82));     /* int  $0x82 */
}

/*
 * Write the 64-bit hypercall stubs into the guest's hypercall page.
 *
 * The page is treated as PAGE_SIZE / 32 slots of 32 bytes each; slot <i>
 * receives "push %rcx; push %r11; mov $<i>,%eax; syscall; pop %r11;
 * pop %rcx; ret".  Only the leading bytes of every slot are written, the
 * remainder is left as found.
 *
 * Multi-byte fields sit at odd offsets inside a slot, so they are stored
 * with memcpy() instead of through cast pointers: that avoids misaligned
 * and type-punned stores while producing the same bytes in memory.
 *
 * hypercall_page: start of the (writable) hypercall page.
 */
static void xen_guest_hypercall_page_init_64(void *hypercall_page)
{
    const uint16_t push_r11 = 0x5341;   /* push %r11 */
    const uint16_t syscall  = 0x050f;   /* syscall */
    const uint16_t pop_r11  = 0x5b41;   /* pop  %r11 */
    uint8_t *page = hypercall_page;
    uint8_t *p;
    uint32_t nr;
    int i;

    /* Fill in all the transfer points with template machine code. */
    for (i = 0; i < (PAGE_SIZE / 32); i++) {
        p  = page + (i * 32);
        nr = i;
        p[0] = 0x51;                                 /* push %rcx */
        memcpy(p + 1, &push_r11, sizeof(push_r11));  /* push %r11 */
        p[3] = 0xb8;                                 /* mov  $<i>,%eax */
        memcpy(p + 4, &nr, sizeof(nr));
        memcpy(p + 8, &syscall, sizeof(syscall));    /* syscall */
        memcpy(p + 10, &pop_r11, sizeof(pop_r11));   /* pop  %r11 */
        p[12] = 0x59;                                /* pop  %rcx */
        p[13] = 0xc3;                                /* ret */
    }

    /*
     * HYPERVISOR_iret is special because it doesn't return and expects a
     * special stack frame. Guests jump at this transfer point instead of
     * calling it.
     */
    p  = page + (__HYPERVISOR_iret * 32);
    nr = __HYPERVISOR_iret;
    p[0] = 0x51;                                     /* push %rcx */
    memcpy(p + 1, &push_r11, sizeof(push_r11));      /* push %r11 */
    p[3] = 0x50;                                     /* push %rax */
    p[4] = 0xb8;                                     /* mov  $__HYPERVISOR_iret,%eax */
    memcpy(p + 5, &nr, sizeof(nr));
    memcpy(p + 9, &syscall, sizeof(syscall));        /* syscall */
}

/*
 * Copy everything the domain builder has staged into guest memory.
 *
 * Walks the dom->phys_pages list; each entry describes `count` pages
 * starting at guest pfn `first`, backed by the buffer `ptr`.  The pages
 * are copied to the machine frame xen->mfn_guest + first.
 */
static void xen_guest_copy(struct xenvm *xen, struct xc_dom_image *dom)
{
    struct xc_dom_phys *chunk = dom->phys_pages;

    while (chunk != NULL) {
        void *target = mfn_to_ptr(xen, xen->mfn_guest + chunk->first);

        d2printf("%s: 0x%04" PRIpfn " +0x%04" PRIpfn " @ %p => +%lx\n",
                 __FUNCTION__, chunk->first, chunk->count, chunk->ptr,
                 (unsigned long)(target - xen->memory));
        memcpy(target, chunk->ptr, chunk->count * PAGE_SIZE);

        chunk = chunk->next;
    }
}

/*
 * Hand the kernel (and the ramdisk, if a non-empty name is configured)
 * to the domain builder and let it parse the kernel image.
 *
 * Returns 0 on success, -1 as soon as one of the steps fails; the
 * remaining steps are skipped in that case.
 */
static int xen_guest_parse(struct xenvm *xen, struct xc_dom_image *dom)
{
    int failed;

    section_print(xen, __FUNCTION__, "parse kernel");

    failed = (xc_dom_kernel_file(dom, xen->kernel) != 0);
    if (!failed && xen->ramdisk && xen->ramdisk[0] != '\0')
        failed = (xc_dom_ramdisk_file(dom, xen->ramdisk) != 0);
    if (!failed)
        failed = (xc_dom_parse_image(dom) != 0);

    if (!failed)
        return 0;

    section_print(xen, __FUNCTION__, "FAILURE");
    return -1;
}

/*
 * Build the guest's start-of-day state with the xc_dom domain builder.
 *
 * Steps: size guest memory, create a p2m table that maps guest pfn N to
 * machine frame N + xen->mfn_guest, build the image and page tables,
 * clear the console and xenstore pages, fill in start_info, write the
 * hypercall page (when the kernel asks for one) and create the boot
 * vcpu context in xen->boot_ctxt.
 *
 * Results of xc_dom_malloc() and xc_dom_pfn_to_ptr() are checked before
 * use so a failed allocation or mapping ends in the error path instead
 * of a NULL pointer write.
 *
 * Returns 0 on success, -1 on failure.
 */
static int xen_guest_setup(struct xenvm *xen, struct xc_dom_image *dom)
{
    void *ptr;
    xen_pfn_t pfn;
    int rc;

    section_print(xen, __FUNCTION__, "memory setup");
    if (0 != (rc = xc_dom_mem_init(dom, PG_TO_MB(xen->pg_guest))))
        goto out;
    dom->p2m_host = xc_dom_malloc(dom, sizeof(xen_pfn_t) * dom->total_pages);
    if (NULL == dom->p2m_host)
        goto out;
    for (pfn = 0; pfn < dom->total_pages; pfn++)
        dom->p2m_host[pfn] = pfn + xen->mfn_guest;

    section_print(xen, __FUNCTION__, "create start-of-day");
    if (0 != (rc = xc_dom_build_image(dom)))
        goto out;
    if (0 != (rc = xc_dom_update_guest_p2m(dom)))
        goto out;
    if (0 != (rc = dom->arch_hooks->setup_pgtables(dom)))
        goto out;

    /* console and xenstore pages start out zeroed */
    ptr = xc_dom_pfn_to_ptr(dom, dom->console_pfn, 1);
    if (NULL == ptr)
        goto out;
    memset(ptr, 0, PAGE_SIZE);
    ptr = xc_dom_pfn_to_ptr(dom, dom->xenstore_pfn, 1);
    if (NULL == ptr)
        goto out;
    memset(ptr, 0, PAGE_SIZE);

    dom->arch_hooks->start_info(dom);

    /* both -1 and 0 are taken to mean "no hypercall page requested" */
    if (-1 != dom->parms.virt_hypercall &&
        0 != dom->parms.virt_hypercall) {
        section_print(xen, __FUNCTION__, "setup xen hypercall page");
        pfn = addr_to_frame(dom->parms.virt_hypercall - dom->parms.virt_base);
        ptr = xc_dom_pfn_to_ptr(dom, pfn, 1);
        if (NULL == ptr)
            goto out;
        switch (xen->mode) {
        case XENMODE_32:
        case XENMODE_PAE:
            xen_guest_hypercall_page_init_32(ptr);
            break;
        case XENMODE_64:
            xen_guest_hypercall_page_init_64(ptr);
            break;
        default:
            /* any other mode: leave the page untouched */
            break;
        }
    } else {
        section_print(xen, __FUNCTION__, "warn: no hypercall page");
    }

    section_print(xen, __FUNCTION__, "setup vcpu context");
    if (0 != (rc = dom->arch_hooks->vcpu(dom, &xen->boot_ctxt)))
        goto out;

    return 0;

out:
    section_print(xen, __FUNCTION__, "FAILURE");
    return -1;
}

/* ------------------------------------------------------------------ */

/*
 * Locate `filename` in the search path, read it completely into a
 * freshly allocated buffer and pass that buffer to elf_init() for
 * xen->emu.
 *
 * The first directory in which the file can be opened wins.  Directory
 * entries whose combined path does not fit the local buffer are skipped
 * rather than opened under a truncated name.  Once a file has been
 * opened, any stat, allocation or read problem is a hard failure.
 *
 * On success the buffer is not freed here.
 * NOTE(review): presumably xen->emu keeps referencing the buffer after
 * elf_init() -- confirm against libelf before adding a free().
 *
 * Returns 0 on success, -1 if the file was not found or could not be
 * read in full.
 */
static int xen_load_emu_file(struct xenvm *xen, char *filename)
{
    struct stat st;
    char buf[256];
    char *blob;
    size_t size, done;
    ssize_t got;
    int i, fd, len;

    for (i = 0; i < search_path_length; i++) {
        len = snprintf(buf, sizeof(buf), "%s/%s", search_path[i], filename);
        if (len < 0 || (size_t)len >= sizeof(buf))
            continue;
        fd = open(buf, O_RDONLY);
        if (-1 == fd)
            continue;

        if (-1 == fstat(fd, &st) || st.st_size <= 0) {
            d1printf("%s: can't get size of %s\n", __FUNCTION__, buf);
            close(fd);
            return -1;
        }
        size = (size_t)st.st_size;
        d1printf("%s: loading %s (%ld bytes)\n", __FUNCTION__,
                 buf, (long)st.st_size);

        blob = malloc(size);
        if (NULL == blob) {
            d1printf("%s: out of memory loading %s\n", __FUNCTION__, buf);
            close(fd);
            return -1;
        }

        /* read() may return less than requested; loop until complete */
        done = 0;
        while (done < size) {
            got = read(fd, blob + done, size - done);
            if (got > 0) {
                done += (size_t)got;
                continue;
            }
            if (got < 0 && EINTR == errno)
                continue;
            break;      /* read error or premature end of file */
        }
        close(fd);

        if (done != size) {
            d1printf("%s: short read on %s\n", __FUNCTION__, buf);
            free(blob);
            return -1;
        }

        elf_init(xen->emu, blob, size);
        return 0;
    }
    return -1;
}

/*
 * Allocate the ELF descriptor for the emulator image, load the image
 * file matching the guest mode ("emu32.elf" for 32-bit and PAE guests,
 * "emu64.elf" for 64-bit guests), parse it and load it into memory at
 * machine frame xen->mfn_emu.  xen->emu_vs / xen->emu_ve are set from
 * the image's pstart / pend.
 *
 * `dom` is not used.
 *
 * On failure the descriptor is freed again and xen->emu is reset to
 * NULL.  An unknown mode is rejected up front so the descriptor is
 * never used without having gone through elf_init().
 *
 * Returns 0 on success, -1 on failure.
 */
static int xen_emu_load(struct xenvm *xen, struct xc_dom_image *dom)
{
    char *filename;

    section_print(xen, __FUNCTION__, "load xen emu");

    switch (xen->mode) {
    case XENMODE_32:
    case XENMODE_PAE:
        filename = "emu32.elf";
        break;
    case XENMODE_64:
        filename = "emu64.elf";
        break;
    default:
        return -1;
    }

    xen->emu = malloc(sizeof(struct elf_binary));
    if (NULL == xen->emu)
        return -1;
    if (-1 == xen_load_emu_file(xen, filename)) {
        free(xen->emu);
        xen->emu = NULL;
        return -1;
    }

    if (xen->logfile)
        elf_set_logfile(xen->emu, xen->logfile, 1);
    else if (xen->debug && !xen->nostderr)
        elf_set_logfile(xen->emu, stderr, 1);
    elf_parse_binary(xen->emu);

    xen->emu->dest = mfn_to_ptr(xen, xen->mfn_emu);
    elf_load_binary(xen->emu);
    xen->emu_vs = xen->emu->pstart;
    xen->emu_ve = xen->emu->pend;
    return 0;
}

/*
 * Publish the memory layout to the emulator and locate its page
 * directory.
 *
 * The layout (start frame and page count of the emu, m2p and guest
 * areas, total page count, vcpu count, debug level) is stored in
 * xen->e.config[].  The page directory is found through an ELF section
 * of the emu image whose name depends on the guest mode (".pt.32",
 * ".pt.pae" or ".pt.64"); its address relative to xen->emu_vs gives
 * xen->emu_pgd_mfn.
 *
 * `dom` is not used.
 *
 * Returns 0 on success, -1 for an unknown mode or when the emu image
 * lacks the expected section.
 */
static int xen_emu_setup(struct xenvm *xen, struct xc_dom_image *dom)
{
    const elf_shdr *pt;
    uint64_t addr;

    section_print(xen, __FUNCTION__, "memory info");
    xen->e.config[EMUDEV_CONF_DEBUG_LEVEL]      = xen->debug;
    xen->e.config[EMUDEV_CONF_EMU_START_PFN]    = xen->mfn_emu;
    xen->e.config[EMUDEV_CONF_EMU_PAGE_COUNT]   = xen->pg_emu;
    xen->e.config[EMUDEV_CONF_M2P_START_PFN]    = xen->mfn_m2p;
    xen->e.config[EMUDEV_CONF_M2P_PAGE_COUNT]   = xen->pg_m2p;
    xen->e.config[EMUDEV_CONF_GUEST_START_PFN]  = xen->mfn_guest;
    xen->e.config[EMUDEV_CONF_GUEST_PAGE_COUNT] = xen->pg_guest;
    xen->e.config[EMUDEV_CONF_TOTAL_PAGE_COUNT] = xen->pg_total;
    xen->e.config[EMUDEV_CONF_NR_VCPUS]         = xen->vcpus;

    section_print(xen, __FUNCTION__, "emu pgd setup");
    switch (xen->mode) {
    case XENMODE_32:
        pt = elf_shdr_by_name(xen->emu, ".pt.32");
        break;
    case XENMODE_PAE:
        pt = elf_shdr_by_name(xen->emu, ".pt.pae");
        break;
    case XENMODE_64:
        pt = elf_shdr_by_name(xen->emu, ".pt.64");
        break;
    default:
        return -1;
    }
    /* a missing section must not be handed to elf_uval() */
    if (NULL == pt)
        return -1;
    addr = elf_uval(xen->emu, pt, sh_addr);
    xen->emu_pgd_mfn = xen->mfn_emu + addr_to_frame(addr - xen->emu_vs);

    return 0;
}

/* ------------------------------------------------------------------ */

int domain_builder(struct xenvm *xen)
{
    struct xc_dom_image *dom = NULL;
    pfn_t pfn;

    if (xen->logfile)
	xc_dom_logfile = xen->logfile;
    else if (xen->debug && !xen->nostderr)
	xc_dom_logfile = stderr;
    dom = xc_dom_allocate(xen->cmdline, 0);

    if (0 != xen_guest_parse(xen, dom))
	goto out;
    xen->mode = xen_guest_mode(dom);
    switch (xen->mode) {
    case XENMODE_32:
	if (xen->pg_total > MB_TO_PG(4096))
	    xen->pg_total = MB_TO_PG(4096);
	xen->pg_emu = MB_TO_PG(4);
	xen->pg_m2p = MB_TO_PG(4);
	break;
    case XENMODE_PAE:
	if (xen->pg_total > MB_TO_PG(16384))
	    xen->pg_total = MB_TO_PG(16384);
	xen->pg_emu = MB_TO_PG(4);
	xen->pg_m2p = MB_TO_PG(4);
	while (xen->pg_m2p < xen->pg_total / 1024)
	    xen->pg_m2p += MB_TO_PG(2);
	break;
    case XENMODE_64:
	xen->pg_emu = MB_TO_PG(4);
	xen->pg_m2p = MB_TO_PG(4);
	while (xen->pg_m2p < xen->pg_total / 512)
	    xen->pg_m2p += MB_TO_PG(2);
	break;
    default:
	d0printf("%s: unknown guest mode: %d (%s)\n",  __FUNCTION__,
		 xen->mode, dom->guest_type);
	goto out;
    }

    xen->mfn_emu    = 0;
    xen->mfn_m2p    = xen->pg_emu;
    xen->mfn_guest  = xen->pg_emu + xen->pg_m2p;
    xen->pg_guest   = xen->pg_total - xen->mfn_guest;
    if (xen->mode != XENMODE_64)
	xen->m2p_32 = mfn_to_ptr(xen, xen->mfn_m2p);
    else
	xen->m2p_64 = mfn_to_ptr(xen, xen->mfn_m2p);
    d1printf("%s: memory: emu %" PRId64 " MB, m2p %" PRId64 " MB, guest %" PRId64 " MB\n",
	     __FUNCTION__, PG_TO_MB(xen->pg_emu), PG_TO_MB(xen->pg_m2p),
	     PG_TO_MB(xen->pg_guest));

    dom->console_evtchn  = xen->console_event;
    dom->xenstore_evtchn = xen->xenstore_event;

    if (0 != xen_emu_load(xen, dom))
	goto out;
    if (0 != xen_emu_setup(xen, dom))
	goto out;
    if (0 != xen_guest_setup(xen, dom))
	goto out;

    xen->console_pfn = dom->console_pfn;
    xen->xenstore_pfn = dom->xenstore_pfn;

    /* m2p setup */
    if (xen->m2p_32)
	for (pfn = 0; pfn < dom->total_pages; pfn++)
	    xen->m2p_32[pfn + xen->mfn_guest] = pfn;
    if (xen->m2p_64)
	for (pfn = 0; pfn < dom->total_pages; pfn++)
	    xen->m2p_64[pfn + xen->mfn_guest] = pfn;

    section_print(xen, __FUNCTION__, "kvm: state setup");
    xen_guest_copy(xen, dom);

    xc_dom_release(dom);
    section_print(xen, __FUNCTION__, "all done");
    return 0;

out:
    section_print(xen, __FUNCTION__, "FAILURE");
    return -1;
}
