/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * Copyright (c) 2004-2006 Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Christian Limpach.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/reboot.h>
#include <sys/sysproto.h>


#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/stdarg.h>
#include <machine/xenvar.h>
#include <machine/xenfunc.h>
#include <machine/xenpmap.h>
#include <machine/vmparam.h>
#include <machine/cpu.h>
#include <machine/intr_machdep.h>
#include <machine/md_var.h>
#include <machine/asmacros.h>
#include <machine/xenbus.h>
#include <machine/xenfunc.h>
#include <machine/xen-public/memory.h>

/* Build the symbol name of a trap entry point: IDTVEC(foo) expands to Xfoo. */
#define	IDTVEC(name)	__CONCAT(X,name)

/* Trap entry points referenced by trap_table below; defined outside this file. */
extern inthand_t
IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
	IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);

/* Debug flag word; initvalues() sets it to 0xffffffff. */
int xendebug_flags; 
/* Start-of-day information passed in by Xen; recorded by initvalues(). */
start_info_t *xen_start_info;
/* Shared-info page; mapped into the kernel address space by initvalues(). */
shared_info_t *HYPERVISOR_shared_info;
/* Machine-to-physical table, statically placed at VADDR(1008, 0). */
unsigned long *xen_machine_phys = ((unsigned long *)VADDR(1008, 0));
/* Physical-to-machine table; initvalues() points it at start_info's mfn_list. */
unsigned long *xen_phys_machine;
/* Not referenced elsewhere in this file. */
int preemptable;
/* Index of the first page not consumed by initvalues(). */
int init_first;


/* Out-of-line wrappers around __cli() / __sti(); defined below. */
void ni_cli(void);
void ni_sti(void);

/*
 * Out-of-line wrapper around __cli() that pushes %edx and %eax before the
 * call and pops them back afterwards.
 * NOTE(review): presumably for callers that cannot tolerate those registers
 * being clobbered -- confirm against the call sites.
 */
void
ni_cli(void)
{
	/* Save the registers; the pops below restore them in reverse order. */
	__asm__("pushl %edx;"
		"pushl %eax;"
		);
	__cli();
	__asm__("popl %eax;"
		"popl %edx;"
		);
}


/*
 * Out-of-line wrapper around __sti() that pushes %edx, %esi and %eax before
 * the call and pops them back afterwards.
 * NOTE(review): presumably for callers that cannot tolerate those registers
 * being clobbered -- confirm against the call sites.
 */
void
ni_sti(void)
{
	/* Save the registers; the pops below restore them in reverse order. */
	__asm__("pushl %edx;"
		"pushl %esi;"
		"pushl %eax;"
		);
	__sti();
	__asm__("popl %eax;"
		"popl %esi;"
		"popl %edx;"
		);
}

/*
 * Rewrite cmd_line in place, turning every ',' into a NUL byte so that the
 * buffer becomes a sequence of NUL-terminated strings, the layout used for
 * the static environment.  A NULL cmd_line is passed through untouched.
 * Returns cmd_line.
 */
char *
xen_setbootenv(char *cmd_line)
{
	char *scan;

	if (cmd_line != NULL) {
		for (scan = cmd_line; *scan != '\0'; scan++) {
			if (*scan == ',')
				*scan = '\0';
		}
	}
	return (cmd_line);
}

static struct 
{
	const char	*ev;
	int		mask;
} howto_names[] = {
	{"boot_askname",	RB_ASKNAME},
	{"boot_single",	RB_SINGLE},
	{"boot_nosync",	RB_NOSYNC},
	{"boot_halt",	RB_ASKNAME},
	{"boot_serial",	RB_SERIAL},
	{"boot_cdrom",	RB_CDROM},
	{"boot_gdb",	RB_GDB},
	{"boot_gdb_pause",	RB_RESERVED1},
	{"boot_verbose",	RB_VERBOSE},
	{"boot_multicons",	RB_MULTIPLE},
	{NULL,	0}
};

/*
 * Build the boot "howto" flag word from the environment: for every name in
 * howto_names that getenv() can find, OR in the matching mask.
 * The envp argument is not used.
 */
int 
xen_boothowto(char *envp)
{
	int flags = 0;
	int idx = 0;

	/* Walk the table up to its NULL-named terminator. */
	while (howto_names[idx].ev != NULL) {
		if (getenv(howto_names[idx].ev) != NULL)
			flags |= howto_names[idx].mask;
		idx++;
	}
	return (flags);
}

#define PRINTK_BUFSIZE 1024
/*
 * Format a message printf-style and write it to the hypervisor console.
 *
 * The text is built in a single static buffer of PRINTK_BUFSIZE bytes, so
 * longer messages are truncated.  Nothing is written if formatting fails.
 * NOTE(review): the static buffer is shared by all callers and there is no
 * locking here -- confirm callers cannot race.
 */
void
printk(const char *fmt, ...)
{
	__va_list ap;
	int ret;
	static char buf[PRINTK_BUFSIZE];

	va_start(ap, fmt);
	ret = vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);

	if (ret < 0)
		return;
	/*
	 * vsnprintf() reports the length the full output would have had, which
	 * can exceed the buffer; clamp to what was actually stored so we never
	 * index or write past the end of buf.
	 */
	if (ret > (int)sizeof(buf) - 1)
		ret = (int)sizeof(buf) - 1;
	buf[ret] = 0;
	(void)HYPERVISOR_console_write(buf, ret);
}


/*
 * Queue of pending mmu_update_t requests that are submitted to the
 * hypervisor in one batch by _xen_flush_queue().  XPQ_QUEUE and XPQ_IDX
 * name the queue and its fill index; SET_VCPU() declares the local `vcpu`
 * that the SMP variants of those macros index with.
 */
#define XPQUEUE_SIZE 128
#ifdef SMP
/* per-cpu queues and indices */
static mmu_update_t xpq_queue[MAX_VIRT_CPUS][XPQUEUE_SIZE];
static int xpq_idx[MAX_VIRT_CPUS];  

#define XPQ_QUEUE xpq_queue[vcpu]
#define XPQ_IDX xpq_idx[vcpu]
#define SET_VCPU() int vcpu = smp_processor_id()
#else
/* Uniprocessor: one queue and one index; SET_VCPU() expands to nothing. */
static mmu_update_t xpq_queue[XPQUEUE_SIZE];
static int xpq_idx = 0;

#define XPQ_QUEUE xpq_queue
#define XPQ_IDX xpq_idx
#define SET_VCPU()
#endif
/* Not used in this file; note that the expansion already ends in ';'. */
#define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1);


/*
 * Submit every queued MMU update to the hypervisor in one hypercall and
 * reset the queue index.  Called unconditionally, so it may issue a request
 * with a count of zero; xen_flush_queue() is the variant that checks first.
 * Panics, after dumping the queued entries, if the hypercall fails.
 */
static __inline void
_xen_flush_queue(void)
{
	SET_VCPU();
	int _xpq_idx = XPQ_IDX;
	int error, i;
	/* window of vulnerability here? */

	XPQ_IDX = 0;
	/* Make sure index is cleared first to avoid double updates. */
	error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE,
				      _xpq_idx, NULL, DOMID_SELF);

	if (__predict_false(error < 0)) {
		/*
		 * The queue fields are integers stored via (uint64_t) casts
		 * elsewhere in this file, so print them as 64-bit hex values
		 * instead of with %x / %p.
		 */
		for (i = 0; i < _xpq_idx; i++)
			printk("val: %llx ptr: %llx\n",
			    (unsigned long long)XPQ_QUEUE[i].val,
			    (unsigned long long)XPQ_QUEUE[i].ptr);
		panic("Failed to execute MMU updates: %d", error);
	}

}

/*
 * Flush the pending MMU update queue, skipping the hypercall entirely when
 * nothing is queued.
 */
void
xen_flush_queue(void)
{
	SET_VCPU();

	if (XPQ_IDX == 0)
		return;
	_xen_flush_queue();
}

/*
 * Advance the queue fill index after an entry has been written, flushing
 * the queue as soon as it reaches XPQUEUE_SIZE entries.
 */
static __inline void
xen_increment_idx(void)
{
	SET_VCPU();

	if (__predict_false(++XPQ_IDX == XPQUEUE_SIZE))
		xen_flush_queue();
}

/*
 * Issue an MMUEXT_INVLPG_LOCAL request for the page containing va.
 * Queued MMU updates are flushed before the request; panics if the
 * hypercall fails.
 */
void
xen_invlpg(vm_offset_t va)
{
	struct mmuext_op op;

	/* Push out anything still queued before issuing the request. */
	xen_flush_queue();

	op.arg1.linear_addr = va & ~PAGE_MASK;
	op.cmd = MMUEXT_INVLPG_LOCAL;
	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

/*
 * Issue an MMUEXT_NEW_BASEPTR request for the page directory at physical
 * address val, translated to a machine frame with xpmap_ptom().
 * Queued MMU updates are flushed first; panics if the hypercall fails.
 */
void
xen_load_cr3(uint32_t val)
{
	struct mmuext_op op;

	/* Translate before flushing, in the same order as the request is built. */
	op.arg1.mfn = xpmap_ptom(val) >> PAGE_SHIFT;
	op.cmd = MMUEXT_NEW_BASEPTR;

	xen_flush_queue();
	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}


/*
 * Queue an MMU_MACHPHYS_UPDATE request recording that machine frame mfn
 * backs pseudo-physical frame pfn, then flush the queue immediately.
 */
void
xen_machphys_update(unsigned long mfn, unsigned long pfn)
{
	SET_VCPU();

	/*
	 * Widen before shifting: the queue field is 64 bits, but shifting an
	 * unsigned long would drop the high bits of a machine address on
	 * platforms where unsigned long is 32 bits.
	 */
	XPQ_QUEUE[XPQ_IDX].ptr = ((uint64_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
	XPQ_QUEUE[XPQ_IDX].val = pfn;
	xen_increment_idx();
	/*
	 * xen_increment_idx() may already have flushed a full queue; use the
	 * checking variant so that case does not issue an empty hypercall.
	 */
	xen_flush_queue();
}

/*
 * Append one (ptr, val) page-table update to the pending queue.  The entry
 * is submitted on the next flush, or at once if it fills the queue.
 */
void
xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val)
{
	SET_VCPU();
	mmu_update_t *slot = &XPQ_QUEUE[XPQ_IDX];

	slot->ptr = (uint64_t)ptr;
	slot->val = (uint64_t)val;
	xen_increment_idx();
}

/*
 * Issue an MMUEXT_PIN_L2_TABLE request for the frame containing machine
 * address ma.  Queued MMU updates are flushed first; panics on failure.
 */
void 
xen_pgd_pin(unsigned long ma)
{
	struct mmuext_op op;

	xen_flush_queue();

	op.arg1.mfn = ma >> PAGE_SHIFT;
	op.cmd = MMUEXT_PIN_L2_TABLE;
	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

/*
 * Issue an MMUEXT_UNPIN_TABLE request for the frame containing machine
 * address ma.  Queued MMU updates are flushed first; panics on failure.
 */
void 
xen_pgd_unpin(unsigned long ma)
{
	struct mmuext_op op;

	xen_flush_queue();

	op.arg1.mfn = ma >> PAGE_SHIFT;
	op.cmd = MMUEXT_UNPIN_TABLE;
	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

/*
 * Issue an MMUEXT_PIN_L1_TABLE request for the frame containing machine
 * address ma.  Queued MMU updates are flushed first; panics on failure.
 */
void 
xen_pt_pin(unsigned long ma)
{
	struct mmuext_op op;

	xen_flush_queue();

	op.arg1.mfn = ma >> PAGE_SHIFT;
	op.cmd = MMUEXT_PIN_L1_TABLE;
	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

/*
 * Issue an MMUEXT_UNPIN_TABLE request for the frame containing machine
 * address ma; the request is the same one xen_pgd_unpin() issues.
 * Queued MMU updates are flushed first; panics on failure.
 */
void 
xen_pt_unpin(unsigned long ma)
{
	struct mmuext_op op;

	xen_flush_queue();

	op.arg1.mfn = ma >> PAGE_SHIFT;
	op.cmd = MMUEXT_UNPIN_TABLE;
	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

/*
 * Issue an MMUEXT_SET_LDT request with linear address ptr and len entries.
 * Queued MMU updates are flushed first; panics on failure.
 */
void 
xen_set_ldt(unsigned long ptr, unsigned long len)
{
	struct mmuext_op op;

	xen_flush_queue();

	op.arg2.nr_ents = len;
	op.arg1.linear_addr = ptr;
	op.cmd = MMUEXT_SET_LDT;
	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

/*
 * Issue an MMUEXT_TLB_FLUSH_LOCAL request.  Queued MMU updates are flushed
 * first; panics on failure.
 */
void
xen_tlb_flush(void)
{
	struct mmuext_op op;

	xen_flush_queue();

	op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

/*
 * Ask the hypervisor to store the 8-byte descriptor *entry at the location
 * `table` points to.  The target address handed to Xen is built from the
 * frame bits of the PTE mapping `table` plus the offset of `table` within
 * its page.  Panics if the hypercall reports failure.
 */
void
xen_update_descriptor(union descriptor *table, union descriptor *entry)
{
	pt_entry_t *pte = vtopte((vm_offset_t)table);
	vm_paddr_t target;

	target = (*pte & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK);
	if (HYPERVISOR_update_descriptor(target, *(uint64_t *)entry) != 0)
		panic("HYPERVISOR_update_descriptor failed\n");
}


/*
 * Bitmap is indexed by page number. If bit is set, the page is part of a
 * xen_create_contiguous_region() area of memory.
 */
unsigned long *contiguous_bitmap;

/*
 * Set the bits for pages [first_page, first_page + nr_pages) in
 * contiguous_bitmap.  The range is handled as a partial first word, a run
 * of whole words, and a partial last word.
 */
static void 
contiguous_bitmap_set(unsigned long first_page, unsigned long nr_pages)
{
	unsigned long start_off, end_off, curr_idx, end_idx;

	curr_idx  = first_page / BITS_PER_LONG;
	start_off = first_page & (BITS_PER_LONG-1);
	end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
	end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

	if (curr_idx == end_idx) {
		/* Whole range lies in one word: bits start_off..end_off-1. */
		contiguous_bitmap[curr_idx] |=
			((1UL<<end_off)-1) & -(1UL<<start_off);
	} else {
		/* First word: bits from start_off upward. */
		contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
		while ( ++curr_idx < end_idx )
			contiguous_bitmap[curr_idx] = ~0UL;
		/*
		 * Last word: bits below end_off.  When the range ends exactly
		 * on a word boundary there is nothing to set, and word end_idx
		 * lies outside the range, so do not touch it.
		 */
		if (end_off != 0)
			contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
	}
}

/*
 * Clear the bits for pages [first_page, first_page + nr_pages) in
 * contiguous_bitmap.  The range is handled as a partial first word, a run
 * of whole words, and a partial last word.
 */
static void 
contiguous_bitmap_clear(unsigned long first_page, unsigned long nr_pages)
{
	unsigned long start_off, end_off, curr_idx, end_idx;

	curr_idx  = first_page / BITS_PER_LONG;
	start_off = first_page & (BITS_PER_LONG-1);
	end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
	end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

	if (curr_idx == end_idx) {
		/* Whole range lies in one word: keep bits outside start_off..end_off-1. */
		contiguous_bitmap[curr_idx] &=
			-(1UL<<end_off) | ((1UL<<start_off)-1);
	} else {
		/* First word: keep only the bits below start_off. */
		contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
		while ( ++curr_idx != end_idx )
			contiguous_bitmap[curr_idx] = 0;
		/*
		 * Last word: keep the bits from end_off upward.  When the range
		 * ends exactly on a word boundary there is nothing to clear, and
		 * word end_idx lies outside the range, so do not touch it.
		 */
		if (end_off != 0)
			contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
	}
}

/*
 * Ensure multi-page extents are contiguous in machine memory.
 *
 * Hands the machine frames currently backing `pages` back to the hypervisor,
 * requests a single extent of the same size, and maps the new frames in
 * their place.  npages must be a positive power of two; anything else
 * panics.
 *
 * Returns 0 on success.  If the contiguous extent cannot be obtained, the
 * pages are re-populated with individually allocated frames and ENOMEM is
 * returned.
 */
int 
xen_create_contiguous_region(vm_page_t pages, int npages)
{
	unsigned long  mfn, i, flags;
	int order;
	struct xen_memory_reservation reservation = {
		.extent_start = &mfn,
		.nr_extents   = 1,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	balloon_lock(flags);

	/* can currently only handle power of two allocation */
	PANIC_IF(npages <= 0 || ffs(npages) != fls(npages));

	/*
	 * 0. determine order.  fls() is 1-based, so for npages == 1 << k it
	 * returns k + 1; using it unadjusted made every loop below cover twice
	 * as many pages as the caller passed in.
	 */
	order = fls(npages) - 1;
	
	/* 1. give away machine pages. */
	for (i = 0; i < (1UL << order); i++) {
		int pfn;
		pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
		mfn = PFNTOMFN(pfn);
		PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
		PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != 1);
	}


	/* 2. Get a new contiguous memory extent. */
	reservation.extent_order = order;
	/* xenlinux hardcodes this because of aacraid - maybe set to 0 if we're not 
	 * running with a broxen driver XXXEN
	 */
	reservation.address_bits = 31; 
	if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1)
		goto fail;

	/* 3. Map the new extent in place of old pages. */
	for (i = 0; i < (1UL << order); i++) {
		int pfn;
		pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
		xen_machphys_update(mfn+i, pfn);
		PFNTOMFN(pfn) = mfn+i;
	}

	xen_tlb_flush();

	contiguous_bitmap_set(VM_PAGE_TO_PHYS(&pages[0]) >> PAGE_SHIFT, 1UL << order);

	balloon_unlock(flags);

	return 0;

 fail:
	/* Fall back to single-frame extents with no address restriction. */
	reservation.extent_order = 0;
	reservation.address_bits = 0;

	for (i = 0; i < (1UL << order); i++) {
		int pfn;
		pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
		PANIC_IF(HYPERVISOR_memory_op(
			XENMEM_increase_reservation, &reservation) != 1);
		xen_machphys_update(mfn, pfn);
		PFNTOMFN(pfn) = mfn;
	}

	xen_tlb_flush();

	balloon_unlock(flags);

	return ENOMEM;
}

/*
 * Undo xen_create_contiguous_region() for the npages pages mapped at addr:
 * clear their bits in contiguous_bitmap, return the machine frames behind
 * them to the hypervisor, and map freshly allocated single frames in their
 * place.  npages must be a positive power of two; anything else panics.
 */
void 
xen_destroy_contiguous_region(void *addr, int npages)
{
	unsigned long  mfn, i, flags, order, pfn0;
	struct xen_memory_reservation reservation = {
		.extent_start = &mfn,
		.nr_extents   = 1,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	pfn0 = vtophys(addr) >> PAGE_SHIFT;
#if 0
	scrub_pages(vstart, 1 << order);
#endif
	/* can currently only handle power of two allocation */
	PANIC_IF(npages <= 0 || ffs(npages) != fls(npages));

	/*
	 * 0. determine order.  fls() is 1-based, so for npages == 1 << k it
	 * returns k + 1; using it unadjusted made every loop below cover twice
	 * as many pages as the caller passed in.
	 */
	order = fls(npages) - 1;

	balloon_lock(flags);

	contiguous_bitmap_clear(pfn0, 1UL << order);

	/* 1. Zap current PTEs, giving away the underlying pages. */
	for (i = 0; i < (1UL << order); i++) {
		pte_t new_val = {0};

		/*
		 * The reservation reads the frame to release through
		 * extent_start (&mfn), so the machine frame has to be stored in
		 * mfn; it must be looked up before the PTE is cleared.
		 */
		mfn = vtomach((char *)addr + i*PAGE_SIZE) >> PAGE_SHIFT;

		PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)((char *)addr + (i * PAGE_SIZE)), new_val, 0));
		/* The P2M table is indexed by pseudo-physical frame, not machine frame. */
		PFNTOMFN(pfn0 + i) = INVALID_P2M_ENTRY;
		PANIC_IF(HYPERVISOR_memory_op(
			XENMEM_decrease_reservation, &reservation) != 1);
	}

	/* 2. Map new pages in place of old pages. */
	for (i = 0; i < (1UL << order); i++) {
		int pfn;
		pte_t new_val = {0};	/* no stray bits outside pte_low */
		pfn = pfn0 + i;
		PANIC_IF(HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1);
		
		new_val.pte_low = mfn << PAGE_SHIFT;
		/*
		 * NOTE(review): PG_KERNEL is passed as the hypercall's flags
		 * argument rather than being OR-ed into the PTE value; left as in
		 * the original -- confirm against HYPERVISOR_update_va_mapping().
		 */
		PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)addr + (i * PAGE_SIZE), 
						      new_val, PG_KERNEL));
		xen_machphys_update(mfn, pfn);
		PFNTOMFN(pfn) = mfn;
	}

	xen_tlb_flush();

	balloon_unlock(flags);
}

/* Defined elsewhere; assigned by initvalues() (cpu0prvpage and SMPpt under SMP only). */
extern unsigned long cpu0prvpage;
extern unsigned long *SMPpt;
extern  struct user	*proc0uarea;
extern  vm_offset_t	proc0kstack;
/* Bounds and current top of the boot-time arena used by bootmem_alloc()/bootmem_free(). */
char *bootmem_start, *bootmem_current, *bootmem_end; 

/* Pointed at a run of boot pages by initvalues() when PMAP_DEBUG is defined. */
pteinfo_t *pteinfo_list;
void initvalues(start_info_t *startinfo);

/* Kernel mappings of the xenstore and console pages; set up by initvalues(). */
struct ringbuf_head *xen_store; /* XXX move me */
char *console_page;

/*
 * Carve `size` bytes off the top of the boot-time arena and return a pointer
 * to them.  Panics if the request would run past bootmem_end.
 */
void *
bootmem_alloc(unsigned int size) 
{
	char *retptr = bootmem_current;

	PANIC_IF(retptr + size > bootmem_end);
	bootmem_current = retptr + size;
	return (retptr);
}

/*
 * Give back the most recent bootmem_alloc() allocation.  The arena is a
 * stack: ptr must be exactly `size` bytes below the current top, and the
 * release must not go below bootmem_start; otherwise this panics.
 */
void 
bootmem_free(void *ptr, unsigned int size) 
{
	char *tptr = ptr;

	PANIC_IF(tptr != bootmem_current - size ||
		bootmem_current - size < bootmem_start);
	bootmem_current -= size;
}



/*
 * Boot-time setup driven by the Xen start_info structure.
 *
 * Records start_info and its mfn_list, then hands out the pages that follow
 * the initial page tables one at a time (tracked by tmpindex): proc0's
 * kernel stack, the gdt and ldt pages, the bootmem arena, the page directory
 * shadow page, the optional SMP and PMAP_DEBUG pages, the remaining kernel
 * page-table pages, and the mappings of the shared-info, xenstore and
 * console pages.  The first index left unused is stored in init_first.
 */
void
initvalues(start_info_t *startinfo)
{ 
	int i, round_tmpindex;
	vm_paddr_t pdir_shadow_ma, KPTphys;
	vm_offset_t *pdir_shadow;
#ifdef SMP
	int j;
#endif

#ifdef WRITABLE_PAGETABLES
	printk("using writable pagetables\n");
	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
#endif

	xen_start_info = startinfo;
	xen_phys_machine = (unsigned long *)startinfo->mfn_list;

	/* number of pages allocated after the pts + 1*/;
	unsigned long tmpindex = (VTOPFN(xen_start_info->pt_base) + xen_start_info->nr_pt_frames) + 3;
	xendebug_flags = 0xffffffff;
	/* 
	 * pre-zero unused mapped pages - mapped on 4MB boundary
	 */
	/*
	 * NOTE(review): round_tmpindex is only assigned when PAE is not defined,
	 * but the bzero() below uses it unconditionally.
	 */
#ifndef PAE
	round_tmpindex = (((tmpindex - 1) / 1024) + 1)*1024;
#endif
	bzero((char *)PFNTOV(tmpindex), (round_tmpindex - tmpindex)*PAGE_SIZE); 

	/* pt_base is the page directory; the page right after it is taken as the first kernel page table. */
	IdlePTD = (pd_entry_t *)xpmap_ptom(VTOP(startinfo->pt_base));
	KPTphys = xpmap_ptom(VTOP(startinfo->pt_base + PAGE_SIZE));
	XENPRINTF("IdlePTD %p\n", IdlePTD);
	XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%lx pt_base: 0x%lx "
		  "mod_start: 0x%lx mod_len: 0x%lx\n",
		  xen_start_info->nr_pages, xen_start_info->shared_info, 
		  xen_start_info->flags, xen_start_info->pt_base, 
		  xen_start_info->mod_start, xen_start_info->mod_len);


	/* Map proc0's KSTACK */
	proc0kstack = PFNTOV(tmpindex);
	tmpindex += KSTACK_PAGES;    
    
	/* allocate page for gdt */
	gdt = (union descriptor *)PFNTOV(tmpindex);
	tmpindex++; 

	/* allocate page for ldt */
	ldt = (union descriptor *)PFNTOV(tmpindex);
	tmpindex++; 

	/* allocate 4 pages for bootmem allocator */
	bootmem_start = bootmem_current = (char *)PFNTOV(tmpindex);
	tmpindex += 4;
	bootmem_end = (char *)PFNTOV(tmpindex);
	
	/* initialize page directory shadow page */
	pdir_shadow = (vm_offset_t *)PFNTOV(tmpindex);
	i686_pagezero(pdir_shadow);
	pdir_shadow_ma = xpmap_ptom(tmpindex << PAGE_SHIFT);
	PT_SET_MA(pdir_shadow, pdir_shadow_ma | PG_V | PG_A);
	tmpindex++;

	/* setup shadow mapping first so vtomach will work */
	xen_pt_pin((vm_paddr_t)pdir_shadow_ma);
	xen_queue_pt_update((vm_paddr_t)(IdlePTD + PTDPTDI), 
			    pdir_shadow_ma | PG_KERNEL);
	xen_queue_pt_update(pdir_shadow_ma + PTDPTDI*sizeof(vm_paddr_t), 
			    ((vm_paddr_t)IdlePTD) | PG_V | PG_A);
	xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t), 
			    KPTphys | PG_V | PG_A);

	xen_flush_queue();
	/* allocate remainder of NKPT pages */


#ifdef SMP
	/* allocate cpu0 private page */
	cpu0prvpage = PFNTOV(tmpindex);
	tmpindex++; 

	/* allocate SMP page table */
	SMPpt = (unsigned long *)PFNTOV(tmpindex);

	/* Map the private page into the SMP page table */
	SMPpt[0] = vtomach(cpu0prvpage) | PG_RW | PG_M | PG_V | PG_A;

	/* map SMP page table RO */
	PT_SET_MA(SMPpt, *vtopte((vm_offset_t)SMPpt) & ~PG_RW);

	/* put the page table into the page directory */
	xen_queue_pt_update((vm_paddr_t)(IdlePTD + MPPTDI), 
			    xpmap_ptom((tmpindex << PAGE_SHIFT))| PG_KERNEL);
	xen_queue_pt_update(pdir_shadow_ma + MPPTDI*sizeof(vm_paddr_t), 
			    xpmap_ptom((tmpindex << PAGE_SHIFT))| PG_V | PG_A);
	tmpindex++;
#endif

#ifdef PMAP_DEBUG    
	pteinfo_list = (pteinfo_t *)PFNTOV(tmpindex);
	tmpindex +=  ((xen_start_info->nr_pages >> 10) + 1)*(1 + XPQ_CALL_DEPTH*XPQ_CALL_COUNT);
    
	/* Trap into the debugger if the debug area pushes tmpindex past 980. */
	if (tmpindex > 980)
		__asm__("int3");
#endif
	/* unmap remaining pages from initial 4MB chunk */
	for (i = tmpindex; i%1024 != 0; i++) 
		xen_queue_pt_update(KPTphys + i*sizeof(vm_paddr_t), 0);
	xen_flush_queue();
    
	/* allocate remainder of NKPT pages */
	for (i = 0; i < NKPT-1; i++, tmpindex++) {
		xen_queue_pt_update((vm_paddr_t)(IdlePTD + KPTDI + i + 1), 
				    xpmap_ptom((tmpindex << PAGE_SHIFT)| PG_KERNEL));
		xen_queue_pt_update(pdir_shadow_ma + (KPTDI + i + 1)*sizeof(vm_paddr_t), 
				    xpmap_ptom((tmpindex << PAGE_SHIFT)| PG_V | PG_A));
	}
	/*
	 * NOTE(review): the loop above already advanced tmpindex by NKPT-1, so
	 * this adds NKPT-1 a second time -- confirm whether that is intended.
	 */
	tmpindex += NKPT-1;
	PT_UPDATES_FLUSH();

	/* Map the shared-info, xenstore and console frames at the next three page slots. */
	HYPERVISOR_shared_info = (shared_info_t *)PFNTOV(tmpindex);
	PT_SET_MA(HYPERVISOR_shared_info, 
		  xen_start_info->shared_info | PG_KERNEL);
	tmpindex++;

	xen_store = (struct ringbuf_head *)PFNTOV(tmpindex);
	PT_SET_MA(xen_store,
		  (xen_start_info->store_mfn << PAGE_SHIFT)| PG_KERNEL);
	tmpindex++;
	console_page = (char *)PFNTOV(tmpindex);
	PT_SET_MA(console_page,
		  (xen_start_info->console_mfn << PAGE_SHIFT)| PG_KERNEL);
	tmpindex++;
    

	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = (unsigned long)xen_phys_machine;
#ifdef SMP
	/* Enter npages page mappings per CPU into SMPpt, taking fresh pages from tmpindex. */
	for (i = 0; i < ncpus; i++) {
		int npages = (sizeof(struct privatespace) + 1)/PAGE_SIZE;
		for (j = 0; j < npages; j++) {
			vm_paddr_t ma = xpmap_ptom(tmpindex << PAGE_SHIFT);
			tmpindex++;
			PT_SET_VA_MA(SMPpt + i*npages + j, ma | PG_KERNEL, FALSE);
		}
	}
	xen_flush_queue();
#endif
    
	/* Everything from this page index on is still unused. */
	init_first = tmpindex;
    
}


/*
 * Trap handlers, one trap_info_t per entry, ended by an all-zero entry.
 * The first column is the vector number and the last the address of the
 * matching IDTVEC() entry point; every entry uses the kernel code selector.
 * NOTE(review): the second column looks like the privilege level allowed to
 * raise the trap (3 for bpt, ofl, bnd and int 0x80) -- confirm against the
 * trap_info_t definition.
 */
trap_info_t trap_table[] = {
	{ 0,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
	{ 1,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
	{ 3,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
	{ 4,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
	/* This is UPL on Linux and KPL on BSD */
	{ 5,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
	{ 6,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
	{ 7,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
	/*
	 * { 8,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
	 *   no handler for double fault
	 */
	{ 9,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
	{10,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
	{11,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
	{12,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
	{13,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
	{14,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
	{15,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
	{16,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
	{17,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
	{18,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
	{19,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
	{0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
	/* Terminator. */
	{  0, 0,           0, 0 }
};


/*
 * Xenbus watch callback for control/shutdown.
 *
 * Reads the requested action and clears the node inside one transaction,
 * retrying while the commit reports EAGAIN.  "reboot", "poweroff" and
 * "halt" are turned into a reboot() call with the matching RB_* flags;
 * "suspend" is not supported unless `notyet` is defined; anything else is
 * logged and ignored.  The watch arguments are not used.
 */
static void 
shutdown_handler(struct xenbus_watch *watch,
		 const char **vec, unsigned int len)
{
	struct xenbus_transaction *xbt;
	struct reboot_args uap;
	char *str;
	int howto;

	for (;;) {
		xbt = xenbus_transaction_start();
		if (IS_ERR(xbt))
			return;

		str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
		if (XENBUS_IS_ERR_READ(str)) {
			/* Ignore read errors and empty reads. */
			xenbus_transaction_end(xbt, 1);
			return;
		}

		xenbus_write(xbt, "control", "shutdown", "");

		if (xenbus_transaction_end(xbt, 0) != EAGAIN)
			break;

		/* Commit has to be retried: drop this read and start over. */
		free(str, M_DEVBUF);
	}

	if (strcmp(str, "suspend") == 0) {
#ifdef notyet
		do_suspend(NULL);
#else
		printf("suspend not currently supported\n");
#endif
		goto done;
	}

	if (strcmp(str, "reboot") == 0) {
		howto = 0;
	} else if (strcmp(str, "poweroff") == 0) {
		howto = RB_POWEROFF | RB_HALT;
	} else if (strcmp(str, "halt") == 0) {
		howto = RB_HALT;
	} else {
		printf("Ignoring shutdown request: %s\n", str);
		goto done;
	}

	uap.opt = howto;
	reboot(curthread, &uap);
 done:
	free(str, M_DEVBUF);
}

/* Watch on control/shutdown; changes are delivered to shutdown_handler(). */
static struct xenbus_watch shutdown_watch = {
	.node = "control/shutdown",
	.callback = shutdown_handler
};


/*
 * SYSINIT hook: register the control/shutdown watch.  A registration
 * failure is only reported on the console.
 */
static void
setup_shutdown_watcher(void *unused)
{
	if (register_xenbus_watch(&shutdown_watch) != 0) {
		printf("Failed to set shutdown watcher\n");
	}
}


SYSINIT(shutdown, SI_SUB_PSEUDO, SI_ORDER_ANY, setup_shutdown_watcher, NULL)
#ifdef notyet

/*
 * Suspend/resume path.  Compiled only when `notyet` is defined.
 *
 * Sequence as written: suspend xenbus, take the other CPUs offline
 * (CONFIG_SMP), disable interrupts, suspend the grant tables, unmap the
 * shared-info page, convert the store and console frame numbers with
 * mfn_to_pfn(), and call HYPERVISOR_suspend().  After that call returns the
 * shared-info page is mapped again, the pfn-to-mfn frame lists are rebuilt,
 * and grant tables, IRQs, time, the console and xenbus are resumed.
 *
 * NOTE(review): the function is declared void yet contains
 * `return -EOPNOTSUPP;` and `return err;`, and it relies on names not
 * defined in this file (KERN_WARNING, KERN_CRIT, cpumask_t, the cpu-hotplug
 * helpers), so it looks like an unfinished port -- confirm before enabling.
 */
static void 
xen_suspend(void *ignore)
{
	int i, j, k, fpp;

	extern void time_resume(void);
	extern unsigned long max_pfn;
	extern unsigned long *pfn_to_mfn_frame_list_list;
	extern unsigned long *pfn_to_mfn_frame_list[];

#ifdef CONFIG_SMP
#error "do_suspend must be run cpu 0 - need to create separate thread"
	cpumask_t prev_online_cpus;
	int vcpu_prepare(int vcpu);
#endif

	int err = 0;

	PANIC_IF(smp_processor_id() != 0);

#if defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU)
	if (num_online_cpus() > 1) {
		printk(KERN_WARNING "Can't suspend SMP guests "
		       "without CONFIG_HOTPLUG_CPU\n");
		return -EOPNOTSUPP;
	}
#endif

	xenbus_suspend();

#ifdef CONFIG_SMP
	lock_cpu_hotplug();
	/*
	 * Take all other CPUs offline. We hold the hotplug semaphore to
	 * avoid other processes bringing up CPUs under our feet.
	 */
	cpus_clear(prev_online_cpus);
	while (num_online_cpus() > 1) {
		for_each_online_cpu(i) {
			if (i == 0)
				continue;
			unlock_cpu_hotplug();
			err = cpu_down(i);
			lock_cpu_hotplug();
			if (err != 0) {
				printk(KERN_CRIT "Failed to take all CPUs "
				       "down: %d.\n", err);
				goto out_reenable_cpus;
			}
			cpu_set(i, prev_online_cpus);
		}
	}
#endif

	preempt_disable();


	__cli();
	preempt_enable();
	/* NOTE(review): tested with SMP here but CONFIG_SMP everywhere else in this function. */
#ifdef SMP
	unlock_cpu_hotplug();
#endif
	gnttab_suspend();

	pmap_kremove(HYPERVISOR_shared_info);

	xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
	xen_start_info->console_mfn = mfn_to_pfn(xen_start_info->console_mfn);

	/*
	 * We'll stop somewhere inside this hypercall. When it returns,
	 * we'll start resuming after the restore.
	 */
	HYPERVISOR_suspend(VTOMFN(xen_start_info));

	pmap_kenter_ma(HYPERVISOR_shared_info, xen_start_info->shared_info);
	set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);

#if 0
	memset(empty_zero_page, 0, PAGE_SIZE);
#endif     
	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
		VTOMFN(pfn_to_mfn_frame_list_list);
  
	/*
	 * Rebuild the two-level frame list: fpp entries fit in one page, and
	 * a new second-level page (index k) is started every fpp entries.
	 */
	fpp = PAGE_SIZE/sizeof(unsigned long);
	for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) {
		if ((j % fpp) == 0) {
			k++;
			pfn_to_mfn_frame_list_list[k] = 
				VTOMFN(pfn_to_mfn_frame_list[k]);
			j = 0;
		}
		pfn_to_mfn_frame_list[k][j] = 
			VTOMFN(&phys_to_machine_mapping[i]);
	}
	HYPERVISOR_shared_info->arch.max_pfn = max_pfn;

	gnttab_resume();

	irq_resume();

	time_resume();

	__sti();

	xencons_resume();

#ifdef CONFIG_SMP
	for_each_cpu(i)
		vcpu_prepare(i);

#endif

	/* 
	 * Only resume xenbus /after/ we've prepared our VCPUs; otherwise
	 * the VCPU hotplug callback can race with our vcpu_prepare
	 */
	xenbus_resume();

#ifdef CONFIG_SMP
 out_reenable_cpus:
	/* Bring back the CPUs that were taken offline above. */
	for_each_cpu_mask(i, prev_online_cpus) {
		j = cpu_up(i);
		if ((j != 0) && !cpu_online(i)) {
			printk(KERN_CRIT "Failed to bring cpu "
			       "%d back up (%d).\n",
			       i, j);
			err = j;
		}
	}
#endif
	return err;
}

#endif
/********** CODE WORTH KEEPING ABOVE HERE *****************/ 

void xen_failsafe_handler(void);

/*
 * Failsafe entry point: nothing is recovered here, the kernel simply panics.
 */
void
xen_failsafe_handler(void)
{
	panic("xen_failsafe_handler called!\n");
}
