Blob Blame History Raw
From 8bd6ddfd569309d8e915ffb6f68ad7bf03e53922 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Fri, 20 Feb 2009 12:58:42 -0800
Subject: [PATCH 05/43] x86: define arch_vm_get_page_prot to set _PAGE_IOMAP on VM_IO vmas

Set _PAGE_IOMAP in ptes mapping a VM_IO vma.  This says that the mapping
is of a real piece of physical hardware, and not just system memory.

Xen, in particular, uses to this to inhibit the normal pfn->mfn conversion
that would normally happen - in other words, treat the address directly
as a machine physical address without converting it from pseudo-physical.

[ Impact: make VM_IO mappings map the right thing under Xen ]

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/include/asm/pgtable.h |    3 +++
 arch/x86/mm/pgtable.c          |   10 ++++++++++
 2 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a34c785..7198bcf 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -399,6 +399,9 @@ static inline unsigned long pages_to_mb(unsigned long npg)
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)	\
 	remap_pfn_range(vma, vaddr, pfn, size, prot)
 
+#define arch_vm_get_page_prot arch_vm_get_page_prot
+extern pgprot_t arch_vm_get_page_prot(unsigned vm_flags);
+
 #if PAGETABLE_LEVELS > 2
 static inline int pud_none(pud_t pud)
 {
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 5c4ee42..5083449 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -15,6 +15,16 @@
 
 gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
 
+pgprot_t arch_vm_get_page_prot(unsigned vm_flags)
+{
+	pgprot_t ret = __pgprot(0);
+
+	if (vm_flags & VM_IO)
+		ret = __pgprot(_PAGE_IOMAP);
+
+	return ret;
+}
+
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
 	return (pte_t *)__get_free_page(PGALLOC_GFP);
-- 
1.7.3.4


From 5527f9bee43a910b019ad77d7de6620b3412759b Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Fri, 2 Oct 2009 09:49:05 -0700
Subject: [PATCH 06/43] drm: recompute vma->vm_page_prot after changing vm_flags

vm_get_page_prot() computes vm_page_prot depending on vm_flags, so
we need to re-call it if we change flags.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 drivers/gpu/drm/ttm/ttm_bo_vm.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index fe6cb77..2d15c5e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -273,6 +273,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
 
 	vma->vm_private_data = bo;
 	vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND;
+	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 	return 0;
 out_unref:
 	ttm_bo_unref(&bo);
@@ -288,6 +289,7 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo)
 	vma->vm_ops = &ttm_bo_vm_ops;
 	vma->vm_private_data = ttm_bo_reference(bo);
 	vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND;
+	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 	return 0;
 }
 EXPORT_SYMBOL(ttm_fbdev_mmap);
-- 
1.7.3.4


From 66aae0dade39748bb7e068c4518c6160394ae3aa Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 25 Feb 2010 16:38:29 -0800
Subject: [PATCH 07/43] agp: Use PAGE_KERNEL_IO_NOCACHE for AGP mappings

When mapping AGP memory, the offset is a machine address.  In Xen we
need to make sure mappings of physical machine addresses have _PAGE_IO
included in the PTE, so use PAGE_KERNEL_IO_NOCACHE for these mappings.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/include/asm/pgtable_64.h |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 076052c..3038ef6 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -160,7 +160,7 @@ extern void cleanup_highmap(void);
 #define pgtable_cache_init()   do { } while (0)
 #define check_pgt_cache()      do { } while (0)
 
-#define PAGE_AGP    PAGE_KERNEL_NOCACHE
+#define PAGE_AGP    PAGE_KERNEL_IO_NOCACHE
 #define HAVE_PAGE_AGP 1
 
 /* fs/proc/kcore.c */
-- 
1.7.3.4


From 4cd35860df16d7d3bd55ce4d4500bfe59c46a849 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 24 Feb 2010 11:10:45 -0800
Subject: [PATCH 08/43] agp: use DMA API when compiled for Xen as well

Xen guests need translation between pseudo-physical and real machine
physical addresses when accessing graphics devices, so use the DMA API
in that case too.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 drivers/char/agp/intel-gtt.c |    6 +++++-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 75e0a34..ce63e5c 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -20,8 +20,12 @@
  * an Intel IOMMU. So make the correct use of the PCI DMA API contingent
  * on the Intel IOMMU support (CONFIG_DMAR).
  * Only newer chipsets need to bother with this, of course.
+ *
+ * Xen guests accessing graphics hardware also need proper translation
+ * between pseudo-physical addresses and real machine addresses, which
+ * is also achieved by using the DMA API.
  */
-#ifdef CONFIG_DMAR
+#if defined(CONFIG_DMAR) || defined(CONFIG_XEN)
 #define USE_PCI_DMA_API 1
 #else
 #define USE_PCI_DMA_API 0
-- 
1.7.3.4


From 4ffb5de0f02bad2b0ebe7a9a6a45bc5855c15369 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Mon, 9 Aug 2010 14:35:36 -0700
Subject: [PATCH 10/43] pvops: make pte_flags() go via pvops

As part of PAT support in Xen we need to fiddle with the page flags.
For consistency, we need to make sure the conversion happens both ways
when converting from kernel<->Xen, so pte_flags() must go via pvops.

Unfortunately this undermines the original rationale for introducing
pte_flags (which avoids a pvop call for a common operation in the
mm code)...

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/include/asm/pgtable.h       |    5 +++++
 arch/x86/include/asm/pgtable_types.h |    5 -----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 7198bcf..ce5b845 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -78,6 +78,11 @@ extern struct list_head pgd_list;
 
 #endif	/* CONFIG_PARAVIRT */
 
+static inline pteval_t pte_flags(pte_t pte)
+{
+	return pte_val(pte) & PTE_FLAGS_MASK;
+}
+
 /*
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index d1f4a76..a81b0ed 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -265,11 +265,6 @@ static inline pteval_t native_pte_val(pte_t pte)
 	return pte.pte;
 }
 
-static inline pteval_t pte_flags(pte_t pte)
-{
-	return native_pte_val(pte) & PTE_FLAGS_MASK;
-}
-
 #define pgprot_val(x)	((x).pgprot)
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
-- 
1.7.3.4


From 0aa82d86c699890ce3661927f176045fc8e47156 Mon Sep 17 00:00:00 2001
From: Stephen Tweedie <sct@redhat.com>
Date: Fri, 6 Feb 2009 19:09:47 -0800
Subject: [PATCH 11/43] xen dom0: Add support for the platform_ops hypercall

Minimal changes to get platform ops (renamed dom0_ops on pv_ops) working
on pv_ops builds.  Pulls in upstream linux-2.6.18-xen.hg's platform.h

[ Impact: add Xen hypercall definitions ]

Signed-off-by: Stephen Tweedie <sct@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/include/asm/xen/hypercall.h |    8 ++
 include/xen/interface/platform.h     |  222 ++++++++++++++++++++++++++++++++++
 include/xen/interface/xen.h          |    2 +
 3 files changed, 232 insertions(+), 0 deletions(-)
 create mode 100644 include/xen/interface/platform.h

diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index a3c28ae..3d10d04 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -45,6 +45,7 @@
 #include <xen/interface/xen.h>
 #include <xen/interface/sched.h>
 #include <xen/interface/physdev.h>
+#include <xen/interface/platform.h>
 
 /*
  * The hypercall asms have to meet several constraints:
@@ -299,6 +300,13 @@ HYPERVISOR_set_timer_op(u64 timeout)
 }
 
 static inline int
+HYPERVISOR_dom0_op(struct xen_platform_op *platform_op)
+{
+	platform_op->interface_version = XENPF_INTERFACE_VERSION;
+	return _hypercall1(int, dom0_op, platform_op);
+}
+
+static inline int
 HYPERVISOR_set_debugreg(int reg, unsigned long value)
 {
 	return _hypercall2(int, set_debugreg, reg, value);
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
new file mode 100644
index 0000000..83e4714
--- /dev/null
+++ b/include/xen/interface/platform.h
@@ -0,0 +1,222 @@
+/******************************************************************************
+ * platform.h
+ *
+ * Hardware platform operations. Intended for use by domain-0 kernel.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_PLATFORM_H__
+#define __XEN_PUBLIC_PLATFORM_H__
+
+#include "xen.h"
+
+#define XENPF_INTERFACE_VERSION 0x03000001
+
+/*
+ * Set clock such that it would read <secs,nsecs> after 00:00:00 UTC,
+ * 1 January, 1970 if the current system time was <system_time>.
+ */
+#define XENPF_settime             17
+struct xenpf_settime {
+    /* IN variables. */
+    uint32_t secs;
+    uint32_t nsecs;
+    uint64_t system_time;
+};
+typedef struct xenpf_settime xenpf_settime_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_settime_t);
+
+/*
+ * Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type.
+ * On x86, @type is an architecture-defined MTRR memory type.
+ * On success, returns the MTRR that was used (@reg) and a handle that can
+ * be passed to XENPF_DEL_MEMTYPE to accurately tear down the new setting.
+ * (x86-specific).
+ */
+#define XENPF_add_memtype         31
+struct xenpf_add_memtype {
+    /* IN variables. */
+    unsigned long mfn;
+    uint64_t nr_mfns;
+    uint32_t type;
+    /* OUT variables. */
+    uint32_t handle;
+    uint32_t reg;
+};
+typedef struct xenpf_add_memtype xenpf_add_memtype_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_add_memtype_t);
+
+/*
+ * Tear down an existing memory-range type. If @handle is remembered then it
+ * should be passed in to accurately tear down the correct setting (in case
+ * of overlapping memory regions with differing types). If it is not known
+ * then @handle should be set to zero. In all cases @reg must be set.
+ * (x86-specific).
+ */
+#define XENPF_del_memtype         32
+struct xenpf_del_memtype {
+    /* IN variables. */
+    uint32_t handle;
+    uint32_t reg;
+};
+typedef struct xenpf_del_memtype xenpf_del_memtype_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_del_memtype_t);
+
+/* Read current type of an MTRR (x86-specific). */
+#define XENPF_read_memtype        33
+struct xenpf_read_memtype {
+    /* IN variables. */
+    uint32_t reg;
+    /* OUT variables. */
+    unsigned long mfn;
+    uint64_t nr_mfns;
+    uint32_t type;
+};
+typedef struct xenpf_read_memtype xenpf_read_memtype_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_read_memtype_t);
+
+#define XENPF_microcode_update    35
+struct xenpf_microcode_update {
+    /* IN variables. */
+    GUEST_HANDLE(void) data;          /* Pointer to microcode data */
+    uint32_t length;                  /* Length of microcode data. */
+};
+typedef struct xenpf_microcode_update xenpf_microcode_update_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_microcode_update_t);
+
+#define XENPF_platform_quirk      39
+#define QUIRK_NOIRQBALANCING      1 /* Do not restrict IO-APIC RTE targets */
+#define QUIRK_IOAPIC_BAD_REGSEL   2 /* IO-APIC REGSEL forgets its value    */
+#define QUIRK_IOAPIC_GOOD_REGSEL  3 /* IO-APIC REGSEL behaves properly     */
+struct xenpf_platform_quirk {
+    /* IN variables. */
+    uint32_t quirk_id;
+};
+typedef struct xenpf_platform_quirk xenpf_platform_quirk_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_platform_quirk_t);
+
+#define XENPF_firmware_info       50
+#define XEN_FW_DISK_INFO          1 /* from int 13 AH=08/41/48 */
+#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */
+#define XEN_FW_VBEDDC_INFO        3 /* from int 10 AX=4f15 */
+struct xenpf_firmware_info {
+	/* IN variables. */
+	uint32_t type;
+	uint32_t index;
+	/* OUT variables. */
+	union {
+		struct {
+			/* Int13, Fn48: Check Extensions Present. */
+			uint8_t device;                   /* %dl: bios device number */
+			uint8_t version;                  /* %ah: major version      */
+			uint16_t interface_support;       /* %cx: support bitmap     */
+			/* Int13, Fn08: Legacy Get Device Parameters. */
+			uint16_t legacy_max_cylinder;     /* %cl[7:6]:%ch: max cyl # */
+			uint8_t legacy_max_head;          /* %dh: max head #         */
+			uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector #  */
+			/* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */
+			/* NB. First uint16_t of buffer must be set to buffer size.      */
+			GUEST_HANDLE(void) edd_params;
+		} disk_info; /* XEN_FW_DISK_INFO */
+		struct {
+			uint8_t device;                   /* bios device number  */
+			uint32_t mbr_signature;           /* offset 0x1b8 in mbr */
+		} disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */
+		struct {
+			/* Int10, AX=4F15: Get EDID info. */
+			uint8_t capabilities;
+			uint8_t edid_transfer_time;
+			/* must refer to 128-byte buffer */
+			GUEST_HANDLE(uchar) edid;
+		} vbeddc_info; /* XEN_FW_VBEDDC_INFO */
+	} u;
+};
+typedef struct xenpf_firmware_info xenpf_firmware_info_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_firmware_info_t);
+
+#define XENPF_enter_acpi_sleep    51
+struct xenpf_enter_acpi_sleep {
+	/* IN variables */
+	uint16_t pm1a_cnt_val;      /* PM1a control value. */
+	uint16_t pm1b_cnt_val;      /* PM1b control value. */
+	uint32_t sleep_state;       /* Which state to enter (Sn). */
+	uint32_t flags;             /* Must be zero. */
+};
+typedef struct xenpf_enter_acpi_sleep xenpf_enter_acpi_sleep_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_enter_acpi_sleep_t);
+
+#define XENPF_change_freq         52
+struct xenpf_change_freq {
+	/* IN variables */
+	uint32_t flags; /* Must be zero. */
+	uint32_t cpu;   /* Physical cpu. */
+	uint64_t freq;  /* New frequency (Hz). */
+};
+typedef struct xenpf_change_freq xenpf_change_freq_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_change_freq_t);
+
+/*
+ * Get idle times (nanoseconds since boot) for physical CPUs specified in the
+ * @cpumap_bitmap with range [0..@cpumap_nr_cpus-1]. The @idletime array is
+ * indexed by CPU number; only entries with the corresponding @cpumap_bitmap
+ * bit set are written to. On return, @cpumap_bitmap is modified so that any
+ * non-existent CPUs are cleared. Such CPUs have their @idletime array entry
+ * cleared.
+ */
+#define XENPF_getidletime         53
+struct xenpf_getidletime {
+	/* IN/OUT variables */
+	/* IN: CPUs to interrogate; OUT: subset of IN which are present */
+	GUEST_HANDLE(uchar) cpumap_bitmap;
+	/* IN variables */
+	/* Size of cpumap bitmap. */
+	uint32_t cpumap_nr_cpus;
+	/* Must be indexable for every cpu in cpumap_bitmap. */
+	GUEST_HANDLE(uint64_t) idletime;
+	/* OUT variables */
+	/* System time when the idletime snapshots were taken. */
+	uint64_t now;
+};
+typedef struct xenpf_getidletime xenpf_getidletime_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_getidletime_t);
+
+struct xen_platform_op {
+	uint32_t cmd;
+	uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
+	union {
+		struct xenpf_settime           settime;
+		struct xenpf_add_memtype       add_memtype;
+		struct xenpf_del_memtype       del_memtype;
+		struct xenpf_read_memtype      read_memtype;
+		struct xenpf_microcode_update  microcode;
+		struct xenpf_platform_quirk    platform_quirk;
+		struct xenpf_firmware_info     firmware_info;
+		struct xenpf_enter_acpi_sleep  enter_acpi_sleep;
+		struct xenpf_change_freq       change_freq;
+		struct xenpf_getidletime       getidletime;
+		uint8_t                        pad[128];
+	} u;
+};
+typedef struct xen_platform_op xen_platform_op_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_platform_op_t);
+
+#endif /* __XEN_PUBLIC_PLATFORM_H__ */
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 2befa3e..18b5599 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -461,6 +461,8 @@ typedef uint8_t xen_domain_handle_t[16];
 #define __mk_unsigned_long(x) x ## UL
 #define mk_unsigned_long(x) __mk_unsigned_long(x)
 
+DEFINE_GUEST_HANDLE(uint64_t);
+
 #else /* __ASSEMBLY__ */
 
 /* In assembly code we cannot use C numeric constant suffixes. */
-- 
1.7.3.4


From 37a80bdde5957ffa81c2ecdffc4ccc2e874e34cb Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Fri, 27 Mar 2009 17:39:15 -0700
Subject: [PATCH 12/43] xen: add CPU microcode update driver

Xen does all the hard work for us, including choosing the right update
method for this cpu type and actually doing it for all cpus.  We just
need to supply it with the firmware blob.

Because Xen updates all CPUs (and the kernel's virtual cpu numbers have
no fixed relationship with the underlying physical cpus), we only bother
doing anything for cpu "0".

[ Impact: allow CPU microcode update in Xen dom0 ]
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/include/asm/microcode.h |    9 ++
 arch/x86/kernel/Makefile         |    1 +
 arch/x86/kernel/microcode_core.c |    5 +-
 arch/x86/kernel/microcode_xen.c  |  201 ++++++++++++++++++++++++++++++++++++++
 arch/x86/xen/Kconfig             |    4 +
 5 files changed, 219 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/kernel/microcode_xen.c

diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index ef51b50..e15fca1 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -55,4 +55,13 @@ static inline struct microcode_ops * __init init_amd_microcode(void)
 }
 #endif
 
+#ifdef CONFIG_MICROCODE_XEN
+extern struct microcode_ops * __init init_xen_microcode(void);
+#else
+static inline struct microcode_ops * __init init_xen_microcode(void)
+{
+	return NULL;
+}
+#endif
+
 #endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9e13763..0036150 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -104,6 +104,7 @@ obj-$(CONFIG_PCSPKR_PLATFORM)	+= pcspeaker.o
 microcode-y				:= microcode_core.o
 microcode-$(CONFIG_MICROCODE_INTEL)	+= microcode_intel.o
 microcode-$(CONFIG_MICROCODE_AMD)	+= microcode_amd.o
+microcode-$(CONFIG_MICROCODE_XEN)	+= microcode_xen.o
 obj-$(CONFIG_MICROCODE)			+= microcode.o
 
 obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 1cca374..6550539 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -83,6 +83,7 @@
 #include <linux/fs.h>
 #include <linux/mm.h>
 
+#include <xen/xen.h>
 #include <asm/microcode.h>
 #include <asm/processor.h>
 
@@ -506,7 +507,9 @@ static int __init microcode_init(void)
 	struct cpuinfo_x86 *c = &cpu_data(0);
 	int error;
 
-	if (c->x86_vendor == X86_VENDOR_INTEL)
+	if (xen_pv_domain())
+		microcode_ops = init_xen_microcode();
+	else if (c->x86_vendor == X86_VENDOR_INTEL)
 		microcode_ops = init_intel_microcode();
 	else if (c->x86_vendor == X86_VENDOR_AMD)
 		microcode_ops = init_amd_microcode();
diff --git a/arch/x86/kernel/microcode_xen.c b/arch/x86/kernel/microcode_xen.c
new file mode 100644
index 0000000..16c742e
--- /dev/null
+++ b/arch/x86/kernel/microcode_xen.c
@@ -0,0 +1,201 @@
+/*
+ * Xen microcode update driver
+ *
+ * Xen does most of the work here.  We just pass the whole blob into
+ * Xen, and it will apply it to all CPUs as appropriate.  Xen will
+ * worry about how different CPU models are actually updated.
+ */
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/firmware.h>
+#include <linux/vmalloc.h>
+#include <linux/uaccess.h>
+
+#include <asm/microcode.h>
+
+#include <xen/xen.h>
+#include <xen/interface/platform.h>
+#include <xen/interface/xen.h>
+
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+
+MODULE_DESCRIPTION("Xen microcode update driver");
+MODULE_LICENSE("GPL");
+
+struct xen_microcode {
+	size_t len;
+	char data[0];
+};
+
+static int xen_microcode_update(int cpu)
+{
+	int err;
+	struct xen_platform_op op;
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	struct xen_microcode *uc = uci->mc;
+
+	if (uc == NULL || uc->len == 0) {
+		/*
+		 * We do all cpus at once, so we don't need to do
+		 * other cpus explicitly (besides, these vcpu numbers
+		 * have no relationship to underlying physical cpus).
+		 */
+		return 0;
+	}
+
+	op.cmd = XENPF_microcode_update;
+	set_xen_guest_handle(op.u.microcode.data, uc->data);
+	op.u.microcode.length = uc->len;
+
+	err = HYPERVISOR_dom0_op(&op);
+
+	if (err != 0)
+		printk(KERN_WARNING "microcode_xen: microcode update failed: %d\n", err);
+
+	return err;
+}
+
+static enum ucode_state xen_request_microcode_fw(int cpu, struct device *device)
+{
+	char name[30];
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	const struct firmware *firmware;
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	enum ucode_state ret;
+	struct xen_microcode *uc;
+	size_t size;
+	int err;
+
+	switch (c->x86_vendor) {
+	case X86_VENDOR_INTEL:
+		snprintf(name, sizeof(name), "intel-ucode/%02x-%02x-%02x",
+			 c->x86, c->x86_model, c->x86_mask);
+		break;
+
+	case X86_VENDOR_AMD:
+		snprintf(name, sizeof(name), "amd-ucode/microcode_amd.bin");
+		break;
+
+	default:
+		return UCODE_NFOUND;
+	}
+
+	err = request_firmware(&firmware, name, device);
+	if (err) {
+		pr_debug("microcode: data file %s load failed\n", name);
+		return UCODE_NFOUND;
+	}
+
+	/*
+	 * Only bother getting real firmware for cpu 0; the others get
+	 * dummy placeholders.
+	 */
+	if (cpu == 0)
+		size = firmware->size;
+	else
+		size = 0;
+
+	if (uci->mc != NULL) {
+		vfree(uci->mc);
+		uci->mc = NULL;
+	}
+
+	ret = UCODE_ERROR;
+	uc = vmalloc(sizeof(*uc) + size);
+	if (uc == NULL)
+		goto out;
+
+	ret = UCODE_OK;
+	uc->len = size;
+	memcpy(uc->data, firmware->data, uc->len);
+
+	uci->mc = uc;
+
+out:
+	release_firmware(firmware);
+
+	return ret;
+}
+
+static enum ucode_state xen_request_microcode_user(int cpu,
+						   const void __user *buf, size_t size)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	struct xen_microcode *uc;
+	enum ucode_state ret;
+	size_t unread;
+
+	if (cpu != 0) {
+		/* No real firmware for non-zero cpus; just store a
+		   placeholder */
+		size = 0;
+	}
+
+	if (uci->mc != NULL) {
+		vfree(uci->mc);
+		uci->mc = NULL;
+	}
+
+	ret = UCODE_ERROR;
+	uc = vmalloc(sizeof(*uc) + size);
+	if (uc == NULL)
+		goto out;
+
+	uc->len = size;
+
+	ret = UCODE_NFOUND;
+
+	/* XXX This sporadically returns uncopied bytes, so we return
+	   EFAULT.  As far as I can see, the usermode code
+	   (microcode_ctl) isn't doing anything wrong... */
+	unread = copy_from_user(uc->data, buf, size);
+
+	if (unread != 0) {
+		printk(KERN_WARNING "failed to read %zd of %zd bytes at %p -> %p\n",
+		       unread, size, buf, uc->data);
+		goto out;
+	}
+
+	ret = UCODE_OK;
+
+out:
+	if (ret == 0)
+		uci->mc = uc;
+	else
+		vfree(uc);
+
+	return ret;
+}
+
+static void xen_microcode_fini_cpu(int cpu)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+	vfree(uci->mc);
+	uci->mc = NULL;
+}
+
+static int xen_collect_cpu_info(int cpu, struct cpu_signature *sig)
+{
+	sig->sig = 0;
+	sig->pf = 0;
+	sig->rev = 0;
+
+	return 0;
+}
+
+static struct microcode_ops microcode_xen_ops = {
+	.request_microcode_user		  = xen_request_microcode_user,
+	.request_microcode_fw             = xen_request_microcode_fw,
+	.collect_cpu_info                 = xen_collect_cpu_info,
+	.apply_microcode                  = xen_microcode_update,
+	.microcode_fini_cpu               = xen_microcode_fini_cpu,
+};
+
+struct microcode_ops * __init init_xen_microcode(void)
+{
+	if (!xen_initial_domain())
+		return NULL;
+	return &microcode_xen_ops;
+}
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 5b54892..384e0a5 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -48,3 +48,7 @@ config XEN_DEBUG_FS
 	help
 	  Enable statistics output and various tuning options in debugfs.
 	  Enabling this option may incur a significant performance overhead.
+
+config MICROCODE_XEN
+       def_bool y
+       depends on XEN_DOM0 && MICROCODE
\ No newline at end of file
-- 
1.7.3.4


From aed8ff456bd7847683776e5c4d0dd4e4abc5087e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 10 Nov 2010 12:28:57 -0800
Subject: [PATCH 14/43] x86: demacro set_iopl_mask()

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/include/asm/processor.h |    5 ++++-
 1 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index cae9c3c..32f75be 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -591,7 +591,10 @@ static inline void load_sp0(struct tss_struct *tss,
 	native_load_sp0(tss, thread);
 }
 
-#define set_iopl_mask native_set_iopl_mask
+static inline void set_iopl_mask(unsigned mask)
+{
+	native_set_iopl_mask(mask);
+}
 #endif /* CONFIG_PARAVIRT */
 
 /*
-- 
1.7.3.4


From 640524e029f5b5cb93462c7bccdd93141f53ae65 Mon Sep 17 00:00:00 2001
From: Christophe Saout <chtephan@leto.intern.saout.de>
Date: Sat, 17 Jan 2009 17:30:17 +0100
Subject: [PATCH 15/43] x86/paravirt: paravirtualize IO permission bitmap

Paravirtualized x86 systems don't have an exposed TSS, as it is only
directly visible in ring 0.  The IO permission bitmap is part of
the TSS, so with out a TSS, it must be paravirtualized separately,
like the iopl mask.

[ Impact: make ioperm bitmap work under Xen ]

Signed-off-by: Christophe Saout <chtephan@leto.intern.saout.de>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/include/asm/paravirt.h       |    7 +++++++
 arch/x86/include/asm/paravirt_types.h |    2 ++
 arch/x86/include/asm/processor.h      |    9 +++++++++
 arch/x86/kernel/ioport.c              |   29 ++++++++++++++++++++++-------
 arch/x86/kernel/paravirt.c            |    1 +
 arch/x86/kernel/process.c             |   28 ++++++++--------------------
 6 files changed, 49 insertions(+), 27 deletions(-)

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 18e3b8a..1764f4a 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -330,11 +330,18 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
 {
 	PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g);
 }
+
 static inline void set_iopl_mask(unsigned mask)
 {
 	PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask);
 }
 
+static inline void set_io_bitmap(struct thread_struct *thread,
+				 unsigned long bytes_updated)
+{
+	PVOP_VCALL2(pv_cpu_ops.set_io_bitmap, thread, bytes_updated);
+}
+
 /* The paravirtualized I/O functions */
 static inline void slow_down_io(void)
 {
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index b82bac9..dcb0c44 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -135,6 +135,8 @@ struct pv_cpu_ops {
 	void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
 
 	void (*set_iopl_mask)(unsigned mask);
+	void (*set_io_bitmap)(struct thread_struct *thread,
+			      unsigned long bytes_updated);
 
 	void (*wbinvd)(void);
 	void (*io_delay)(void);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 32f75be..9c5528e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -551,6 +551,9 @@ static inline void native_set_iopl_mask(unsigned mask)
 #endif
 }
 
+extern void native_set_io_bitmap(struct thread_struct *thread,
+				 unsigned long updated_bytes);
+
 static inline void
 native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
 {
@@ -595,6 +598,12 @@ static inline void set_iopl_mask(unsigned mask)
 {
 	native_set_iopl_mask(mask);
 }
+
+static inline void set_io_bitmap(struct thread_struct *thread,
+				 unsigned long updated_bytes)
+{
+	native_set_io_bitmap(thread, updated_bytes);
+}
 #endif /* CONFIG_PARAVIRT */
 
 /*
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 8eec0ec..8ead1f0 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -30,13 +30,29 @@ static void set_bitmap(unsigned long *bitmap, unsigned int base,
 	}
 }
 
+void native_set_io_bitmap(struct thread_struct *t,
+			  unsigned long bytes_updated)
+{
+	struct tss_struct *tss;
+
+	if (!bytes_updated)
+		return;
+
+	tss = &__get_cpu_var(init_tss);
+
+	/* Update the TSS: */
+	if (t->io_bitmap_ptr)
+		memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);
+	else
+		memset(tss->io_bitmap, 0xff, bytes_updated);
+}
+
 /*
  * this changes the io permissions bitmap in the current task.
  */
 asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 {
 	struct thread_struct *t = &current->thread;
-	struct tss_struct *tss;
 	unsigned int i, max_long, bytes, bytes_updated;
 
 	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
@@ -61,13 +77,13 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 	}
 
 	/*
-	 * do it in the per-thread copy and in the TSS ...
+	 * do it in the per-thread copy
 	 *
-	 * Disable preemption via get_cpu() - we must not switch away
+	 * Disable preemption - we must not switch away
 	 * because the ->io_bitmap_max value must match the bitmap
 	 * contents:
 	 */
-	tss = &per_cpu(init_tss, get_cpu());
+	preempt_disable();
 
 	set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
 
@@ -85,10 +101,9 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 
 	t->io_bitmap_max = bytes;
 
-	/* Update the TSS: */
-	memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);
+	set_io_bitmap(t, bytes_updated);
 
-	put_cpu();
+	preempt_enable();
 
 	return 0;
 }
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index c5b2500..febd851 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -376,6 +376,7 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.swapgs = native_swapgs,
 
 	.set_iopl_mask = native_set_iopl_mask,
+	.set_io_bitmap = native_set_io_bitmap,
 	.io_delay = native_io_delay,
 
 	.start_context_switch = paravirt_nop,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 57d1868..a48e82a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -74,16 +74,12 @@ void exit_thread(void)
 	unsigned long *bp = t->io_bitmap_ptr;
 
 	if (bp) {
-		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
-
+		preempt_disable();
 		t->io_bitmap_ptr = NULL;
 		clear_thread_flag(TIF_IO_BITMAP);
-		/*
-		 * Careful, clear this in the TSS too:
-		 */
-		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
+		set_io_bitmap(t, t->io_bitmap_max);
 		t->io_bitmap_max = 0;
-		put_cpu();
+		preempt_enable();
 		kfree(bp);
 	}
 }
@@ -214,19 +210,11 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 			hard_enable_TSC();
 	}
 
-	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
-		/*
-		 * Copy the relevant range of the IO bitmap.
-		 * Normally this is 128 bytes or less:
-		 */
-		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
-		       max(prev->io_bitmap_max, next->io_bitmap_max));
-	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
-		/*
-		 * Clear any possible leftover bits:
-		 */
-		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
-	}
+	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP) ||
+	    test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))
+		set_io_bitmap(next,
+			      max(prev->io_bitmap_max, next->io_bitmap_max));
+
 	propagate_user_return_notify(prev_p, next_p);
 }
 
-- 
1.7.3.4


From 950952701cf9e218c2269d13b8538f3c07cff762 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 18 Jun 2009 15:04:16 -0700
Subject: [PATCH 16/43] xen: implement IO permission bitmap

Add Xen implementation of IO permission bitmap pvop.

[ Impact: allow guests to set usermode IO permission bitmaps. ]

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/enlighten.c |   13 +++++++++++++
 1 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 235c0f4..4ad88fd 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -700,6 +700,18 @@ static void xen_set_iopl_mask(unsigned mask)
 	HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
 }
 
+static void xen_set_io_bitmap(struct thread_struct *thread,
+			      unsigned long bytes_updated)
+{
+	struct physdev_set_iobitmap set_iobitmap;
+
+	set_xen_guest_handle(set_iobitmap.bitmap,
+			     (char *)thread->io_bitmap_ptr);
+	set_iobitmap.nr_ports = thread->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
+	WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap,
+				      &set_iobitmap));
+}
+
 static void xen_io_delay(void)
 {
 }
@@ -997,6 +1009,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.load_sp0 = xen_load_sp0,
 
 	.set_iopl_mask = xen_set_iopl_mask,
+	.set_io_bitmap = xen_set_io_bitmap,
 	.io_delay = xen_io_delay,
 
 	/* Xen takes care of %gs when switching to usermode for us */
-- 
1.7.3.4


From 66b66fdaae7856319170b10f4690f67cf6129825 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Tue, 30 Nov 2010 10:03:44 -0800
Subject: [PATCH 29/43] mm: add apply_to_page_range_batch()

apply_to_page_range() calls its callback function once for each pte, which
is pretty inefficient since it will almost always be operating on a batch
of adjacent ptes.  apply_to_page_range_batch() calls its callback
with both a pte_t * and a count, so it can operate on multiple ptes at
once.

The callback is expected to handle all its ptes, or return an error.  For
both apply_to_page_range and apply_to_page_range_batch, it is up to
the caller to work out how much progress was made if either fails with
an error.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 include/linux/mm.h |    6 +++++
 mm/memory.c        |   56 +++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 48 insertions(+), 14 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 721f451..c42f200 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1421,6 +1421,12 @@ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
 			       unsigned long size, pte_fn_t fn, void *data);
 
+typedef int (*pte_batch_fn_t)(pte_t *pte, unsigned count, pgtable_t token,
+			      unsigned long addr, void *data);
+extern int apply_to_page_range_batch(struct mm_struct *mm,
+				     unsigned long address, unsigned long size,
+				     pte_batch_fn_t fn, void *data);
+
 #ifdef CONFIG_PROC_FS
 void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
 #else
diff --git a/mm/memory.c b/mm/memory.c
index 02e48aa..4028984 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1924,7 +1924,7 @@ EXPORT_SYMBOL(remap_pfn_range);
 
 static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
 				     unsigned long addr, unsigned long end,
-				     pte_fn_t fn, void *data)
+				     pte_batch_fn_t fn, void *data)
 {
 	pte_t *pte;
 	int err;
@@ -1939,26 +1939,20 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
 
 	BUG_ON(pmd_huge(*pmd));
 
-	arch_enter_lazy_mmu_mode();
-
 	token = pmd_pgtable(*pmd);
 
-	do {
-		err = fn(pte++, token, addr, data);
-		if (err)
-			break;
-	} while (addr += PAGE_SIZE, addr != end);
-
+	arch_enter_lazy_mmu_mode();
+	err = fn(pte, (end - addr) / PAGE_SIZE, token, addr, data);
 	arch_leave_lazy_mmu_mode();
 
 	if (mm != &init_mm)
-		pte_unmap_unlock(pte-1, ptl);
+		pte_unmap_unlock(pte, ptl);
 	return err;
 }
 
 static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
 				     unsigned long addr, unsigned long end,
-				     pte_fn_t fn, void *data)
+				     pte_batch_fn_t fn, void *data)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -1980,7 +1974,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
 
 static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
 				     unsigned long addr, unsigned long end,
-				     pte_fn_t fn, void *data)
+				     pte_batch_fn_t fn, void *data)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -2002,8 +1996,9 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
  * Scan a region of virtual memory, filling in page tables as necessary
  * and calling a provided function on each leaf page table.
  */
-int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
-			unsigned long size, pte_fn_t fn, void *data)
+int apply_to_page_range_batch(struct mm_struct *mm,
+			      unsigned long addr, unsigned long size,
+			      pte_batch_fn_t fn, void *data)
 {
 	pgd_t *pgd;
 	unsigned long next;
@@ -2021,6 +2016,39 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
 
 	return err;
 }
+EXPORT_SYMBOL_GPL(apply_to_page_range_batch);
+
+struct pte_single_fn
+{
+	pte_fn_t fn;
+	void *data;
+};
+
+static int apply_pte_batch(pte_t *pte, unsigned count, pgtable_t token,
+			   unsigned long addr, void *data)
+{
+	struct pte_single_fn *single = data;
+	int err = 0;
+
+	while (count--) {
+		err = single->fn(pte, token, addr, single->data);
+		if (err)
+			break;
+
+		addr += PAGE_SIZE;
+		pte++;
+	}
+
+	return err;
+}
+
+int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
+			unsigned long size, pte_fn_t fn, void *data)
+{
+	struct pte_single_fn single = { .fn = fn, .data = data };
+	return apply_to_page_range_batch(mm, addr, size,
+					 apply_pte_batch, &single);
+}
 EXPORT_SYMBOL_GPL(apply_to_page_range);
 
 /*
-- 
1.7.3.4


From 3a5e3a915a3ab35e78b0a4b0f31e405a9640cae5 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Mon, 29 Nov 2010 12:22:24 -0800
Subject: [PATCH 30/43] vmalloc: use plain pte_clear() for unmaps

ptep_get_and_clear() is potentially moderately expensive (at least
an atomic operation, or potentially a trap-and-fault when virtualized)
so use a plain pte_clear().

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 mm/vmalloc.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index eb5cc7d..f1e45e0 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -39,8 +39,9 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
 
 	pte = pte_offset_kernel(pmd, addr);
 	do {
-		pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
+		pte_t ptent = *pte;
 		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
+		pte_clear(&init_mm, addr, pte);
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 }
 
-- 
1.7.3.4


From 8712efa8e830e26f8088ea3c2ca2a45cf775cd88 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Mon, 29 Nov 2010 11:06:19 -0800
Subject: [PATCH 31/43] vmalloc: use apply_to_page_range for vunmap_page_range()

There's no need to open-code it when there's helpful utility function
to do the job.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Nick Piggin <npiggin@kernel.dk>
---
 mm/vmalloc.c |   53 +++++++++--------------------------------------------
 1 files changed, 9 insertions(+), 44 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f1e45e0..1e74a45 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -33,59 +33,24 @@
 
 /*** Page table manipulation functions ***/
 
-static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
+static int vunmap_pte(pte_t *pte, unsigned count,
+		      pgtable_t tok, unsigned long addr, void *data)
 {
-	pte_t *pte;
-
-	pte = pte_offset_kernel(pmd, addr);
-	do {
+	while (count--) {
 		pte_t ptent = *pte;
-		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
-		pte_clear(&init_mm, addr, pte);
-	} while (pte++, addr += PAGE_SIZE, addr != end);
-}
-
-static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
-{
-	pmd_t *pmd;
-	unsigned long next;
 
-	pmd = pmd_offset(pud, addr);
-	do {
-		next = pmd_addr_end(addr, end);
-		if (pmd_none_or_clear_bad(pmd))
-			continue;
-		vunmap_pte_range(pmd, addr, next);
-	} while (pmd++, addr = next, addr != end);
-}
+		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
 
-static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
-{
-	pud_t *pud;
-	unsigned long next;
+		pte_clear(&init_mm, addr, pte++);
+		addr += PAGE_SIZE;
+	}
 
-	pud = pud_offset(pgd, addr);
-	do {
-		next = pud_addr_end(addr, end);
-		if (pud_none_or_clear_bad(pud))
-			continue;
-		vunmap_pmd_range(pud, addr, next);
-	} while (pud++, addr = next, addr != end);
+	return 0;
 }
 
 static void vunmap_page_range(unsigned long addr, unsigned long end)
 {
-	pgd_t *pgd;
-	unsigned long next;
-
-	BUG_ON(addr >= end);
-	pgd = pgd_offset_k(addr);
-	do {
-		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd))
-			continue;
-		vunmap_pud_range(pgd, addr, next);
-	} while (pgd++, addr = next, addr != end);
+	apply_to_page_range_batch(&init_mm, addr, end - addr, vunmap_pte, NULL);
 }
 
 static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
-- 
1.7.3.4


From 373648649635f8d46e3489c6a53b8a3a9204c26a Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Mon, 29 Nov 2010 11:11:45 -0800
Subject: [PATCH 32/43] vmalloc: use apply_to_page_range for vmap_page_range_noflush()

There's no need to open-code it when there's a helpful utility
function.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Nick Piggin <npiggin@kernel.dk>
---
 mm/vmalloc.c |   92 ++++++++++++++++++---------------------------------------
 1 files changed, 29 insertions(+), 63 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 1e74a45..e8d2025 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -53,63 +53,34 @@ static void vunmap_page_range(unsigned long addr, unsigned long end)
 	apply_to_page_range_batch(&init_mm, addr, end - addr, vunmap_pte, NULL);
 }
 
-static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
-		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+struct vmap_data
 {
-	pte_t *pte;
+	struct page **pages;
+	unsigned index;
+	pgprot_t prot;
+};
 
-	/*
-	 * nr is a running index into the array which helps higher level
-	 * callers keep track of where we're up to.
-	 */
+static int vmap_pte(pte_t *pte, unsigned count, pgtable_t tok,
+		    unsigned long addr, void *data)
+{
+	struct vmap_data *vmap = data;
 
-	pte = pte_alloc_kernel(pmd, addr);
-	if (!pte)
-		return -ENOMEM;
-	do {
-		struct page *page = pages[*nr];
+	while (count--) {
+		struct page *page = vmap->pages[vmap->index];
 
 		if (WARN_ON(!pte_none(*pte)))
 			return -EBUSY;
+
 		if (WARN_ON(!page))
 			return -ENOMEM;
-		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
-		(*nr)++;
-	} while (pte++, addr += PAGE_SIZE, addr != end);
-	return 0;
-}
 
-static int vmap_pmd_range(pud_t *pud, unsigned long addr,
-		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
-{
-	pmd_t *pmd;
-	unsigned long next;
-
-	pmd = pmd_alloc(&init_mm, pud, addr);
-	if (!pmd)
-		return -ENOMEM;
-	do {
-		next = pmd_addr_end(addr, end);
-		if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
-			return -ENOMEM;
-	} while (pmd++, addr = next, addr != end);
-	return 0;
-}
+		set_pte_at(&init_mm, addr, pte, mk_pte(page, vmap->prot));
 
-static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
-		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
-{
-	pud_t *pud;
-	unsigned long next;
+		pte++;
+		addr += PAGE_SIZE;
+		vmap->index++;
+	}
 
-	pud = pud_alloc(&init_mm, pgd, addr);
-	if (!pud)
-		return -ENOMEM;
-	do {
-		next = pud_addr_end(addr, end);
-		if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
-			return -ENOMEM;
-	} while (pud++, addr = next, addr != end);
 	return 0;
 }
 
@@ -122,22 +93,17 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
 static int vmap_page_range_noflush(unsigned long start, unsigned long end,
 				   pgprot_t prot, struct page **pages)
 {
-	pgd_t *pgd;
-	unsigned long next;
-	unsigned long addr = start;
-	int err = 0;
-	int nr = 0;
-
-	BUG_ON(addr >= end);
-	pgd = pgd_offset_k(addr);
-	do {
-		next = pgd_addr_end(addr, end);
-		err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
-		if (err)
-			return err;
-	} while (pgd++, addr = next, addr != end);
-
-	return nr;
+	int err;
+	struct vmap_data vmap = {
+		.pages = pages,
+		.index = 0,
+		.prot = prot
+	};
+	
+	err = apply_to_page_range_batch(&init_mm, start, end - start,
+					vmap_pte, &vmap);
+	
+	return err ? err : vmap.index;
 }
 
 static int vmap_page_range(unsigned long start, unsigned long end,
-- 
1.7.3.4


From 13b0d5b31725090e3d3658003cc648b97b86c5e5 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 1 Dec 2010 15:23:31 -0800
Subject: [PATCH 33/43] xen: drop all the special iomap pte paths.

Xen can work out when we're doing IO mappings for itself, so we don't
need to do anything special, and the extra tests just clog things up.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/mmu.c |   15 ---------------
 1 files changed, 0 insertions(+), 15 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 0e4ecac..304f034 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -607,11 +607,6 @@ static bool xen_page_pinned(void *ptr)
 	return PagePinned(page);
 }
 
-static bool xen_iomap_pte(pte_t pte)
-{
-	return pte_flags(pte) & _PAGE_IOMAP;
-}
-
 void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
 {
 	struct multicall_space mcs;
@@ -630,11 +625,6 @@ void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
 }
 EXPORT_SYMBOL_GPL(xen_set_domain_pte);
 
-static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
-{
-	xen_set_domain_pte(ptep, pteval, DOMID_IO);
-}
-
 static void xen_extend_mmu_update(const struct mmu_update *update)
 {
 	struct multicall_space mcs;
@@ -968,11 +958,6 @@ void xen_set_pte(pte_t *ptep, pte_t pte)
 #ifdef CONFIG_X86_PAE
 void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
 {
-	if (xen_iomap_pte(pte)) {
-		xen_set_iomap_pte(ptep, pte);
-		return;
-	}
-
 	set_64bit((u64 *)ptep, native_pte_val(pte));
 }
 
-- 
1.7.3.4


From 48b66c08a1a0c5299ac3c43fa706289877f697c7 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 1 Dec 2010 15:13:34 -0800
Subject: [PATCH 34/43] xen: use mmu_update for xen_set_pte_at()

In principle update_va_mapping is a good match for set_pte_at, since
it gets the address being mapped, which allows Xen to use its linear
pagetable mapping.

However that assumes that the pmd for the address is attached to the
current pagetable, which may not be true for a given user address space
because the kernel pmd is not shared (at least on 32-bit guests).
Normally the kernel will automatically sync a missing part of the
pagetable with the init_mm pagetable transparently via faults, but that
fails when a missing address is passed to Xen.

And while the linear pagetable mapping is very useful for 32-bit Xen
(as it avoids an explicit domain mapping), 32-bit Xen is deprecated.
64-bit Xen has all memory mapped all the time, so it makes no real
difference.

The upshot is that we should use mmu_update, since it can operate on
non-current pagetables or detached pagetables.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/mmu.c |   28 ++++++++++++----------------
 1 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 304f034..fed2a44 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -703,7 +703,7 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 {
 	if (xen_iomap_pte(pteval)) {
 		xen_set_iomap_pte(ptep, pteval);
-		goto out;
+		return;
 	}
 
 	ADD_STATS(set_pte_at, 1);
@@ -711,22 +711,18 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 	ADD_STATS(set_pte_at_current, mm == current->mm);
 	ADD_STATS(set_pte_at_kernel, mm == &init_mm);
 
-	if (mm == current->mm || mm == &init_mm) {
-		if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
-			struct multicall_space mcs;
-			mcs = xen_mc_entry(0);
+	if(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
+		struct mmu_update u;
 
-			MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
-			ADD_STATS(set_pte_at_batched, 1);
-			xen_mc_issue(PARAVIRT_LAZY_MMU);
-			goto out;
-		} else
-			if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0)
-				goto out;
-	}
-	xen_set_pte(ptep, pteval);
+		xen_mc_batch();
+
+		u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
+		u.val = pte_val_ma(pteval);
+		xen_extend_mmu_update(&u);
 
-out:	return;
+		xen_mc_issue(PARAVIRT_LAZY_MMU);
+	} else
+		native_set_pte(ptep, pteval);
 }
 
 pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
-- 
1.7.3.4


From 0c478e2cc9f79d83d3dd629e459158bcb77d1f75 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 1 Dec 2010 15:30:41 -0800
Subject: [PATCH 35/43] xen: condense everything onto xen_set_pte

xen_set_pte_at and xen_clear_pte are essentially identical to
xen_set_pte, so just make them all common.

When batched set_pte and pte_clear are the same, but the unbatch operation
must be different: they need to update the two halves of the pte in
different order.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/mmu.c |   81 ++++++++++++++++-----------------------------------
 1 files changed, 26 insertions(+), 55 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index fed2a44..45fe925 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -108,12 +108,6 @@ static struct {
 
 	u32 prot_commit;
 	u32 prot_commit_batched;
-
-	u32 set_pte_at;
-	u32 set_pte_at_batched;
-	u32 set_pte_at_pinned;
-	u32 set_pte_at_current;
-	u32 set_pte_at_kernel;
 } mmu_stats;
 
 static u8 zero_stats;
@@ -698,33 +692,39 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
 	set_pte_vaddr(vaddr, mfn_pte(mfn, flags));
 }
 
-void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
-		    pte_t *ptep, pte_t pteval)
+static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
 {
-	if (xen_iomap_pte(pteval)) {
-		xen_set_iomap_pte(ptep, pteval);
-		return;
-	}
+	struct mmu_update u;
 
-	ADD_STATS(set_pte_at, 1);
-//	ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
-	ADD_STATS(set_pte_at_current, mm == current->mm);
-	ADD_STATS(set_pte_at_kernel, mm == &init_mm);
+	if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU)
+		return false;
 
-	if(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
-		struct mmu_update u;
+	xen_mc_batch();
 
-		xen_mc_batch();
+	u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
+	u.val = pte_val_ma(pteval);
+	xen_extend_mmu_update(&u);
 
-		u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
-		u.val = pte_val_ma(pteval);
-		xen_extend_mmu_update(&u);
+	xen_mc_issue(PARAVIRT_LAZY_MMU);
 
-		xen_mc_issue(PARAVIRT_LAZY_MMU);
-	} else
+	return true;
+}
+
+void xen_set_pte(pte_t *ptep, pte_t pteval)
+{
+	ADD_STATS(pte_update, 1);
+//	ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
+
+	if (!xen_batched_set_pte(ptep, pteval))
 		native_set_pte(ptep, pteval);
 }
 
+void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
+		    pte_t *ptep, pte_t pteval)
+{
+	xen_set_pte(ptep, pteval);
+}
+
 pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
 				 unsigned long addr, pte_t *ptep)
 {
@@ -931,26 +931,6 @@ void xen_set_pud(pud_t *ptr, pud_t val)
 	xen_set_pud_hyper(ptr, val);
 }
 
-void xen_set_pte(pte_t *ptep, pte_t pte)
-{
-	if (xen_iomap_pte(pte)) {
-		xen_set_iomap_pte(ptep, pte);
-		return;
-	}
-
-	ADD_STATS(pte_update, 1);
-//	ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
-	ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
-
-#ifdef CONFIG_X86_PAE
-	ptep->pte_high = pte.pte_high;
-	smp_wmb();
-	ptep->pte_low = pte.pte_low;
-#else
-	*ptep = pte;
-#endif
-}
-
 #ifdef CONFIG_X86_PAE
 void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
 {
@@ -959,9 +939,8 @@ void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
 
 void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-	ptep->pte_low = 0;
-	smp_wmb();		/* make sure low gets written first */
-	ptep->pte_high = 0;
+	if (!xen_batched_set_pte(ptep, native_make_pte(0)))
+		native_pte_clear(mm, addr, ptep);
 }
 
 void xen_pmd_clear(pmd_t *pmdp)
@@ -2683,14 +2662,6 @@ static int __init xen_mmu_debugfs(void)
 	xen_debugfs_create_u32_array("mmu_update_histo", 0444, d_mmu_debug,
 				     mmu_stats.mmu_update_histo, 20);
 
-	debugfs_create_u32("set_pte_at", 0444, d_mmu_debug, &mmu_stats.set_pte_at);
-	debugfs_create_u32("set_pte_at_batched", 0444, d_mmu_debug,
-			   &mmu_stats.set_pte_at_batched);
-	debugfs_create_u32("set_pte_at_current", 0444, d_mmu_debug,
-			   &mmu_stats.set_pte_at_current);
-	debugfs_create_u32("set_pte_at_kernel", 0444, d_mmu_debug,
-			   &mmu_stats.set_pte_at_kernel);
-
 	debugfs_create_u32("prot_commit", 0444, d_mmu_debug, &mmu_stats.prot_commit);
 	debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
 			   &mmu_stats.prot_commit_batched);
-- 
1.7.3.4


From ecccf707c47cad4d6c4b16323d7e7ad97a2d3ab8 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 1 Dec 2010 15:44:04 -0800
Subject: [PATCH 36/43] xen/mmu: use apply_to_page_range_batch in xen_remap_domain_mfn_range

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/mmu.c |   18 +++++++++++-------
 1 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 45fe925..8bcb934 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2559,15 +2559,19 @@ struct remap_data {
 	struct mmu_update *mmu_update;
 };
 
-static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
+static int remap_area_mfn_pte_fn(pte_t *ptep, unsigned count, pgtable_t token,
 				 unsigned long addr, void *data)
 {
 	struct remap_data *rmd = data;
-	pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
 
-	rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr;
-	rmd->mmu_update->val = pte_val_ma(pte);
-	rmd->mmu_update++;
+	while (count--) {
+		pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
+
+		rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr;
+		rmd->mmu_update->val = pte_val_ma(pte);
+		rmd->mmu_update++;
+		ptep++;
+	}
 
 	return 0;
 }
@@ -2595,8 +2599,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 		range = (unsigned long)batch << PAGE_SHIFT;
 
 		rmd.mmu_update = mmu_update;
-		err = apply_to_page_range(vma->vm_mm, addr, range,
-					  remap_area_mfn_pte_fn, &rmd);
+		err = apply_to_page_range_batch(vma->vm_mm, addr, range,
+						remap_area_mfn_pte_fn, &rmd);
 		if (err)
 			goto out;
 
-- 
1.7.3.4


From aa3ac7d19a3b17e4eeadea90626f707a9d9414e0 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 1 Dec 2010 15:45:21 -0800
Subject: [PATCH 37/43] vmalloc: use apply_to_page_range_batch() in alloc_vm_area()

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 mm/vmalloc.c |    8 ++++----
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index e8d2025..2bd4198 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1991,9 +1991,9 @@ void  __attribute__((weak)) vmalloc_sync_all(void)
 }
 
 
-static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
+static int f(pte_t *pte, unsigned count, pgtable_t table, unsigned long addr, void *data)
 {
-	/* apply_to_page_range() does all the hard work. */
+	/* apply_to_page_range_batch() does all the hard work. */
 	return 0;
 }
 
@@ -2022,8 +2022,8 @@ struct vm_struct *alloc_vm_area(size_t size)
 	 * This ensures that page tables are constructed for this region
 	 * of kernel virtual address space and mapped into init_mm.
 	 */
-	if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
-				area->size, f, NULL)) {
+	if (apply_to_page_range_batch(&init_mm, (unsigned long)area->addr,
+				      area->size, f, NULL)) {
 		free_vm_area(area);
 		return NULL;
 	}
-- 
1.7.3.4


From 2c634d12dc6f2e77560440d6ae0dfc91f9050b55 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 1 Dec 2010 15:45:48 -0800
Subject: [PATCH 38/43] vmalloc: remove vmalloc_sync_all() from alloc_vm_area()

There's no need for it: it will get faulted into the current pagetable
as needed.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 mm/vmalloc.c |    4 ----
 1 files changed, 0 insertions(+), 4 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 2bd4198..69d9c5e 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2028,10 +2028,6 @@ struct vm_struct *alloc_vm_area(size_t size)
 		return NULL;
 	}
 
-	/* Make sure the pagetables are constructed in process kernel
-	   mappings */
-	vmalloc_sync_all();
-
 	return area;
 }
 EXPORT_SYMBOL_GPL(alloc_vm_area);
-- 
1.7.3.4


From 0e652485dc1a55ff4777bd0c0ce85fa290a02bd6 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 1 Dec 2010 15:50:12 -0800
Subject: [PATCH 39/43] xen/grant-table: use apply_to_page_range_batch

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/grant-table.c |   28 ++++++++++++++++++----------
 1 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 49ba9b5..9cdb35e 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -44,21 +44,29 @@
 
 #include <asm/pgtable.h>
 
-static int map_pte_fn(pte_t *pte, struct page *pmd_page,
+static int map_pte_fn(pte_t *pte, unsigned count, struct page *pmd_page,
 		      unsigned long addr, void *data)
 {
 	unsigned long **frames = (unsigned long **)data;
 
-	set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
-	(*frames)++;
+	while (count--) {
+		set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
+		(*frames)++;
+		pte++;
+		addr += PAGE_SIZE;
+	}
 	return 0;
 }
 
-static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
+static int unmap_pte_fn(pte_t *pte, unsigned count, struct page *pmd_page,
 			unsigned long addr, void *data)
 {
+	while (count--) {
+		pte_clear(&init_mm, addr, pte);
+		addr += PAGE_SIZE;
+		pte++;
+	}
 
-	set_pte_at(&init_mm, addr, pte, __pte(0));
 	return 0;
 }
 
@@ -77,15 +85,15 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
 		*__shared = shared;
 	}
 
-	rc = apply_to_page_range(&init_mm, (unsigned long)shared,
-				 PAGE_SIZE * nr_gframes,
-				 map_pte_fn, &frames);
+	rc = apply_to_page_range_batch(&init_mm, (unsigned long)shared,
+				       PAGE_SIZE * nr_gframes,
+				       map_pte_fn, &frames);
 	return rc;
 }
 
 void arch_gnttab_unmap_shared(struct grant_entry *shared,
 			      unsigned long nr_gframes)
 {
-	apply_to_page_range(&init_mm, (unsigned long)shared,
-			    PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
+	apply_to_page_range_batch(&init_mm, (unsigned long)shared,
+				  PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
 }
-- 
1.7.3.4


From ac527c664a6f52fd780fd324deb0b018d4a4a357 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 3 Nov 2010 16:40:48 -0400
Subject: [PATCH 41/43] xen/events: only unmask irq if enabled

A dynirq EOI is an unmask, but we should only unmask if the
irq is logically enabled.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 drivers/xen/events.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 0009e48..4dcdbd9 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -1253,10 +1253,11 @@ static void disable_dynirq(unsigned int irq)
 static void ack_dynirq(unsigned int irq)
 {
 	int evtchn = evtchn_from_irq(irq);
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	move_masked_irq(irq);
 
-	if (VALID_EVTCHN(evtchn))
+	if (VALID_EVTCHN(evtchn) && !(desc->status & IRQ_DISABLED))
 		unmask_evtchn(evtchn);
 }
 
-- 
1.7.3.4


From 7a3ab024f242ecf26af9b5b0cce1ce2d9985ce19 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Fri, 5 Nov 2010 22:53:52 -0700
Subject: [PATCH 42/43] xen: make FOREIGN_FRAME_BIT high bit on 32 and 64

FOREIGN_FRAME_BIT is supposed to be the MSB of an MFN, which is an
unsigned long.  Since this changes between 32 and 64 bit, the definition
needs to be flexible.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/include/asm/xen/page.h |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 8760cc6..05c5cf5 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -29,7 +29,7 @@ typedef struct xpaddr {
 
 /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
 #define INVALID_P2M_ENTRY	(~0UL)
-#define FOREIGN_FRAME_BIT	(1UL<<31)
+#define FOREIGN_FRAME_BIT	(1UL << (sizeof(unsigned long) * 8 - 1))
 #define FOREIGN_FRAME(m)	((m) | FOREIGN_FRAME_BIT)
 
 /* Maximum amount of memory we can handle in a domain in pages */
-- 
1.7.3.4