From 1f6c23a1b72c669a4d9f5713ed4bea0b1ab72505 Mon Sep 17 00:00:00 2001 From: Michael Young Date: Jan 25 2011 21:25:53 +0000 Subject: Switch to xen/next-2.6.38 which adds net and pci backends add some memory fixes --- diff --git a/config-local b/config-local index 8c32be5..8f91cda 100644 --- a/config-local +++ b/config-local @@ -1,2 +1,5 @@ # This file is intentionally left empty in the stock kernel. Its a nicety # added for those wanting to do custom rebuilds with altered config opts. +CONFIG_XEN_NETDEV_BACKEND=m +CONFIG_XEN_PCIDEV_BACKEND=m +CONFIG_XEN_PCIDEV_BE_DEBUG=n diff --git a/kernel.spec b/kernel.spec index b6050f0..4bd59bb 100644 --- a/kernel.spec +++ b/kernel.spec @@ -737,12 +737,12 @@ Patch12421: fs-call-security_d_instantiate-in-d_obtain_alias.patch # Xen patches # git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git branches -Patch20000: xen.next-2.6.37.patch +Patch20000: xen.next-2.6.38.patch #Patch20001: xen.upstream.core.patch # git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git branches -Patch20005: xen.pcifront.fixes.patch +#Patch20005: xen.pcifront.fixes.patch # git://xenbits.xen.org/people/sstabellini/linux-pvhvm branches -#Patch20010: xen.pvhvm.fixes.patch +Patch20010: xen.pvhvm.fixes.patch %endif @@ -1357,9 +1357,9 @@ ApplyPatch dmar-disable-when-ricoh-multifunction.patch ApplyPatch fs-call-security_d_instantiate-in-d_obtain_alias.patch # Xen patches -ApplyPatch xen.next-2.6.37.patch +ApplyPatch xen.next-2.6.38.patch #ApplyPatch xen.upstream.core.patch -ApplyPatch xen.pcifront.fixes.patch +#ApplyPatch xen.pcifront.fixes.patch #ApplyPatch xen.pvhvm.fixes.patch # END OF PATCH APPLICATIONS @@ -1974,6 +1974,13 @@ fi # ||----w | # || || %changelog +* Tue Jan 25 2011 Michael Young +- Switch from xen/next-2.6.37 to xen/next-2.6.38 + which adds XEN_NETDEV_BACKEND and XEN_PCIDEV_BACKEND +- comment out xen.pcifront.fixes.patch (patches are in next-2.6.38) +- put 2.6.38-rc1-memory-fixes branch in xen.pvhvm.fixes.patch + for some memory fixes including a later version of the crash on boot patch + * Tue Jan 25 2011 Kyle McMartin 2.6.38-0.rc2.git3.1 - Linux 2.6.38-rc2-git3 - perf-gcc460-build-fixes.patch: fix context from [9486aa38] diff --git a/xen.next-2.6.38.patch b/xen.next-2.6.38.patch new file mode 100644 index 0000000..c8a28e1 --- /dev/null +++ b/xen.next-2.6.38.patch @@ -0,0 +1,30821 @@ +From 1e13f505ecbc011465783283ebfa05a42f7ce18f Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Thu, 3 Dec 2009 22:04:06 +0000 +Subject: [PATCH 001/139] xen: export xen_gsi_from_irq, it is required by modular pciback + +Signed-off-by: Ian Campbell +Cc: Jeremy Fitzhardinge +Cc: Konrad Rzeszutek Wilk +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/events.c | 1 + + 1 files changed, 1 insertions(+), 0 deletions(-) + +diff --git a/drivers/xen/events.c b/drivers/xen/events.c +index 97612f5..a04da4b 100644 +--- a/drivers/xen/events.c ++++ b/drivers/xen/events.c +@@ -778,6 +778,7 @@ int xen_gsi_from_irq(unsigned irq) + { + return gsi_from_irq(irq); + } ++EXPORT_SYMBOL_GPL(xen_gsi_from_irq); + + int xen_irq_from_pirq(unsigned pirq) + { +-- +1.7.3.4 + + +From f0885b9401a859bc7bed849925a703c03d00119b Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Mon, 8 Nov 2010 14:13:35 -0500 +Subject: [PATCH 002/139] xen/pci: Add xen_[find|register|unregister]_device_domain_owner functions. + +Xen PCI backend performs ownership (MSI/MSI-X) changes on the behalf of +the guest. This means we need some mechanism to find, set and unset +the domain id of the guest. 
+ +Signed-off-by: Konrad Rzeszutek Wilk +--- + arch/x86/include/asm/xen/pci.h | 16 +++++++++ + arch/x86/pci/xen.c | 73 ++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 89 insertions(+), 0 deletions(-) + +diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h +index 2329b3e..8474b4b 100644 +--- a/arch/x86/include/asm/xen/pci.h ++++ b/arch/x86/include/asm/xen/pci.h +@@ -15,10 +15,26 @@ static inline int pci_xen_hvm_init(void) + #endif + #if defined(CONFIG_XEN_DOM0) + void __init xen_setup_pirqs(void); ++int xen_find_device_domain_owner(struct pci_dev *dev); ++int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); ++int xen_unregister_device_domain_owner(struct pci_dev *dev); + #else + static inline void __init xen_setup_pirqs(void) + { + } ++static inline int xen_find_device_domain_owner(struct pci_dev *dev) ++{ ++ return -1; ++} ++static inline int xen_register_device_domain_owner(struct pci_dev *dev, ++ uint16_t domain) ++{ ++ return -1; ++} ++static inline int xen_unregister_device_domain_owner(struct pci_dev *dev) ++{ ++ return -1; ++} + #endif + + #if defined(CONFIG_PCI_MSI) +diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c +index 117f5b8..6d2a986 100644 +--- a/arch/x86/pci/xen.c ++++ b/arch/x86/pci/xen.c +@@ -412,3 +412,76 @@ void __init xen_setup_pirqs(void) + } + } + #endif ++ ++struct xen_device_domain_owner { ++ domid_t domain; ++ struct pci_dev *dev; ++ struct list_head list; ++}; ++ ++static DEFINE_SPINLOCK(dev_domain_list_spinlock); ++static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list); ++ ++static struct xen_device_domain_owner *find_device(struct pci_dev *dev) ++{ ++ struct xen_device_domain_owner *owner; ++ ++ list_for_each_entry(owner, &dev_domain_list, list) { ++ if (owner->dev == dev) ++ return owner; ++ } ++ return NULL; ++} ++ ++int xen_find_device_domain_owner(struct pci_dev *dev) ++{ ++ struct xen_device_domain_owner *owner; ++ int domain = -ENODEV; ++ ++ spin_lock(&dev_domain_list_spinlock); ++ owner = find_device(dev); ++ if (owner) ++ domain = owner->domain; ++ spin_unlock(&dev_domain_list_spinlock); ++ return domain; ++} ++EXPORT_SYMBOL(xen_find_device_domain_owner); ++ ++int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain) ++{ ++ struct xen_device_domain_owner *owner; ++ ++ owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL); ++ if (!owner) ++ return -ENODEV; ++ ++ spin_lock(&dev_domain_list_spinlock); ++ if (find_device(dev)) { ++ spin_unlock(&dev_domain_list_spinlock); ++ kfree(owner); ++ return -EEXIST; ++ } ++ owner->domain = domain; ++ owner->dev = dev; ++ list_add_tail(&owner->list, &dev_domain_list); ++ spin_unlock(&dev_domain_list_spinlock); ++ return 0; ++} ++EXPORT_SYMBOL(xen_register_device_domain_owner); ++ ++int xen_unregister_device_domain_owner(struct pci_dev *dev) ++{ ++ struct xen_device_domain_owner *owner; ++ ++ spin_lock(&dev_domain_list_spinlock); ++ owner = find_device(dev); ++ if (!owner) { ++ spin_unlock(&dev_domain_list_spinlock); ++ return -ENODEV; ++ } ++ list_del(&owner->list); ++ spin_unlock(&dev_domain_list_spinlock); ++ kfree(owner); ++ return 0; ++} ++EXPORT_SYMBOL(xen_unregister_device_domain_owner); +-- +1.7.3.4 + + +From da24916fdf04d7b4a32c5b9d2c09e47775496e1d Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Mon, 8 Nov 2010 14:23:17 -0500 +Subject: [PATCH 003/139] xen: Check if the PCI device is owned by a domain different than DOMID_SELF. + +We check if there is a domain owner for the PCI device. 
In case of failure +(meaning no domain has registered for this device) we make +DOMID_SELF the owner. + +[v2: deal with rebasing on v2.6.37-1] +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jeremy Fitzhardinge +Acked-by: Xiantao Zhang +--- + drivers/xen/events.c | 16 +++++++++++++--- + 1 files changed, 13 insertions(+), 3 deletions(-) + +diff --git a/drivers/xen/events.c b/drivers/xen/events.c +index a04da4b..96c93e7 100644 +--- a/drivers/xen/events.c ++++ b/drivers/xen/events.c +@@ -40,6 +40,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -97,6 +98,7 @@ struct irq_info + unsigned short gsi; + unsigned char vector; + unsigned char flags; ++ uint16_t domid; + } pirq; + } u; + }; +@@ -158,7 +160,8 @@ static struct irq_info mk_pirq_info(unsigned short evtchn, unsigned short pirq, + { + return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn, + .cpu = 0, +- .u.pirq = { .pirq = pirq, .gsi = gsi, .vector = vector } }; ++ .u.pirq = { .pirq = pirq, .gsi = gsi, ++ .vector = vector, .domid = DOMID_SELF } }; + } + + /* +@@ -688,11 +691,16 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type) + int irq = -1; + struct physdev_map_pirq map_irq; + int rc; ++ domid_t domid; + int pos; + u32 table_offset, bir; + ++ domid = rc = xen_find_device_domain_owner(dev); ++ if (rc < 0) ++ domid = DOMID_SELF; ++ + memset(&map_irq, 0, sizeof(map_irq)); +- map_irq.domid = DOMID_SELF; ++ map_irq.domid = domid; + map_irq.type = MAP_PIRQ_TYPE_MSI; + map_irq.index = -1; + map_irq.pirq = -1; +@@ -727,6 +735,8 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type) + goto out; + } + irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index); ++ if (domid) ++ irq_info[irq].u.pirq.domid = domid; + + set_irq_chip_and_handler_name(irq, &xen_pirq_chip, + handle_level_irq, +@@ -753,7 +763,7 @@ int xen_destroy_irq(int irq) + + if (xen_initial_domain()) { + unmap_irq.pirq = info->u.pirq.pirq; +- unmap_irq.domid = DOMID_SELF; ++ unmap_irq.domid = info->u.pirq.domid; + rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq); + if (rc) { + printk(KERN_WARNING "unmap irq failed %d\n", rc); +-- +1.7.3.4 + + +From 30fecb8166bdd163bdaab795b573cf988f60fbbe Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Mon, 8 Nov 2010 14:26:36 -0500 +Subject: [PATCH 004/139] xen: Add support to check if IRQ line is shared with other domains. + +We do this via the PHYSDEVOP_irq_status_query support hypervisor call. +We will get a positive value if another domain has binded its +PIRQ to the specified IRQ line. + +[v2: Deal with v2.6.37-rc1 rebase fallout] +Signed-off-by: Konrad Rzeszutek Wilk +--- + drivers/xen/events.c | 13 +++++++++++++ + include/xen/events.h | 3 +++ + 2 files changed, 16 insertions(+), 0 deletions(-) + +diff --git a/drivers/xen/events.c b/drivers/xen/events.c +index 96c93e7..690dfad 100644 +--- a/drivers/xen/events.c ++++ b/drivers/xen/events.c +@@ -1398,6 +1398,19 @@ void xen_poll_irq(int irq) + xen_poll_irq_timeout(irq, 0 /* no timeout */); + } + ++/* Check whether the IRQ line is shared with other guests. 
*/ ++int xen_ignore_irq(int irq) ++{ ++ struct irq_info *info = info_for_irq(irq); ++ struct physdev_irq_status_query irq_status = { .irq = ++ info->u.pirq.gsi }; ++ ++ if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) ++ return 0; ++ return !(irq_status.flags & XENIRQSTAT_shared); ++} ++EXPORT_SYMBOL_GPL(xen_ignore_irq); ++ + void xen_irq_resume(void) + { + unsigned int cpu, irq, evtchn; +diff --git a/include/xen/events.h b/include/xen/events.h +index 646dd17..553c664 100644 +--- a/include/xen/events.h ++++ b/include/xen/events.h +@@ -89,4 +89,7 @@ int xen_vector_from_irq(unsigned pirq); + /* Return irq from pirq */ + int xen_irq_from_pirq(unsigned pirq); + ++/* Determine whether to ignore this IRQ if passed to a guest. */ ++int xen_ignore_irq(int irq); ++ + #endif /* _XEN_EVENTS_H */ +-- +1.7.3.4 + + +From 909e45104de4414897cefce2f6bbed07fc4de4b3 Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Mon, 9 Feb 2009 12:05:50 -0800 +Subject: [PATCH 005/139] xen: implement bind_interdomain_evtchn_to_irqhandler for backend drivers + +Impact: new Xen-internal API + +Signed-off-by: Ian Campbell +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/events.c | 38 ++++++++++++++++++++++++++++++++++++++ + include/xen/events.h | 6 ++++++ + 2 files changed, 44 insertions(+), 0 deletions(-) + +diff --git a/drivers/xen/events.c b/drivers/xen/events.c +index 690dfad..95eea13 100644 +--- a/drivers/xen/events.c ++++ b/drivers/xen/events.c +@@ -849,6 +849,21 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) + return irq; + } + ++static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, ++ unsigned int remote_port) ++{ ++ struct evtchn_bind_interdomain bind_interdomain; ++ int err; ++ ++ bind_interdomain.remote_dom = remote_domain; ++ bind_interdomain.remote_port = remote_port; ++ ++ err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, ++ &bind_interdomain); ++ ++ return err ? : bind_evtchn_to_irq(bind_interdomain.local_port); ++} ++ + + int bind_virq_to_irq(unsigned int virq, unsigned int cpu) + { +@@ -944,6 +959,29 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn, + } + EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); + ++int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, ++ unsigned int remote_port, ++ irq_handler_t handler, ++ unsigned long irqflags, ++ const char *devname, ++ void *dev_id) ++{ ++ int irq, retval; ++ ++ irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port); ++ if (irq < 0) ++ return irq; ++ ++ retval = request_irq(irq, handler, irqflags, devname, dev_id); ++ if (retval != 0) { ++ unbind_from_irq(irq); ++ return retval; ++ } ++ ++ return irq; ++} ++EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler); ++ + int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, + irq_handler_t handler, + unsigned long irqflags, const char *devname, void *dev_id) +diff --git a/include/xen/events.h b/include/xen/events.h +index 553c664..2fe1644 100644 +--- a/include/xen/events.h ++++ b/include/xen/events.h +@@ -23,6 +23,12 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi, + unsigned long irqflags, + const char *devname, + void *dev_id); ++int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, ++ unsigned int remote_port, ++ irq_handler_t handler, ++ unsigned long irqflags, ++ const char *devname, ++ void *dev_id); + + /* + * Common unbind function for all event sources. Takes IRQ to unbind from. 
+-- +1.7.3.4 + + +From b4f664c8de09ab8537e1cd194df29056f803062e Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Mon, 8 Nov 2010 14:46:33 -0500 +Subject: [PATCH 006/139] pci/xen: Make xen_[find|register|unregister]_domain_owner be _GPL + +EXPORT_SYMBOL -> EXPORT_SYMBOL_GPL. + +Signed-off-by: Konrad Rzeszutek Wilk +--- + arch/x86/pci/xen.c | 6 +++--- + 1 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c +index 6d2a986..0fa23c8 100644 +--- a/arch/x86/pci/xen.c ++++ b/arch/x86/pci/xen.c +@@ -445,7 +445,7 @@ int xen_find_device_domain_owner(struct pci_dev *dev) + spin_unlock(&dev_domain_list_spinlock); + return domain; + } +-EXPORT_SYMBOL(xen_find_device_domain_owner); ++EXPORT_SYMBOL_GPL(xen_find_device_domain_owner); + + int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain) + { +@@ -467,7 +467,7 @@ int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain) + spin_unlock(&dev_domain_list_spinlock); + return 0; + } +-EXPORT_SYMBOL(xen_register_device_domain_owner); ++EXPORT_SYMBOL_GPL(xen_register_device_domain_owner); + + int xen_unregister_device_domain_owner(struct pci_dev *dev) + { +@@ -484,4 +484,4 @@ int xen_unregister_device_domain_owner(struct pci_dev *dev) + kfree(owner); + return 0; + } +-EXPORT_SYMBOL(xen_unregister_device_domain_owner); ++EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner); +-- +1.7.3.4 + + +From 443b2aafbdb509f218fcb8f4665f063e3a5e1a92 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Tue, 13 Oct 2009 17:22:20 -0400 +Subject: [PATCH 007/139] xen-pciback: Initial copy from linux-2.6.18.hg off pciback driver. + +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/pciback/Makefile | 17 + + drivers/xen/pciback/conf_space.c | 435 ++++++++ + drivers/xen/pciback/conf_space.h | 126 +++ + drivers/xen/pciback/conf_space_capability.c | 69 ++ + drivers/xen/pciback/conf_space_capability.h | 23 + + drivers/xen/pciback/conf_space_capability_msi.c | 79 ++ + drivers/xen/pciback/conf_space_capability_pm.c | 126 +++ + drivers/xen/pciback/conf_space_capability_vpd.c | 40 + + drivers/xen/pciback/conf_space_header.c | 317 ++++++ + drivers/xen/pciback/conf_space_quirks.c | 138 +++ + drivers/xen/pciback/conf_space_quirks.h | 35 + + drivers/xen/pciback/controller.c | 443 ++++++++ + drivers/xen/pciback/passthrough.c | 176 +++ + drivers/xen/pciback/pci_stub.c | 1316 +++++++++++++++++++++++ + drivers/xen/pciback/pciback.h | 126 +++ + drivers/xen/pciback/pciback_ops.c | 134 +++ + drivers/xen/pciback/slot.c | 187 ++++ + drivers/xen/pciback/vpci.c | 242 +++++ + drivers/xen/pciback/xenbus.c | 710 ++++++++++++ + 19 files changed, 4739 insertions(+), 0 deletions(-) + create mode 100644 drivers/xen/pciback/Makefile + create mode 100644 drivers/xen/pciback/conf_space.c + create mode 100644 drivers/xen/pciback/conf_space.h + create mode 100644 drivers/xen/pciback/conf_space_capability.c + create mode 100644 drivers/xen/pciback/conf_space_capability.h + create mode 100644 drivers/xen/pciback/conf_space_capability_msi.c + create mode 100644 drivers/xen/pciback/conf_space_capability_pm.c + create mode 100644 drivers/xen/pciback/conf_space_capability_vpd.c + create mode 100644 drivers/xen/pciback/conf_space_header.c + create mode 100644 drivers/xen/pciback/conf_space_quirks.c + create mode 100644 drivers/xen/pciback/conf_space_quirks.h + create mode 100644 drivers/xen/pciback/controller.c + create mode 100644 drivers/xen/pciback/passthrough.c + create 
mode 100644 drivers/xen/pciback/pci_stub.c + create mode 100644 drivers/xen/pciback/pciback.h + create mode 100644 drivers/xen/pciback/pciback_ops.c + create mode 100644 drivers/xen/pciback/slot.c + create mode 100644 drivers/xen/pciback/vpci.c + create mode 100644 drivers/xen/pciback/xenbus.c + +diff --git a/drivers/xen/pciback/Makefile b/drivers/xen/pciback/Makefile +new file mode 100644 +index 0000000..106dae7 +--- /dev/null ++++ b/drivers/xen/pciback/Makefile +@@ -0,0 +1,17 @@ ++obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback.o ++ ++pciback-y := pci_stub.o pciback_ops.o xenbus.o ++pciback-y += conf_space.o conf_space_header.o \ ++ conf_space_capability.o \ ++ conf_space_capability_vpd.o \ ++ conf_space_capability_pm.o \ ++ conf_space_quirks.o ++pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o ++pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o ++pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o ++pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o ++pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o ++ ++ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y) ++EXTRA_CFLAGS += -DDEBUG ++endif +diff --git a/drivers/xen/pciback/conf_space.c b/drivers/xen/pciback/conf_space.c +new file mode 100644 +index 0000000..0c76db1 +--- /dev/null ++++ b/drivers/xen/pciback/conf_space.c +@@ -0,0 +1,435 @@ ++/* ++ * PCI Backend - Functions for creating a virtual configuration space for ++ * exported PCI Devices. ++ * It's dangerous to allow PCI Driver Domains to change their ++ * device's resources (memory, i/o ports, interrupts). We need to ++ * restrict changes to certain PCI Configuration registers: ++ * BARs, INTERRUPT_PIN, most registers in the header... ++ * ++ * Author: Ryan Wilson ++ */ ++ ++#include ++#include ++#include "pciback.h" ++#include "conf_space.h" ++#include "conf_space_quirks.h" ++ ++static int permissive; ++module_param(permissive, bool, 0644); ++ ++#define DEFINE_PCI_CONFIG(op,size,type) \ ++int pciback_##op##_config_##size \ ++(struct pci_dev *dev, int offset, type value, void *data) \ ++{ \ ++ return pci_##op##_config_##size (dev, offset, value); \ ++} ++ ++DEFINE_PCI_CONFIG(read, byte, u8 *) ++DEFINE_PCI_CONFIG(read, word, u16 *) ++DEFINE_PCI_CONFIG(read, dword, u32 *) ++ ++DEFINE_PCI_CONFIG(write, byte, u8) ++DEFINE_PCI_CONFIG(write, word, u16) ++DEFINE_PCI_CONFIG(write, dword, u32) ++ ++static int conf_space_read(struct pci_dev *dev, ++ const struct config_field_entry *entry, ++ int offset, u32 *value) ++{ ++ int ret = 0; ++ const struct config_field *field = entry->field; ++ ++ *value = 0; ++ ++ switch (field->size) { ++ case 1: ++ if (field->u.b.read) ++ ret = field->u.b.read(dev, offset, (u8 *) value, ++ entry->data); ++ break; ++ case 2: ++ if (field->u.w.read) ++ ret = field->u.w.read(dev, offset, (u16 *) value, ++ entry->data); ++ break; ++ case 4: ++ if (field->u.dw.read) ++ ret = field->u.dw.read(dev, offset, value, entry->data); ++ break; ++ } ++ return ret; ++} ++ ++static int conf_space_write(struct pci_dev *dev, ++ const struct config_field_entry *entry, ++ int offset, u32 value) ++{ ++ int ret = 0; ++ const struct config_field *field = entry->field; ++ ++ switch (field->size) { ++ case 1: ++ if (field->u.b.write) ++ ret = field->u.b.write(dev, offset, (u8) value, ++ entry->data); ++ break; ++ case 2: ++ if (field->u.w.write) ++ ret = field->u.w.write(dev, offset, (u16) value, ++ entry->data); ++ break; ++ case 4: ++ if (field->u.dw.write) ++ ret = field->u.dw.write(dev, offset, value, ++ entry->data); ++ break; ++ } ++ return ret; ++} ++ ++static 
inline u32 get_mask(int size) ++{ ++ if (size == 1) ++ return 0xff; ++ else if (size == 2) ++ return 0xffff; ++ else ++ return 0xffffffff; ++} ++ ++static inline int valid_request(int offset, int size) ++{ ++ /* Validate request (no un-aligned requests) */ ++ if ((size == 1 || size == 2 || size == 4) && (offset % size) == 0) ++ return 1; ++ return 0; ++} ++ ++static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask, ++ int offset) ++{ ++ if (offset >= 0) { ++ new_val_mask <<= (offset * 8); ++ new_val <<= (offset * 8); ++ } else { ++ new_val_mask >>= (offset * -8); ++ new_val >>= (offset * -8); ++ } ++ val = (val & ~new_val_mask) | (new_val & new_val_mask); ++ ++ return val; ++} ++ ++static int pcibios_err_to_errno(int err) ++{ ++ switch (err) { ++ case PCIBIOS_SUCCESSFUL: ++ return XEN_PCI_ERR_success; ++ case PCIBIOS_DEVICE_NOT_FOUND: ++ return XEN_PCI_ERR_dev_not_found; ++ case PCIBIOS_BAD_REGISTER_NUMBER: ++ return XEN_PCI_ERR_invalid_offset; ++ case PCIBIOS_FUNC_NOT_SUPPORTED: ++ return XEN_PCI_ERR_not_implemented; ++ case PCIBIOS_SET_FAILED: ++ return XEN_PCI_ERR_access_denied; ++ } ++ return err; ++} ++ ++int pciback_config_read(struct pci_dev *dev, int offset, int size, ++ u32 * ret_val) ++{ ++ int err = 0; ++ struct pciback_dev_data *dev_data = pci_get_drvdata(dev); ++ const struct config_field_entry *cfg_entry; ++ const struct config_field *field; ++ int req_start, req_end, field_start, field_end; ++ /* if read fails for any reason, return 0 (as if device didn't respond) */ ++ u32 value = 0, tmp_val; ++ ++ if (unlikely(verbose_request)) ++ printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x\n", ++ pci_name(dev), size, offset); ++ ++ if (!valid_request(offset, size)) { ++ err = XEN_PCI_ERR_invalid_offset; ++ goto out; ++ } ++ ++ /* Get the real value first, then modify as appropriate */ ++ switch (size) { ++ case 1: ++ err = pci_read_config_byte(dev, offset, (u8 *) & value); ++ break; ++ case 2: ++ err = pci_read_config_word(dev, offset, (u16 *) & value); ++ break; ++ case 4: ++ err = pci_read_config_dword(dev, offset, &value); ++ break; ++ } ++ ++ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { ++ field = cfg_entry->field; ++ ++ req_start = offset; ++ req_end = offset + size; ++ field_start = OFFSET(cfg_entry); ++ field_end = OFFSET(cfg_entry) + field->size; ++ ++ if ((req_start >= field_start && req_start < field_end) ++ || (req_end > field_start && req_end <= field_end)) { ++ err = conf_space_read(dev, cfg_entry, field_start, ++ &tmp_val); ++ if (err) ++ goto out; ++ ++ value = merge_value(value, tmp_val, ++ get_mask(field->size), ++ field_start - req_start); ++ } ++ } ++ ++ out: ++ if (unlikely(verbose_request)) ++ printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n", ++ pci_name(dev), size, offset, value); ++ ++ *ret_val = value; ++ return pcibios_err_to_errno(err); ++} ++ ++int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value) ++{ ++ int err = 0, handled = 0; ++ struct pciback_dev_data *dev_data = pci_get_drvdata(dev); ++ const struct config_field_entry *cfg_entry; ++ const struct config_field *field; ++ u32 tmp_val; ++ int req_start, req_end, field_start, field_end; ++ ++ if (unlikely(verbose_request)) ++ printk(KERN_DEBUG ++ "pciback: %s: write request %d bytes at 0x%x = %x\n", ++ pci_name(dev), size, offset, value); ++ ++ if (!valid_request(offset, size)) ++ return XEN_PCI_ERR_invalid_offset; ++ ++ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { ++ field = cfg_entry->field; ++ ++ req_start = 
offset; ++ req_end = offset + size; ++ field_start = OFFSET(cfg_entry); ++ field_end = OFFSET(cfg_entry) + field->size; ++ ++ if ((req_start >= field_start && req_start < field_end) ++ || (req_end > field_start && req_end <= field_end)) { ++ tmp_val = 0; ++ ++ err = pciback_config_read(dev, field_start, ++ field->size, &tmp_val); ++ if (err) ++ break; ++ ++ tmp_val = merge_value(tmp_val, value, get_mask(size), ++ req_start - field_start); ++ ++ err = conf_space_write(dev, cfg_entry, field_start, ++ tmp_val); ++ ++ /* handled is set true here, but not every byte ++ * may have been written! Properly detecting if ++ * every byte is handled is unnecessary as the ++ * flag is used to detect devices that need ++ * special helpers to work correctly. ++ */ ++ handled = 1; ++ } ++ } ++ ++ if (!handled && !err) { ++ /* By default, anything not specificially handled above is ++ * read-only. The permissive flag changes this behavior so ++ * that anything not specifically handled above is writable. ++ * This means that some fields may still be read-only because ++ * they have entries in the config_field list that intercept ++ * the write and do nothing. */ ++ if (dev_data->permissive || permissive) { ++ switch (size) { ++ case 1: ++ err = pci_write_config_byte(dev, offset, ++ (u8) value); ++ break; ++ case 2: ++ err = pci_write_config_word(dev, offset, ++ (u16) value); ++ break; ++ case 4: ++ err = pci_write_config_dword(dev, offset, ++ (u32) value); ++ break; ++ } ++ } else if (!dev_data->warned_on_write) { ++ dev_data->warned_on_write = 1; ++ dev_warn(&dev->dev, "Driver tried to write to a " ++ "read-only configuration space field at offset " ++ "0x%x, size %d. This may be harmless, but if " ++ "you have problems with your device:\n" ++ "1) see permissive attribute in sysfs\n" ++ "2) report problems to the xen-devel " ++ "mailing list along with details of your " ++ "device obtained from lspci.\n", offset, size); ++ } ++ } ++ ++ return pcibios_err_to_errno(err); ++} ++ ++void pciback_config_free_dyn_fields(struct pci_dev *dev) ++{ ++ struct pciback_dev_data *dev_data = pci_get_drvdata(dev); ++ struct config_field_entry *cfg_entry, *t; ++ const struct config_field *field; ++ ++ dev_dbg(&dev->dev, ++ "free-ing dynamically allocated virtual configuration space fields\n"); ++ if (!dev_data) ++ return; ++ ++ list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) { ++ field = cfg_entry->field; ++ ++ if (field->clean) { ++ field->clean((struct config_field *)field); ++ ++ if (cfg_entry->data) ++ kfree(cfg_entry->data); ++ ++ list_del(&cfg_entry->list); ++ kfree(cfg_entry); ++ } ++ ++ } ++} ++ ++void pciback_config_reset_dev(struct pci_dev *dev) ++{ ++ struct pciback_dev_data *dev_data = pci_get_drvdata(dev); ++ const struct config_field_entry *cfg_entry; ++ const struct config_field *field; ++ ++ dev_dbg(&dev->dev, "resetting virtual configuration space\n"); ++ if (!dev_data) ++ return; ++ ++ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { ++ field = cfg_entry->field; ++ ++ if (field->reset) ++ field->reset(dev, OFFSET(cfg_entry), cfg_entry->data); ++ } ++} ++ ++void pciback_config_free_dev(struct pci_dev *dev) ++{ ++ struct pciback_dev_data *dev_data = pci_get_drvdata(dev); ++ struct config_field_entry *cfg_entry, *t; ++ const struct config_field *field; ++ ++ dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n"); ++ if (!dev_data) ++ return; ++ ++ list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) { ++ list_del(&cfg_entry->list); ++ ++ 
field = cfg_entry->field; ++ ++ if (field->release) ++ field->release(dev, OFFSET(cfg_entry), cfg_entry->data); ++ ++ kfree(cfg_entry); ++ } ++} ++ ++int pciback_config_add_field_offset(struct pci_dev *dev, ++ const struct config_field *field, ++ unsigned int base_offset) ++{ ++ int err = 0; ++ struct pciback_dev_data *dev_data = pci_get_drvdata(dev); ++ struct config_field_entry *cfg_entry; ++ void *tmp; ++ ++ cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL); ++ if (!cfg_entry) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ cfg_entry->data = NULL; ++ cfg_entry->field = field; ++ cfg_entry->base_offset = base_offset; ++ ++ /* silently ignore duplicate fields */ ++ err = pciback_field_is_dup(dev,OFFSET(cfg_entry)); ++ if (err) ++ goto out; ++ ++ if (field->init) { ++ tmp = field->init(dev, OFFSET(cfg_entry)); ++ ++ if (IS_ERR(tmp)) { ++ err = PTR_ERR(tmp); ++ goto out; ++ } ++ ++ cfg_entry->data = tmp; ++ } ++ ++ dev_dbg(&dev->dev, "added config field at offset 0x%02x\n", ++ OFFSET(cfg_entry)); ++ list_add_tail(&cfg_entry->list, &dev_data->config_fields); ++ ++ out: ++ if (err) ++ kfree(cfg_entry); ++ ++ return err; ++} ++ ++/* This sets up the device's virtual configuration space to keep track of ++ * certain registers (like the base address registers (BARs) so that we can ++ * keep the client from manipulating them directly. ++ */ ++int pciback_config_init_dev(struct pci_dev *dev) ++{ ++ int err = 0; ++ struct pciback_dev_data *dev_data = pci_get_drvdata(dev); ++ ++ dev_dbg(&dev->dev, "initializing virtual configuration space\n"); ++ ++ INIT_LIST_HEAD(&dev_data->config_fields); ++ ++ err = pciback_config_header_add_fields(dev); ++ if (err) ++ goto out; ++ ++ err = pciback_config_capability_add_fields(dev); ++ if (err) ++ goto out; ++ ++ err = pciback_config_quirks_init(dev); ++ ++ out: ++ return err; ++} ++ ++int pciback_config_init(void) ++{ ++ return pciback_config_capability_init(); ++} +diff --git a/drivers/xen/pciback/conf_space.h b/drivers/xen/pciback/conf_space.h +new file mode 100644 +index 0000000..fe746ef +--- /dev/null ++++ b/drivers/xen/pciback/conf_space.h +@@ -0,0 +1,126 @@ ++/* ++ * PCI Backend - Common data structures for overriding the configuration space ++ * ++ * Author: Ryan Wilson ++ */ ++ ++#ifndef __XEN_PCIBACK_CONF_SPACE_H__ ++#define __XEN_PCIBACK_CONF_SPACE_H__ ++ ++#include ++#include ++ ++/* conf_field_init can return an errno in a ptr with ERR_PTR() */ ++typedef void *(*conf_field_init) (struct pci_dev * dev, int offset); ++typedef void (*conf_field_reset) (struct pci_dev * dev, int offset, void *data); ++typedef void (*conf_field_free) (struct pci_dev * dev, int offset, void *data); ++ ++typedef int (*conf_dword_write) (struct pci_dev * dev, int offset, u32 value, ++ void *data); ++typedef int (*conf_word_write) (struct pci_dev * dev, int offset, u16 value, ++ void *data); ++typedef int (*conf_byte_write) (struct pci_dev * dev, int offset, u8 value, ++ void *data); ++typedef int (*conf_dword_read) (struct pci_dev * dev, int offset, u32 * value, ++ void *data); ++typedef int (*conf_word_read) (struct pci_dev * dev, int offset, u16 * value, ++ void *data); ++typedef int (*conf_byte_read) (struct pci_dev * dev, int offset, u8 * value, ++ void *data); ++ ++/* These are the fields within the configuration space which we ++ * are interested in intercepting reads/writes to and changing their ++ * values. 
++ */ ++struct config_field { ++ unsigned int offset; ++ unsigned int size; ++ unsigned int mask; ++ conf_field_init init; ++ conf_field_reset reset; ++ conf_field_free release; ++ void (*clean) (struct config_field * field); ++ union { ++ struct { ++ conf_dword_write write; ++ conf_dword_read read; ++ } dw; ++ struct { ++ conf_word_write write; ++ conf_word_read read; ++ } w; ++ struct { ++ conf_byte_write write; ++ conf_byte_read read; ++ } b; ++ } u; ++ struct list_head list; ++}; ++ ++struct config_field_entry { ++ struct list_head list; ++ const struct config_field *field; ++ unsigned int base_offset; ++ void *data; ++}; ++ ++#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset) ++ ++/* Add fields to a device - the add_fields macro expects to get a pointer to ++ * the first entry in an array (of which the ending is marked by size==0) ++ */ ++int pciback_config_add_field_offset(struct pci_dev *dev, ++ const struct config_field *field, ++ unsigned int offset); ++ ++static inline int pciback_config_add_field(struct pci_dev *dev, ++ const struct config_field *field) ++{ ++ return pciback_config_add_field_offset(dev, field, 0); ++} ++ ++static inline int pciback_config_add_fields(struct pci_dev *dev, ++ const struct config_field *field) ++{ ++ int i, err = 0; ++ for (i = 0; field[i].size != 0; i++) { ++ err = pciback_config_add_field(dev, &field[i]); ++ if (err) ++ break; ++ } ++ return err; ++} ++ ++static inline int pciback_config_add_fields_offset(struct pci_dev *dev, ++ const struct config_field *field, ++ unsigned int offset) ++{ ++ int i, err = 0; ++ for (i = 0; field[i].size != 0; i++) { ++ err = pciback_config_add_field_offset(dev, &field[i], offset); ++ if (err) ++ break; ++ } ++ return err; ++} ++ ++/* Read/Write the real configuration space */ ++int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 * value, ++ void *data); ++int pciback_read_config_word(struct pci_dev *dev, int offset, u16 * value, ++ void *data); ++int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 * value, ++ void *data); ++int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value, ++ void *data); ++int pciback_write_config_word(struct pci_dev *dev, int offset, u16 value, ++ void *data); ++int pciback_write_config_dword(struct pci_dev *dev, int offset, u32 value, ++ void *data); ++ ++int pciback_config_capability_init(void); ++ ++int pciback_config_header_add_fields(struct pci_dev *dev); ++int pciback_config_capability_add_fields(struct pci_dev *dev); ++ ++#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */ +diff --git a/drivers/xen/pciback/conf_space_capability.c b/drivers/xen/pciback/conf_space_capability.c +new file mode 100644 +index 0000000..50efca4 +--- /dev/null ++++ b/drivers/xen/pciback/conf_space_capability.c +@@ -0,0 +1,69 @@ ++/* ++ * PCI Backend - Handles the virtual fields found on the capability lists ++ * in the configuration space. 
++ * ++ * Author: Ryan Wilson ++ */ ++ ++#include ++#include ++#include "pciback.h" ++#include "conf_space.h" ++#include "conf_space_capability.h" ++ ++static LIST_HEAD(capabilities); ++ ++static const struct config_field caplist_header[] = { ++ { ++ .offset = PCI_CAP_LIST_ID, ++ .size = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */ ++ .u.w.read = pciback_read_config_word, ++ .u.w.write = NULL, ++ }, ++ {} ++}; ++ ++static inline void register_capability(struct pciback_config_capability *cap) ++{ ++ list_add_tail(&cap->cap_list, &capabilities); ++} ++ ++int pciback_config_capability_add_fields(struct pci_dev *dev) ++{ ++ int err = 0; ++ struct pciback_config_capability *cap; ++ int cap_offset; ++ ++ list_for_each_entry(cap, &capabilities, cap_list) { ++ cap_offset = pci_find_capability(dev, cap->capability); ++ if (cap_offset) { ++ dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n", ++ cap->capability, cap_offset); ++ ++ err = pciback_config_add_fields_offset(dev, ++ caplist_header, ++ cap_offset); ++ if (err) ++ goto out; ++ err = pciback_config_add_fields_offset(dev, ++ cap->fields, ++ cap_offset); ++ if (err) ++ goto out; ++ } ++ } ++ ++ out: ++ return err; ++} ++ ++extern struct pciback_config_capability pciback_config_capability_vpd; ++extern struct pciback_config_capability pciback_config_capability_pm; ++ ++int pciback_config_capability_init(void) ++{ ++ register_capability(&pciback_config_capability_vpd); ++ register_capability(&pciback_config_capability_pm); ++ ++ return 0; ++} +diff --git a/drivers/xen/pciback/conf_space_capability.h b/drivers/xen/pciback/conf_space_capability.h +new file mode 100644 +index 0000000..823392e +--- /dev/null ++++ b/drivers/xen/pciback/conf_space_capability.h +@@ -0,0 +1,23 @@ ++/* ++ * PCI Backend - Data structures for special overlays for structures on ++ * the capability list. 
++ * ++ * Author: Ryan Wilson ++ */ ++ ++#ifndef __PCIBACK_CONFIG_CAPABILITY_H__ ++#define __PCIBACK_CONFIG_CAPABILITY_H__ ++ ++#include ++#include ++ ++struct pciback_config_capability { ++ struct list_head cap_list; ++ ++ int capability; ++ ++ /* If the device has the capability found above, add these fields */ ++ const struct config_field *fields; ++}; ++ ++#endif +diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c +new file mode 100644 +index 0000000..762e396 +--- /dev/null ++++ b/drivers/xen/pciback/conf_space_capability_msi.c +@@ -0,0 +1,79 @@ ++/* ++ * PCI Backend -- Configuration overlay for MSI capability ++ */ ++#include ++#include ++#include "conf_space.h" ++#include "conf_space_capability.h" ++#include ++#include "pciback.h" ++ ++int pciback_enable_msi(struct pciback_device *pdev, ++ struct pci_dev *dev, struct xen_pci_op *op) ++{ ++ int otherend = pdev->xdev->otherend_id; ++ int status; ++ ++ status = pci_enable_msi(dev); ++ ++ if (status) { ++ printk("error enable msi for guest %x status %x\n", otherend, status); ++ op->value = 0; ++ return XEN_PCI_ERR_op_failed; ++ } ++ ++ op->value = dev->irq; ++ return 0; ++} ++ ++int pciback_disable_msi(struct pciback_device *pdev, ++ struct pci_dev *dev, struct xen_pci_op *op) ++{ ++ pci_disable_msi(dev); ++ ++ op->value = dev->irq; ++ return 0; ++} ++ ++int pciback_enable_msix(struct pciback_device *pdev, ++ struct pci_dev *dev, struct xen_pci_op *op) ++{ ++ int i, result; ++ struct msix_entry *entries; ++ ++ if (op->value > SH_INFO_MAX_VEC) ++ return -EINVAL; ++ ++ entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL); ++ if (entries == NULL) ++ return -ENOMEM; ++ ++ for (i = 0; i < op->value; i++) { ++ entries[i].entry = op->msix_entries[i].entry; ++ entries[i].vector = op->msix_entries[i].vector; ++ } ++ ++ result = pci_enable_msix(dev, entries, op->value); ++ ++ for (i = 0; i < op->value; i++) { ++ op->msix_entries[i].entry = entries[i].entry; ++ op->msix_entries[i].vector = entries[i].vector; ++ } ++ ++ kfree(entries); ++ ++ op->value = result; ++ ++ return result; ++} ++ ++int pciback_disable_msix(struct pciback_device *pdev, ++ struct pci_dev *dev, struct xen_pci_op *op) ++{ ++ ++ pci_disable_msix(dev); ++ ++ op->value = dev->irq; ++ return 0; ++} ++ +diff --git a/drivers/xen/pciback/conf_space_capability_pm.c b/drivers/xen/pciback/conf_space_capability_pm.c +new file mode 100644 +index 0000000..e2f99c7 +--- /dev/null ++++ b/drivers/xen/pciback/conf_space_capability_pm.c +@@ -0,0 +1,126 @@ ++/* ++ * PCI Backend - Configuration space overlay for power management ++ * ++ * Author: Ryan Wilson ++ */ ++ ++#include ++#include "conf_space.h" ++#include "conf_space_capability.h" ++ ++static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value, ++ void *data) ++{ ++ int err; ++ u16 real_value; ++ ++ err = pci_read_config_word(dev, offset, &real_value); ++ if (err) ++ goto out; ++ ++ *value = real_value & ~PCI_PM_CAP_PME_MASK; ++ ++ out: ++ return err; ++} ++ ++/* PM_OK_BITS specifies the bits that the driver domain is allowed to change. 
++ * Can't allow driver domain to enable PMEs - they're shared */ ++#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK) ++ ++static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value, ++ void *data) ++{ ++ int err; ++ u16 old_value; ++ pci_power_t new_state, old_state; ++ ++ err = pci_read_config_word(dev, offset, &old_value); ++ if (err) ++ goto out; ++ ++ old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK); ++ new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK); ++ ++ new_value &= PM_OK_BITS; ++ if ((old_value & PM_OK_BITS) != new_value) { ++ new_value = (old_value & ~PM_OK_BITS) | new_value; ++ err = pci_write_config_word(dev, offset, new_value); ++ if (err) ++ goto out; ++ } ++ ++ /* Let pci core handle the power management change */ ++ dev_dbg(&dev->dev, "set power state to %x\n", new_state); ++ err = pci_set_power_state(dev, new_state); ++ if (err) { ++ err = PCIBIOS_SET_FAILED; ++ goto out; ++ } ++ ++ /* ++ * Device may lose PCI config info on D3->D0 transition. This ++ * is a problem for some guests which will not reset BARs. Even ++ * those that have a go will be foiled by our BAR-write handler ++ * which will discard the write! Since Linux won't re-init ++ * the config space automatically in all cases, we do it here. ++ * Future: Should we re-initialise all first 64 bytes of config space? ++ */ ++ if (new_state == PCI_D0 && ++ (old_state == PCI_D3hot || old_state == PCI_D3cold) && ++ !(old_value & PCI_PM_CTRL_NO_SOFT_RESET)) ++ pci_restore_bars(dev); ++ ++ out: ++ return err; ++} ++ ++/* Ensure PMEs are disabled */ ++static void *pm_ctrl_init(struct pci_dev *dev, int offset) ++{ ++ int err; ++ u16 value; ++ ++ err = pci_read_config_word(dev, offset, &value); ++ if (err) ++ goto out; ++ ++ if (value & PCI_PM_CTRL_PME_ENABLE) { ++ value &= ~PCI_PM_CTRL_PME_ENABLE; ++ err = pci_write_config_word(dev, offset, value); ++ } ++ ++ out: ++ return ERR_PTR(err); ++} ++ ++static const struct config_field caplist_pm[] = { ++ { ++ .offset = PCI_PM_PMC, ++ .size = 2, ++ .u.w.read = pm_caps_read, ++ }, ++ { ++ .offset = PCI_PM_CTRL, ++ .size = 2, ++ .init = pm_ctrl_init, ++ .u.w.read = pciback_read_config_word, ++ .u.w.write = pm_ctrl_write, ++ }, ++ { ++ .offset = PCI_PM_PPB_EXTENSIONS, ++ .size = 1, ++ .u.b.read = pciback_read_config_byte, ++ }, ++ { ++ .offset = PCI_PM_DATA_REGISTER, ++ .size = 1, ++ .u.b.read = pciback_read_config_byte, ++ }, ++ {} ++}; ++ ++struct pciback_config_capability pciback_config_capability_pm = { ++ .capability = PCI_CAP_ID_PM, ++ .fields = caplist_pm, ++}; +diff --git a/drivers/xen/pciback/conf_space_capability_vpd.c b/drivers/xen/pciback/conf_space_capability_vpd.c +new file mode 100644 +index 0000000..920cb4a +--- /dev/null ++++ b/drivers/xen/pciback/conf_space_capability_vpd.c +@@ -0,0 +1,40 @@ ++/* ++ * PCI Backend - Configuration space overlay for Vital Product Data ++ * ++ * Author: Ryan Wilson ++ */ ++ ++#include ++#include "conf_space.h" ++#include "conf_space_capability.h" ++ ++static int vpd_address_write(struct pci_dev *dev, int offset, u16 value, ++ void *data) ++{ ++ /* Disallow writes to the vital product data */ ++ if (value & PCI_VPD_ADDR_F) ++ return PCIBIOS_SET_FAILED; ++ else ++ return pci_write_config_word(dev, offset, value); ++} ++ ++static const struct config_field caplist_vpd[] = { ++ { ++ .offset = PCI_VPD_ADDR, ++ .size = 2, ++ .u.w.read = pciback_read_config_word, ++ .u.w.write = vpd_address_write, ++ }, ++ { ++ .offset = PCI_VPD_DATA, ++ .size = 4, ++ .u.dw.read = 
pciback_read_config_dword, ++ .u.dw.write = NULL, ++ }, ++ {} ++}; ++ ++struct pciback_config_capability pciback_config_capability_vpd = { ++ .capability = PCI_CAP_ID_VPD, ++ .fields = caplist_vpd, ++}; +diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c +new file mode 100644 +index 0000000..f794e12 +--- /dev/null ++++ b/drivers/xen/pciback/conf_space_header.c +@@ -0,0 +1,317 @@ ++/* ++ * PCI Backend - Handles the virtual fields in the configuration space headers. ++ * ++ * Author: Ryan Wilson ++ */ ++ ++#include ++#include ++#include "pciback.h" ++#include "conf_space.h" ++ ++struct pci_bar_info { ++ u32 val; ++ u32 len_val; ++ int which; ++}; ++ ++#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO)) ++#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER) ++ ++static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) ++{ ++ int err; ++ ++ if (!dev->is_enabled && is_enable_cmd(value)) { ++ if (unlikely(verbose_request)) ++ printk(KERN_DEBUG "pciback: %s: enable\n", ++ pci_name(dev)); ++ err = pci_enable_device(dev); ++ if (err) ++ return err; ++ } else if (dev->is_enabled && !is_enable_cmd(value)) { ++ if (unlikely(verbose_request)) ++ printk(KERN_DEBUG "pciback: %s: disable\n", ++ pci_name(dev)); ++ pci_disable_device(dev); ++ } ++ ++ if (!dev->is_busmaster && is_master_cmd(value)) { ++ if (unlikely(verbose_request)) ++ printk(KERN_DEBUG "pciback: %s: set bus master\n", ++ pci_name(dev)); ++ pci_set_master(dev); ++ } ++ ++ if (value & PCI_COMMAND_INVALIDATE) { ++ if (unlikely(verbose_request)) ++ printk(KERN_DEBUG ++ "pciback: %s: enable memory-write-invalidate\n", ++ pci_name(dev)); ++ err = pci_set_mwi(dev); ++ if (err) { ++ printk(KERN_WARNING ++ "pciback: %s: cannot enable memory-write-invalidate (%d)\n", ++ pci_name(dev), err); ++ value &= ~PCI_COMMAND_INVALIDATE; ++ } ++ } ++ ++ return pci_write_config_word(dev, offset, value); ++} ++ ++static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data) ++{ ++ struct pci_bar_info *bar = data; ++ ++ if (unlikely(!bar)) { ++ printk(KERN_WARNING "pciback: driver data not found for %s\n", ++ pci_name(dev)); ++ return XEN_PCI_ERR_op_failed; ++ } ++ ++ /* A write to obtain the length must happen as a 32-bit write. ++ * This does not (yet) support writing individual bytes ++ */ ++ if (value == ~PCI_ROM_ADDRESS_ENABLE) ++ bar->which = 1; ++ else { ++ u32 tmpval; ++ pci_read_config_dword(dev, offset, &tmpval); ++ if (tmpval != bar->val && value == bar->val) { ++ /* Allow restoration of bar value. */ ++ pci_write_config_dword(dev, offset, bar->val); ++ } ++ bar->which = 0; ++ } ++ ++ /* Do we need to support enabling/disabling the rom address here? */ ++ ++ return 0; ++} ++ ++/* For the BARs, only allow writes which write ~0 or ++ * the correct resource information ++ * (Needed for when the driver probes the resource usage) ++ */ ++static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data) ++{ ++ struct pci_bar_info *bar = data; ++ ++ if (unlikely(!bar)) { ++ printk(KERN_WARNING "pciback: driver data not found for %s\n", ++ pci_name(dev)); ++ return XEN_PCI_ERR_op_failed; ++ } ++ ++ /* A write to obtain the length must happen as a 32-bit write. ++ * This does not (yet) support writing individual bytes ++ */ ++ if (value == ~0) ++ bar->which = 1; ++ else { ++ u32 tmpval; ++ pci_read_config_dword(dev, offset, &tmpval); ++ if (tmpval != bar->val && value == bar->val) { ++ /* Allow restoration of bar value. 
*/ ++ pci_write_config_dword(dev, offset, bar->val); ++ } ++ bar->which = 0; ++ } ++ ++ return 0; ++} ++ ++static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data) ++{ ++ struct pci_bar_info *bar = data; ++ ++ if (unlikely(!bar)) { ++ printk(KERN_WARNING "pciback: driver data not found for %s\n", ++ pci_name(dev)); ++ return XEN_PCI_ERR_op_failed; ++ } ++ ++ *value = bar->which ? bar->len_val : bar->val; ++ ++ return 0; ++} ++ ++static inline void read_dev_bar(struct pci_dev *dev, ++ struct pci_bar_info *bar_info, int offset, ++ u32 len_mask) ++{ ++ pci_read_config_dword(dev, offset, &bar_info->val); ++ pci_write_config_dword(dev, offset, len_mask); ++ pci_read_config_dword(dev, offset, &bar_info->len_val); ++ pci_write_config_dword(dev, offset, bar_info->val); ++} ++ ++static void *bar_init(struct pci_dev *dev, int offset) ++{ ++ struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL); ++ ++ if (!bar) ++ return ERR_PTR(-ENOMEM); ++ ++ read_dev_bar(dev, bar, offset, ~0); ++ bar->which = 0; ++ ++ return bar; ++} ++ ++static void *rom_init(struct pci_dev *dev, int offset) ++{ ++ struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL); ++ ++ if (!bar) ++ return ERR_PTR(-ENOMEM); ++ ++ read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE); ++ bar->which = 0; ++ ++ return bar; ++} ++ ++static void bar_reset(struct pci_dev *dev, int offset, void *data) ++{ ++ struct pci_bar_info *bar = data; ++ ++ bar->which = 0; ++} ++ ++static void bar_release(struct pci_dev *dev, int offset, void *data) ++{ ++ kfree(data); ++} ++ ++static int interrupt_read(struct pci_dev *dev, int offset, u8 * value, ++ void *data) ++{ ++ *value = (u8) dev->irq; ++ ++ return 0; ++} ++ ++static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data) ++{ ++ u8 cur_value; ++ int err; ++ ++ err = pci_read_config_byte(dev, offset, &cur_value); ++ if (err) ++ goto out; ++ ++ if ((cur_value & ~PCI_BIST_START) == (value & ~PCI_BIST_START) ++ || value == PCI_BIST_START) ++ err = pci_write_config_byte(dev, offset, value); ++ ++ out: ++ return err; ++} ++ ++static const struct config_field header_common[] = { ++ { ++ .offset = PCI_COMMAND, ++ .size = 2, ++ .u.w.read = pciback_read_config_word, ++ .u.w.write = command_write, ++ }, ++ { ++ .offset = PCI_INTERRUPT_LINE, ++ .size = 1, ++ .u.b.read = interrupt_read, ++ }, ++ { ++ .offset = PCI_INTERRUPT_PIN, ++ .size = 1, ++ .u.b.read = pciback_read_config_byte, ++ }, ++ { ++ /* Any side effects of letting driver domain control cache line? 
*/ ++ .offset = PCI_CACHE_LINE_SIZE, ++ .size = 1, ++ .u.b.read = pciback_read_config_byte, ++ .u.b.write = pciback_write_config_byte, ++ }, ++ { ++ .offset = PCI_LATENCY_TIMER, ++ .size = 1, ++ .u.b.read = pciback_read_config_byte, ++ }, ++ { ++ .offset = PCI_BIST, ++ .size = 1, ++ .u.b.read = pciback_read_config_byte, ++ .u.b.write = bist_write, ++ }, ++ {} ++}; ++ ++#define CFG_FIELD_BAR(reg_offset) \ ++ { \ ++ .offset = reg_offset, \ ++ .size = 4, \ ++ .init = bar_init, \ ++ .reset = bar_reset, \ ++ .release = bar_release, \ ++ .u.dw.read = bar_read, \ ++ .u.dw.write = bar_write, \ ++ } ++ ++#define CFG_FIELD_ROM(reg_offset) \ ++ { \ ++ .offset = reg_offset, \ ++ .size = 4, \ ++ .init = rom_init, \ ++ .reset = bar_reset, \ ++ .release = bar_release, \ ++ .u.dw.read = bar_read, \ ++ .u.dw.write = rom_write, \ ++ } ++ ++static const struct config_field header_0[] = { ++ CFG_FIELD_BAR(PCI_BASE_ADDRESS_0), ++ CFG_FIELD_BAR(PCI_BASE_ADDRESS_1), ++ CFG_FIELD_BAR(PCI_BASE_ADDRESS_2), ++ CFG_FIELD_BAR(PCI_BASE_ADDRESS_3), ++ CFG_FIELD_BAR(PCI_BASE_ADDRESS_4), ++ CFG_FIELD_BAR(PCI_BASE_ADDRESS_5), ++ CFG_FIELD_ROM(PCI_ROM_ADDRESS), ++ {} ++}; ++ ++static const struct config_field header_1[] = { ++ CFG_FIELD_BAR(PCI_BASE_ADDRESS_0), ++ CFG_FIELD_BAR(PCI_BASE_ADDRESS_1), ++ CFG_FIELD_ROM(PCI_ROM_ADDRESS1), ++ {} ++}; ++ ++int pciback_config_header_add_fields(struct pci_dev *dev) ++{ ++ int err; ++ ++ err = pciback_config_add_fields(dev, header_common); ++ if (err) ++ goto out; ++ ++ switch (dev->hdr_type) { ++ case PCI_HEADER_TYPE_NORMAL: ++ err = pciback_config_add_fields(dev, header_0); ++ break; ++ ++ case PCI_HEADER_TYPE_BRIDGE: ++ err = pciback_config_add_fields(dev, header_1); ++ break; ++ ++ default: ++ err = -EINVAL; ++ printk(KERN_ERR "pciback: %s: Unsupported header type %d!\n", ++ pci_name(dev), dev->hdr_type); ++ break; ++ } ++ ++ out: ++ return err; ++} +diff --git a/drivers/xen/pciback/conf_space_quirks.c b/drivers/xen/pciback/conf_space_quirks.c +new file mode 100644 +index 0000000..244a438 +--- /dev/null ++++ b/drivers/xen/pciback/conf_space_quirks.c +@@ -0,0 +1,138 @@ ++/* ++ * PCI Backend - Handle special overlays for broken devices. 
++ * ++ * Author: Ryan Wilson ++ * Author: Chris Bookholt ++ */ ++ ++#include ++#include ++#include "pciback.h" ++#include "conf_space.h" ++#include "conf_space_quirks.h" ++ ++LIST_HEAD(pciback_quirks); ++ ++static inline const struct pci_device_id * ++match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) ++{ ++ if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) && ++ (id->device == PCI_ANY_ID || id->device == dev->device) && ++ (id->subvendor == PCI_ANY_ID || id->subvendor == dev->subsystem_vendor) && ++ (id->subdevice == PCI_ANY_ID || id->subdevice == dev->subsystem_device) && ++ !((id->class ^ dev->class) & id->class_mask)) ++ return id; ++ return NULL; ++} ++ ++struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev) ++{ ++ struct pciback_config_quirk *tmp_quirk; ++ ++ list_for_each_entry(tmp_quirk, &pciback_quirks, quirks_list) ++ if (match_one_device(&tmp_quirk->devid, dev) != NULL) ++ goto out; ++ tmp_quirk = NULL; ++ printk(KERN_DEBUG ++ "quirk didn't match any device pciback knows about\n"); ++ out: ++ return tmp_quirk; ++} ++ ++static inline void register_quirk(struct pciback_config_quirk *quirk) ++{ ++ list_add_tail(&quirk->quirks_list, &pciback_quirks); ++} ++ ++int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg) ++{ ++ int ret = 0; ++ struct pciback_dev_data *dev_data = pci_get_drvdata(dev); ++ struct config_field_entry *cfg_entry; ++ ++ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { ++ if ( OFFSET(cfg_entry) == reg) { ++ ret = 1; ++ break; ++ } ++ } ++ return ret; ++} ++ ++int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field ++ *field) ++{ ++ int err = 0; ++ ++ switch (field->size) { ++ case 1: ++ field->u.b.read = pciback_read_config_byte; ++ field->u.b.write = pciback_write_config_byte; ++ break; ++ case 2: ++ field->u.w.read = pciback_read_config_word; ++ field->u.w.write = pciback_write_config_word; ++ break; ++ case 4: ++ field->u.dw.read = pciback_read_config_dword; ++ field->u.dw.write = pciback_write_config_dword; ++ break; ++ default: ++ err = -EINVAL; ++ goto out; ++ } ++ ++ pciback_config_add_field(dev, field); ++ ++ out: ++ return err; ++} ++ ++int pciback_config_quirks_init(struct pci_dev *dev) ++{ ++ struct pciback_config_quirk *quirk; ++ int ret = 0; ++ ++ quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC); ++ if (!quirk) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ quirk->devid.vendor = dev->vendor; ++ quirk->devid.device = dev->device; ++ quirk->devid.subvendor = dev->subsystem_vendor; ++ quirk->devid.subdevice = dev->subsystem_device; ++ quirk->devid.class = 0; ++ quirk->devid.class_mask = 0; ++ quirk->devid.driver_data = 0UL; ++ ++ quirk->pdev = dev; ++ ++ register_quirk(quirk); ++ out: ++ return ret; ++} ++ ++void pciback_config_field_free(struct config_field *field) ++{ ++ kfree(field); ++} ++ ++int pciback_config_quirk_release(struct pci_dev *dev) ++{ ++ struct pciback_config_quirk *quirk; ++ int ret = 0; ++ ++ quirk = pciback_find_quirk(dev); ++ if (!quirk) { ++ ret = -ENXIO; ++ goto out; ++ } ++ ++ list_del(&quirk->quirks_list); ++ kfree(quirk); ++ ++ out: ++ return ret; ++} +diff --git a/drivers/xen/pciback/conf_space_quirks.h b/drivers/xen/pciback/conf_space_quirks.h +new file mode 100644 +index 0000000..acd0e1a +--- /dev/null ++++ b/drivers/xen/pciback/conf_space_quirks.h +@@ -0,0 +1,35 @@ ++/* ++ * PCI Backend - Data structures for special overlays for broken devices. 
++ * ++ * Ryan Wilson ++ * Chris Bookholt ++ */ ++ ++#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__ ++#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__ ++ ++#include ++#include ++ ++struct pciback_config_quirk { ++ struct list_head quirks_list; ++ struct pci_device_id devid; ++ struct pci_dev *pdev; ++}; ++ ++struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev); ++ ++int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field ++ *field); ++ ++int pciback_config_quirks_remove_field(struct pci_dev *dev, int reg); ++ ++int pciback_config_quirks_init(struct pci_dev *dev); ++ ++void pciback_config_field_free(struct config_field *field); ++ ++int pciback_config_quirk_release(struct pci_dev *dev); ++ ++int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg); ++ ++#endif +diff --git a/drivers/xen/pciback/controller.c b/drivers/xen/pciback/controller.c +new file mode 100644 +index 0000000..294e48f +--- /dev/null ++++ b/drivers/xen/pciback/controller.c +@@ -0,0 +1,443 @@ ++/* ++ * Copyright (C) 2007 Hewlett-Packard Development Company, L.P. ++ * Alex Williamson ++ * ++ * PCI "Controller" Backend - virtualize PCI bus topology based on PCI ++ * controllers. Devices under the same PCI controller are exposed on the ++ * same virtual domain:bus. Within a bus, device slots are virtualized ++ * to compact the bus. ++ * ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ */ ++ ++#include ++#include ++#include ++#include ++#include "pciback.h" ++ ++#define PCI_MAX_BUSSES 255 ++#define PCI_MAX_SLOTS 32 ++ ++struct controller_dev_entry { ++ struct list_head list; ++ struct pci_dev *dev; ++ unsigned int devfn; ++}; ++ ++struct controller_list_entry { ++ struct list_head list; ++ struct pci_controller *controller; ++ unsigned int domain; ++ unsigned int bus; ++ unsigned int next_devfn; ++ struct list_head dev_list; ++}; ++ ++struct controller_dev_data { ++ struct list_head list; ++ unsigned int next_domain; ++ unsigned int next_bus; ++ spinlock_t lock; ++}; ++ ++struct walk_info { ++ struct pciback_device *pdev; ++ int resource_count; ++ int root_num; ++}; ++ ++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, ++ unsigned int domain, unsigned int bus, ++ unsigned int devfn) ++{ ++ struct controller_dev_data *dev_data = pdev->pci_dev_data; ++ struct controller_dev_entry *dev_entry; ++ struct controller_list_entry *cntrl_entry; ++ struct pci_dev *dev = NULL; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&dev_data->lock, flags); ++ ++ list_for_each_entry(cntrl_entry, &dev_data->list, list) { ++ if (cntrl_entry->domain != domain || ++ cntrl_entry->bus != bus) ++ continue; ++ ++ list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) { ++ if (devfn == dev_entry->devfn) { ++ dev = dev_entry->dev; ++ goto found; ++ } ++ } ++ } ++found: ++ spin_unlock_irqrestore(&dev_data->lock, flags); ++ ++ return dev; ++} ++ ++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, ++ int devid, publish_pci_dev_cb publish_cb) ++{ ++ struct controller_dev_data *dev_data = pdev->pci_dev_data; ++ struct controller_dev_entry *dev_entry; ++ struct controller_list_entry *cntrl_entry; ++ struct pci_controller *dev_controller = PCI_CONTROLLER(dev); ++ unsigned long flags; ++ int ret = 0, found = 0; ++ ++ spin_lock_irqsave(&dev_data->lock, flags); ++ ++ /* Look to see if we already have a domain:bus for this controller */ ++ list_for_each_entry(cntrl_entry, &dev_data->list, list) { ++ if (cntrl_entry->controller == dev_controller) { ++ found = 1; ++ break; ++ } ++ } ++ ++ if (!found) { ++ cntrl_entry = kmalloc(sizeof(*cntrl_entry), GFP_ATOMIC); ++ if (!cntrl_entry) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ cntrl_entry->controller = dev_controller; ++ cntrl_entry->next_devfn = PCI_DEVFN(0, 0); ++ ++ cntrl_entry->domain = dev_data->next_domain; ++ cntrl_entry->bus = dev_data->next_bus++; ++ if (dev_data->next_bus > PCI_MAX_BUSSES) { ++ dev_data->next_domain++; ++ dev_data->next_bus = 0; ++ } ++ ++ INIT_LIST_HEAD(&cntrl_entry->dev_list); ++ ++ list_add_tail(&cntrl_entry->list, &dev_data->list); ++ } ++ ++ if (PCI_SLOT(cntrl_entry->next_devfn) > PCI_MAX_SLOTS) { ++ /* ++ * While it seems unlikely, this can actually happen if ++ * a controller has P2P bridges under it. 
++ */ ++ xenbus_dev_fatal(pdev->xdev, -ENOSPC, "Virtual bus %04x:%02x " ++ "is full, no room to export %04x:%02x:%02x.%x", ++ cntrl_entry->domain, cntrl_entry->bus, ++ pci_domain_nr(dev->bus), dev->bus->number, ++ PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); ++ ret = -ENOSPC; ++ goto out; ++ } ++ ++ dev_entry = kmalloc(sizeof(*dev_entry), GFP_ATOMIC); ++ if (!dev_entry) { ++ if (list_empty(&cntrl_entry->dev_list)) { ++ list_del(&cntrl_entry->list); ++ kfree(cntrl_entry); ++ } ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ dev_entry->dev = dev; ++ dev_entry->devfn = cntrl_entry->next_devfn; ++ ++ list_add_tail(&dev_entry->list, &cntrl_entry->dev_list); ++ ++ cntrl_entry->next_devfn += PCI_DEVFN(1, 0); ++ ++out: ++ spin_unlock_irqrestore(&dev_data->lock, flags); ++ ++ /* TODO: Publish virtual domain:bus:slot.func here. */ ++ ++ return ret; ++} ++ ++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) ++{ ++ struct controller_dev_data *dev_data = pdev->pci_dev_data; ++ struct controller_list_entry *cntrl_entry; ++ struct controller_dev_entry *dev_entry = NULL; ++ struct pci_dev *found_dev = NULL; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&dev_data->lock, flags); ++ ++ list_for_each_entry(cntrl_entry, &dev_data->list, list) { ++ if (cntrl_entry->controller != PCI_CONTROLLER(dev)) ++ continue; ++ ++ list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) { ++ if (dev_entry->dev == dev) { ++ found_dev = dev_entry->dev; ++ break; ++ } ++ } ++ } ++ ++ if (!found_dev) { ++ spin_unlock_irqrestore(&dev_data->lock, flags); ++ return; ++ } ++ ++ list_del(&dev_entry->list); ++ kfree(dev_entry); ++ ++ if (list_empty(&cntrl_entry->dev_list)) { ++ list_del(&cntrl_entry->list); ++ kfree(cntrl_entry); ++ } ++ ++ spin_unlock_irqrestore(&dev_data->lock, flags); ++ pcistub_put_pci_dev(found_dev); ++} ++ ++int pciback_init_devices(struct pciback_device *pdev) ++{ ++ struct controller_dev_data *dev_data; ++ ++ dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL); ++ if (!dev_data) ++ return -ENOMEM; ++ ++ spin_lock_init(&dev_data->lock); ++ ++ INIT_LIST_HEAD(&dev_data->list); ++ ++ /* Starting domain:bus numbers */ ++ dev_data->next_domain = 0; ++ dev_data->next_bus = 0; ++ ++ pdev->pci_dev_data = dev_data; ++ ++ return 0; ++} ++ ++static acpi_status write_xenbus_resource(struct acpi_resource *res, void *data) ++{ ++ struct walk_info *info = data; ++ struct acpi_resource_address64 addr; ++ acpi_status status; ++ int i, len, err; ++ char str[32], tmp[3]; ++ unsigned char *ptr, *buf; ++ ++ status = acpi_resource_to_address64(res, &addr); ++ ++ /* Do we care about this range? Let's check. */ ++ if (!ACPI_SUCCESS(status) || ++ !(addr.resource_type == ACPI_MEMORY_RANGE || ++ addr.resource_type == ACPI_IO_RANGE) || ++ !addr.address_length || addr.producer_consumer != ACPI_PRODUCER) ++ return AE_OK; ++ ++ /* ++ * Furthermore, we really only care to tell the guest about ++ * address ranges that require address translation of some sort. 
++ */ ++ if (!(addr.resource_type == ACPI_MEMORY_RANGE && ++ addr.info.mem.translation) && ++ !(addr.resource_type == ACPI_IO_RANGE && ++ addr.info.io.translation)) ++ return AE_OK; ++ ++ /* Store the resource in xenbus for the guest */ ++ len = snprintf(str, sizeof(str), "root-%d-resource-%d", ++ info->root_num, info->resource_count); ++ if (unlikely(len >= (sizeof(str) - 1))) ++ return AE_OK; ++ ++ buf = kzalloc((sizeof(*res) * 2) + 1, GFP_KERNEL); ++ if (!buf) ++ return AE_OK; ++ ++ /* Clean out resource_source */ ++ res->data.address64.resource_source.index = 0xFF; ++ res->data.address64.resource_source.string_length = 0; ++ res->data.address64.resource_source.string_ptr = NULL; ++ ++ ptr = (unsigned char *)res; ++ ++ /* Turn the acpi_resource into an ASCII byte stream */ ++ for (i = 0; i < sizeof(*res); i++) { ++ snprintf(tmp, sizeof(tmp), "%02x", ptr[i]); ++ strncat(buf, tmp, 2); ++ } ++ ++ err = xenbus_printf(XBT_NIL, info->pdev->xdev->nodename, ++ str, "%s", buf); ++ ++ if (!err) ++ info->resource_count++; ++ ++ kfree(buf); ++ ++ return AE_OK; ++} ++ ++int pciback_publish_pci_roots(struct pciback_device *pdev, ++ publish_pci_root_cb publish_root_cb) ++{ ++ struct controller_dev_data *dev_data = pdev->pci_dev_data; ++ struct controller_list_entry *cntrl_entry; ++ int i, root_num, len, err = 0; ++ unsigned int domain, bus; ++ char str[64]; ++ struct walk_info info; ++ ++ spin_lock(&dev_data->lock); ++ ++ list_for_each_entry(cntrl_entry, &dev_data->list, list) { ++ /* First publish all the domain:bus info */ ++ err = publish_root_cb(pdev, cntrl_entry->domain, ++ cntrl_entry->bus); ++ if (err) ++ goto out; ++ ++ /* ++ * Now figure out which root-%d this belongs to ++ * so we can associate resources with it. ++ */ ++ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, ++ "root_num", "%d", &root_num); ++ ++ if (err != 1) ++ goto out; ++ ++ for (i = 0; i < root_num; i++) { ++ len = snprintf(str, sizeof(str), "root-%d", i); ++ if (unlikely(len >= (sizeof(str) - 1))) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, ++ str, "%x:%x", &domain, &bus); ++ if (err != 2) ++ goto out; ++ ++ /* Is this the one we just published? */ ++ if (domain == cntrl_entry->domain && ++ bus == cntrl_entry->bus) ++ break; ++ } ++ ++ if (i == root_num) ++ goto out; ++ ++ info.pdev = pdev; ++ info.resource_count = 0; ++ info.root_num = i; ++ ++ /* Let ACPI do the heavy lifting on decoding resources */ ++ acpi_walk_resources(cntrl_entry->controller->acpi_handle, ++ METHOD_NAME__CRS, write_xenbus_resource, ++ &info); ++ ++ /* No resouces. OK. On to the next one */ ++ if (!info.resource_count) ++ continue; ++ ++ /* Store the number of resources we wrote for this root-%d */ ++ len = snprintf(str, sizeof(str), "root-%d-resources", i); ++ if (unlikely(len >= (sizeof(str) - 1))) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, ++ "%d", info.resource_count); ++ if (err) ++ goto out; ++ } ++ ++ /* Finally, write some magic to synchronize with the guest. 
*/ ++ len = snprintf(str, sizeof(str), "root-resource-magic"); ++ if (unlikely(len >= (sizeof(str) - 1))) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, ++ "%lx", (sizeof(struct acpi_resource) * 2) + 1); ++ ++out: ++ spin_unlock(&dev_data->lock); ++ ++ return err; ++} ++ ++void pciback_release_devices(struct pciback_device *pdev) ++{ ++ struct controller_dev_data *dev_data = pdev->pci_dev_data; ++ struct controller_list_entry *cntrl_entry, *c; ++ struct controller_dev_entry *dev_entry, *d; ++ ++ list_for_each_entry_safe(cntrl_entry, c, &dev_data->list, list) { ++ list_for_each_entry_safe(dev_entry, d, ++ &cntrl_entry->dev_list, list) { ++ list_del(&dev_entry->list); ++ pcistub_put_pci_dev(dev_entry->dev); ++ kfree(dev_entry); ++ } ++ list_del(&cntrl_entry->list); ++ kfree(cntrl_entry); ++ } ++ ++ kfree(dev_data); ++ pdev->pci_dev_data = NULL; ++} ++ ++int pciback_get_pcifront_dev(struct pci_dev *pcidev, ++ struct pciback_device *pdev, ++ unsigned int *domain, unsigned int *bus, unsigned int *devfn) ++{ ++ struct controller_dev_data *dev_data = pdev->pci_dev_data; ++ struct controller_dev_entry *dev_entry; ++ struct controller_list_entry *cntrl_entry; ++ unsigned long flags; ++ int found = 0; ++ spin_lock_irqsave(&dev_data->lock, flags); ++ ++ list_for_each_entry(cntrl_entry, &dev_data->list, list) { ++ list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) { ++ if ( (dev_entry->dev->bus->number == ++ pcidev->bus->number) && ++ (dev_entry->dev->devfn == ++ pcidev->devfn) && ++ (pci_domain_nr(dev_entry->dev->bus) == ++ pci_domain_nr(pcidev->bus))) ++ { ++ found = 1; ++ *domain = cntrl_entry->domain; ++ *bus = cntrl_entry->bus; ++ *devfn = dev_entry->devfn; ++ goto out; ++ } ++ } ++ } ++out: ++ spin_unlock_irqrestore(&dev_data->lock, flags); ++ return found; ++ ++} ++ +diff --git a/drivers/xen/pciback/passthrough.c b/drivers/xen/pciback/passthrough.c +new file mode 100644 +index 0000000..9e7a0c4 +--- /dev/null ++++ b/drivers/xen/pciback/passthrough.c +@@ -0,0 +1,176 @@ ++/* ++ * PCI Backend - Provides restricted access to the real PCI bus topology ++ * to the frontend ++ * ++ * Author: Ryan Wilson ++ */ ++ ++#include ++#include ++#include ++#include "pciback.h" ++ ++struct passthrough_dev_data { ++ /* Access to dev_list must be protected by lock */ ++ struct list_head dev_list; ++ spinlock_t lock; ++}; ++ ++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, ++ unsigned int domain, unsigned int bus, ++ unsigned int devfn) ++{ ++ struct passthrough_dev_data *dev_data = pdev->pci_dev_data; ++ struct pci_dev_entry *dev_entry; ++ struct pci_dev *dev = NULL; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&dev_data->lock, flags); ++ ++ list_for_each_entry(dev_entry, &dev_data->dev_list, list) { ++ if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus) ++ && bus == (unsigned int)dev_entry->dev->bus->number ++ && devfn == dev_entry->dev->devfn) { ++ dev = dev_entry->dev; ++ break; ++ } ++ } ++ ++ spin_unlock_irqrestore(&dev_data->lock, flags); ++ ++ return dev; ++} ++ ++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, ++ int devid, publish_pci_dev_cb publish_cb) ++{ ++ struct passthrough_dev_data *dev_data = pdev->pci_dev_data; ++ struct pci_dev_entry *dev_entry; ++ unsigned long flags; ++ unsigned int domain, bus, devfn; ++ int err; ++ ++ dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL); ++ if (!dev_entry) ++ return -ENOMEM; ++ dev_entry->dev = dev; ++ ++ spin_lock_irqsave(&dev_data->lock, 
flags); ++ list_add_tail(&dev_entry->list, &dev_data->dev_list); ++ spin_unlock_irqrestore(&dev_data->lock, flags); ++ ++ /* Publish this device. */ ++ domain = (unsigned int)pci_domain_nr(dev->bus); ++ bus = (unsigned int)dev->bus->number; ++ devfn = dev->devfn; ++ err = publish_cb(pdev, domain, bus, devfn, devid); ++ ++ return err; ++} ++ ++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) ++{ ++ struct passthrough_dev_data *dev_data = pdev->pci_dev_data; ++ struct pci_dev_entry *dev_entry, *t; ++ struct pci_dev *found_dev = NULL; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&dev_data->lock, flags); ++ ++ list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) { ++ if (dev_entry->dev == dev) { ++ list_del(&dev_entry->list); ++ found_dev = dev_entry->dev; ++ kfree(dev_entry); ++ } ++ } ++ ++ spin_unlock_irqrestore(&dev_data->lock, flags); ++ ++ if (found_dev) ++ pcistub_put_pci_dev(found_dev); ++} ++ ++int pciback_init_devices(struct pciback_device *pdev) ++{ ++ struct passthrough_dev_data *dev_data; ++ ++ dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL); ++ if (!dev_data) ++ return -ENOMEM; ++ ++ spin_lock_init(&dev_data->lock); ++ ++ INIT_LIST_HEAD(&dev_data->dev_list); ++ ++ pdev->pci_dev_data = dev_data; ++ ++ return 0; ++} ++ ++int pciback_publish_pci_roots(struct pciback_device *pdev, ++ publish_pci_root_cb publish_root_cb) ++{ ++ int err = 0; ++ struct passthrough_dev_data *dev_data = pdev->pci_dev_data; ++ struct pci_dev_entry *dev_entry, *e; ++ struct pci_dev *dev; ++ int found; ++ unsigned int domain, bus; ++ ++ spin_lock(&dev_data->lock); ++ ++ list_for_each_entry(dev_entry, &dev_data->dev_list, list) { ++ /* Only publish this device as a root if none of its ++ * parent bridges are exported ++ */ ++ found = 0; ++ dev = dev_entry->dev->bus->self; ++ for (; !found && dev != NULL; dev = dev->bus->self) { ++ list_for_each_entry(e, &dev_data->dev_list, list) { ++ if (dev == e->dev) { ++ found = 1; ++ break; ++ } ++ } ++ } ++ ++ domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus); ++ bus = (unsigned int)dev_entry->dev->bus->number; ++ ++ if (!found) { ++ err = publish_root_cb(pdev, domain, bus); ++ if (err) ++ break; ++ } ++ } ++ ++ spin_unlock(&dev_data->lock); ++ ++ return err; ++} ++ ++void pciback_release_devices(struct pciback_device *pdev) ++{ ++ struct passthrough_dev_data *dev_data = pdev->pci_dev_data; ++ struct pci_dev_entry *dev_entry, *t; ++ ++ list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) { ++ list_del(&dev_entry->list); ++ pcistub_put_pci_dev(dev_entry->dev); ++ kfree(dev_entry); ++ } ++ ++ kfree(dev_data); ++ pdev->pci_dev_data = NULL; ++} ++ ++int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, ++ unsigned int *domain, unsigned int *bus, unsigned int *devfn) ++ ++{ ++ *domain = pci_domain_nr(pcidev->bus); ++ *bus = pcidev->bus->number; ++ *devfn = pcidev->devfn; ++ return 1; ++} +diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c +new file mode 100644 +index 0000000..c481a73 +--- /dev/null ++++ b/drivers/xen/pciback/pci_stub.c +@@ -0,0 +1,1316 @@ ++/* ++ * PCI Stub Driver - Grabs devices in backend to be exported later ++ * ++ * Ryan Wilson ++ * Chris Bookholt ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "pciback.h" ++#include "conf_space.h" ++#include "conf_space_quirks.h" ++ ++static char *pci_devs_to_hide = NULL; ++wait_queue_head_t aer_wait_queue; ++/*Add sem 
for sync AER handling and pciback remove/reconfigue ops, ++* We want to avoid in middle of AER ops, pciback devices is being removed ++*/ ++static DECLARE_RWSEM(pcistub_sem); ++module_param_named(hide, pci_devs_to_hide, charp, 0444); ++ ++struct pcistub_device_id { ++ struct list_head slot_list; ++ int domain; ++ unsigned char bus; ++ unsigned int devfn; ++}; ++static LIST_HEAD(pcistub_device_ids); ++static DEFINE_SPINLOCK(device_ids_lock); ++ ++struct pcistub_device { ++ struct kref kref; ++ struct list_head dev_list; ++ spinlock_t lock; ++ ++ struct pci_dev *dev; ++ struct pciback_device *pdev; /* non-NULL if struct pci_dev is in use */ ++}; ++ ++/* Access to pcistub_devices & seized_devices lists and the initialize_devices ++ * flag must be locked with pcistub_devices_lock ++ */ ++static DEFINE_SPINLOCK(pcistub_devices_lock); ++static LIST_HEAD(pcistub_devices); ++ ++/* wait for device_initcall before initializing our devices ++ * (see pcistub_init_devices_late) ++ */ ++static int initialize_devices = 0; ++static LIST_HEAD(seized_devices); ++ ++static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev) ++{ ++ struct pcistub_device *psdev; ++ ++ dev_dbg(&dev->dev, "pcistub_device_alloc\n"); ++ ++ psdev = kzalloc(sizeof(*psdev), GFP_ATOMIC); ++ if (!psdev) ++ return NULL; ++ ++ psdev->dev = pci_dev_get(dev); ++ if (!psdev->dev) { ++ kfree(psdev); ++ return NULL; ++ } ++ ++ kref_init(&psdev->kref); ++ spin_lock_init(&psdev->lock); ++ ++ return psdev; ++} ++ ++/* Don't call this directly as it's called by pcistub_device_put */ ++static void pcistub_device_release(struct kref *kref) ++{ ++ struct pcistub_device *psdev; ++ ++ psdev = container_of(kref, struct pcistub_device, kref); ++ ++ dev_dbg(&psdev->dev->dev, "pcistub_device_release\n"); ++ ++ /* Clean-up the device */ ++ pciback_reset_device(psdev->dev); ++ pciback_config_free_dyn_fields(psdev->dev); ++ pciback_config_free_dev(psdev->dev); ++ kfree(pci_get_drvdata(psdev->dev)); ++ pci_set_drvdata(psdev->dev, NULL); ++ ++ pci_dev_put(psdev->dev); ++ ++ kfree(psdev); ++} ++ ++static inline void pcistub_device_get(struct pcistub_device *psdev) ++{ ++ kref_get(&psdev->kref); ++} ++ ++static inline void pcistub_device_put(struct pcistub_device *psdev) ++{ ++ kref_put(&psdev->kref, pcistub_device_release); ++} ++ ++static struct pcistub_device *pcistub_device_find(int domain, int bus, ++ int slot, int func) ++{ ++ struct pcistub_device *psdev = NULL; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&pcistub_devices_lock, flags); ++ ++ list_for_each_entry(psdev, &pcistub_devices, dev_list) { ++ if (psdev->dev != NULL ++ && domain == pci_domain_nr(psdev->dev->bus) ++ && bus == psdev->dev->bus->number ++ && PCI_DEVFN(slot, func) == psdev->dev->devfn) { ++ pcistub_device_get(psdev); ++ goto out; ++ } ++ } ++ ++ /* didn't find it */ ++ psdev = NULL; ++ ++ out: ++ spin_unlock_irqrestore(&pcistub_devices_lock, flags); ++ return psdev; ++} ++ ++static struct pci_dev *pcistub_device_get_pci_dev(struct pciback_device *pdev, ++ struct pcistub_device *psdev) ++{ ++ struct pci_dev *pci_dev = NULL; ++ unsigned long flags; ++ ++ pcistub_device_get(psdev); ++ ++ spin_lock_irqsave(&psdev->lock, flags); ++ if (!psdev->pdev) { ++ psdev->pdev = pdev; ++ pci_dev = psdev->dev; ++ } ++ spin_unlock_irqrestore(&psdev->lock, flags); ++ ++ if (!pci_dev) ++ pcistub_device_put(psdev); ++ ++ return pci_dev; ++} ++ ++struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev, ++ int domain, int bus, ++ int slot, int func) ++{ ++ struct 
pcistub_device *psdev; ++ struct pci_dev *found_dev = NULL; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&pcistub_devices_lock, flags); ++ ++ list_for_each_entry(psdev, &pcistub_devices, dev_list) { ++ if (psdev->dev != NULL ++ && domain == pci_domain_nr(psdev->dev->bus) ++ && bus == psdev->dev->bus->number ++ && PCI_DEVFN(slot, func) == psdev->dev->devfn) { ++ found_dev = pcistub_device_get_pci_dev(pdev, psdev); ++ break; ++ } ++ } ++ ++ spin_unlock_irqrestore(&pcistub_devices_lock, flags); ++ return found_dev; ++} ++ ++struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev, ++ struct pci_dev *dev) ++{ ++ struct pcistub_device *psdev; ++ struct pci_dev *found_dev = NULL; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&pcistub_devices_lock, flags); ++ ++ list_for_each_entry(psdev, &pcistub_devices, dev_list) { ++ if (psdev->dev == dev) { ++ found_dev = pcistub_device_get_pci_dev(pdev, psdev); ++ break; ++ } ++ } ++ ++ spin_unlock_irqrestore(&pcistub_devices_lock, flags); ++ return found_dev; ++} ++ ++void pcistub_put_pci_dev(struct pci_dev *dev) ++{ ++ struct pcistub_device *psdev, *found_psdev = NULL; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&pcistub_devices_lock, flags); ++ ++ list_for_each_entry(psdev, &pcistub_devices, dev_list) { ++ if (psdev->dev == dev) { ++ found_psdev = psdev; ++ break; ++ } ++ } ++ ++ spin_unlock_irqrestore(&pcistub_devices_lock, flags); ++ ++ /*hold this lock for avoiding breaking link between ++ * pcistub and pciback when AER is in processing ++ */ ++ down_write(&pcistub_sem); ++ /* Cleanup our device ++ * (so it's ready for the next domain) ++ */ ++ pciback_reset_device(found_psdev->dev); ++ pciback_config_free_dyn_fields(found_psdev->dev); ++ pciback_config_reset_dev(found_psdev->dev); ++ ++ spin_lock_irqsave(&found_psdev->lock, flags); ++ found_psdev->pdev = NULL; ++ spin_unlock_irqrestore(&found_psdev->lock, flags); ++ ++ pcistub_device_put(found_psdev); ++ up_write(&pcistub_sem); ++} ++ ++static int __devinit pcistub_match_one(struct pci_dev *dev, ++ struct pcistub_device_id *pdev_id) ++{ ++ /* Match the specified device by domain, bus, slot, func and also if ++ * any of the device's parent bridges match. ++ */ ++ for (; dev != NULL; dev = dev->bus->self) { ++ if (pci_domain_nr(dev->bus) == pdev_id->domain ++ && dev->bus->number == pdev_id->bus ++ && dev->devfn == pdev_id->devfn) ++ return 1; ++ ++ /* Sometimes topmost bridge links to itself. */ ++ if (dev == dev->bus->self) ++ break; ++ } ++ ++ return 0; ++} ++ ++static int __devinit pcistub_match(struct pci_dev *dev) ++{ ++ struct pcistub_device_id *pdev_id; ++ unsigned long flags; ++ int found = 0; ++ ++ spin_lock_irqsave(&device_ids_lock, flags); ++ list_for_each_entry(pdev_id, &pcistub_device_ids, slot_list) { ++ if (pcistub_match_one(dev, pdev_id)) { ++ found = 1; ++ break; ++ } ++ } ++ spin_unlock_irqrestore(&device_ids_lock, flags); ++ ++ return found; ++} ++ ++static int __devinit pcistub_init_device(struct pci_dev *dev) ++{ ++ struct pciback_dev_data *dev_data; ++ int err = 0; ++ ++ dev_dbg(&dev->dev, "initializing...\n"); ++ ++ /* The PCI backend is not intended to be a module (or to work with ++ * removable PCI devices (yet). If it were, pciback_config_free() ++ * would need to be called somewhere to free the memory allocated ++ * here and then to call kfree(pci_get_drvdata(psdev->dev)). 
++ */ ++ dev_data = kzalloc(sizeof(*dev_data), GFP_ATOMIC); ++ if (!dev_data) { ++ err = -ENOMEM; ++ goto out; ++ } ++ pci_set_drvdata(dev, dev_data); ++ ++ dev_dbg(&dev->dev, "initializing config\n"); ++ ++ init_waitqueue_head(&aer_wait_queue); ++ err = pciback_config_init_dev(dev); ++ if (err) ++ goto out; ++ ++ /* HACK: Force device (& ACPI) to determine what IRQ it's on - we ++ * must do this here because pcibios_enable_device may specify ++ * the pci device's true irq (and possibly its other resources) ++ * if they differ from what's in the configuration space. ++ * This makes the assumption that the device's resources won't ++ * change after this point (otherwise this code may break!) ++ */ ++ dev_dbg(&dev->dev, "enabling device\n"); ++ err = pci_enable_device(dev); ++ if (err) ++ goto config_release; ++ ++ /* Now disable the device (this also ensures some private device ++ * data is setup before we export) ++ */ ++ dev_dbg(&dev->dev, "reset device\n"); ++ pciback_reset_device(dev); ++ ++ return 0; ++ ++ config_release: ++ pciback_config_free_dev(dev); ++ ++ out: ++ pci_set_drvdata(dev, NULL); ++ kfree(dev_data); ++ return err; ++} ++ ++/* ++ * Because some initialization still happens on ++ * devices during fs_initcall, we need to defer ++ * full initialization of our devices until ++ * device_initcall. ++ */ ++static int __init pcistub_init_devices_late(void) ++{ ++ struct pcistub_device *psdev; ++ unsigned long flags; ++ int err = 0; ++ ++ pr_debug("pciback: pcistub_init_devices_late\n"); ++ ++ spin_lock_irqsave(&pcistub_devices_lock, flags); ++ ++ while (!list_empty(&seized_devices)) { ++ psdev = container_of(seized_devices.next, ++ struct pcistub_device, dev_list); ++ list_del(&psdev->dev_list); ++ ++ spin_unlock_irqrestore(&pcistub_devices_lock, flags); ++ ++ err = pcistub_init_device(psdev->dev); ++ if (err) { ++ dev_err(&psdev->dev->dev, ++ "error %d initializing device\n", err); ++ kfree(psdev); ++ psdev = NULL; ++ } ++ ++ spin_lock_irqsave(&pcistub_devices_lock, flags); ++ ++ if (psdev) ++ list_add_tail(&psdev->dev_list, &pcistub_devices); ++ } ++ ++ initialize_devices = 1; ++ ++ spin_unlock_irqrestore(&pcistub_devices_lock, flags); ++ ++ return 0; ++} ++ ++static int __devinit pcistub_seize(struct pci_dev *dev) ++{ ++ struct pcistub_device *psdev; ++ unsigned long flags; ++ int err = 0; ++ ++ psdev = pcistub_device_alloc(dev); ++ if (!psdev) ++ return -ENOMEM; ++ ++ spin_lock_irqsave(&pcistub_devices_lock, flags); ++ ++ if (initialize_devices) { ++ spin_unlock_irqrestore(&pcistub_devices_lock, flags); ++ ++ /* don't want irqs disabled when calling pcistub_init_device */ ++ err = pcistub_init_device(psdev->dev); ++ ++ spin_lock_irqsave(&pcistub_devices_lock, flags); ++ ++ if (!err) ++ list_add(&psdev->dev_list, &pcistub_devices); ++ } else { ++ dev_dbg(&dev->dev, "deferring initialization\n"); ++ list_add(&psdev->dev_list, &seized_devices); ++ } ++ ++ spin_unlock_irqrestore(&pcistub_devices_lock, flags); ++ ++ if (err) ++ pcistub_device_put(psdev); ++ ++ return err; ++} ++ ++static int __devinit pcistub_probe(struct pci_dev *dev, ++ const struct pci_device_id *id) ++{ ++ int err = 0; ++ ++ dev_dbg(&dev->dev, "probing...\n"); ++ ++ if (pcistub_match(dev)) { ++ ++ if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL ++ && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { ++ dev_err(&dev->dev, "can't export pci devices that " ++ "don't have a normal (0) or bridge (1) " ++ "header type!\n"); ++ err = -ENODEV; ++ goto out; ++ } ++ ++ dev_info(&dev->dev, "seizing device\n"); ++ err = 
pcistub_seize(dev); ++#ifdef CONFIG_PCI_GUESTDEV ++ } else if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) { ++ if (!pci_is_guestdev(dev)) { ++ err = -ENODEV; ++ goto out; ++ } ++ ++ dev_info(&dev->dev, "seizing device\n"); ++ err = pcistub_seize(dev); ++#endif /* CONFIG_PCI_GUESTDEV */ ++ } else ++ /* Didn't find the device */ ++ err = -ENODEV; ++ ++ out: ++ return err; ++} ++ ++static void pcistub_remove(struct pci_dev *dev) ++{ ++ struct pcistub_device *psdev, *found_psdev = NULL; ++ unsigned long flags; ++ ++ dev_dbg(&dev->dev, "removing\n"); ++ ++ spin_lock_irqsave(&pcistub_devices_lock, flags); ++ ++ pciback_config_quirk_release(dev); ++ ++ list_for_each_entry(psdev, &pcistub_devices, dev_list) { ++ if (psdev->dev == dev) { ++ found_psdev = psdev; ++ break; ++ } ++ } ++ ++ spin_unlock_irqrestore(&pcistub_devices_lock, flags); ++ ++ if (found_psdev) { ++ dev_dbg(&dev->dev, "found device to remove - in use? %p\n", ++ found_psdev->pdev); ++ ++ if (found_psdev->pdev) { ++ printk(KERN_WARNING "pciback: ****** removing device " ++ "%s while still in-use! ******\n", ++ pci_name(found_psdev->dev)); ++ printk(KERN_WARNING "pciback: ****** driver domain may " ++ "still access this device's i/o resources!\n"); ++ printk(KERN_WARNING "pciback: ****** shutdown driver " ++ "domain before binding device\n"); ++ printk(KERN_WARNING "pciback: ****** to other drivers " ++ "or domains\n"); ++ ++ pciback_release_pci_dev(found_psdev->pdev, ++ found_psdev->dev); ++ } ++ ++ spin_lock_irqsave(&pcistub_devices_lock, flags); ++ list_del(&found_psdev->dev_list); ++ spin_unlock_irqrestore(&pcistub_devices_lock, flags); ++ ++ /* the final put for releasing from the list */ ++ pcistub_device_put(found_psdev); ++ } ++} ++ ++static const struct pci_device_id pcistub_ids[] = { ++ { ++ .vendor = PCI_ANY_ID, ++ .device = PCI_ANY_ID, ++ .subvendor = PCI_ANY_ID, ++ .subdevice = PCI_ANY_ID, ++ }, ++ {0,}, ++}; ++ ++static void kill_domain_by_device(struct pcistub_device *psdev) ++{ ++ struct xenbus_transaction xbt; ++ int err; ++ char nodename[1024]; ++ ++ if (!psdev) ++ dev_err(&psdev->dev->dev, ++ "device is NULL when do AER recovery/kill_domain\n"); ++ sprintf(nodename, "/local/domain/0/backend/pci/%d/0", ++ psdev->pdev->xdev->otherend_id); ++ nodename[strlen(nodename)] = '\0'; ++ ++again: ++ err = xenbus_transaction_start(&xbt); ++ if (err) ++ { ++ dev_err(&psdev->dev->dev, ++ "error %d when start xenbus transaction\n", err); ++ return; ++ } ++ /*PV AER handlers will set this flag*/ ++ xenbus_printf(xbt, nodename, "aerState" , "aerfail" ); ++ err = xenbus_transaction_end(xbt, 0); ++ if (err) ++ { ++ if (err == -EAGAIN) ++ goto again; ++ dev_err(&psdev->dev->dev, ++ "error %d when end xenbus transaction\n", err); ++ return; ++ } ++} ++ ++/* For each aer recovery step error_detected, mmio_enabled, etc, front_end and ++ * backend need to have cooperation. In pciback, those steps will do similar ++ * jobs: send service request and waiting for front_end response. 
++*/ ++static pci_ers_result_t common_process(struct pcistub_device *psdev, ++ pci_channel_state_t state, int aer_cmd, pci_ers_result_t result) ++{ ++ pci_ers_result_t res = result; ++ struct xen_pcie_aer_op *aer_op; ++ int ret; ++ ++ /*with PV AER drivers*/ ++ aer_op = &(psdev->pdev->sh_info->aer_op); ++ aer_op->cmd = aer_cmd ; ++ /*useful for error_detected callback*/ ++ aer_op->err = state; ++ /*pcifront_end BDF*/ ++ ret = pciback_get_pcifront_dev(psdev->dev, psdev->pdev, ++ &aer_op->domain, &aer_op->bus, &aer_op->devfn); ++ if (!ret) { ++ dev_err(&psdev->dev->dev, ++ "pciback: failed to get pcifront device\n"); ++ return PCI_ERS_RESULT_NONE; ++ } ++ wmb(); ++ ++ dev_dbg(&psdev->dev->dev, ++ "pciback: aer_op %x dom %x bus %x devfn %x\n", ++ aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn); ++ /*local flag to mark there's aer request, pciback callback will use this ++ * flag to judge whether we need to check pci-front give aer service ++ * ack signal ++ */ ++ set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags); ++ ++ /*It is possible that a pcifront conf_read_write ops request invokes ++ * the callback which cause the spurious execution of wake_up. ++ * Yet it is harmless and better than a spinlock here ++ */ ++ set_bit(_XEN_PCIB_active, ++ (unsigned long *)&psdev->pdev->sh_info->flags); ++ wmb(); ++ notify_remote_via_irq(psdev->pdev->evtchn_irq); ++ ++ ret = wait_event_timeout(aer_wait_queue, !(test_bit(_XEN_PCIB_active, ++ (unsigned long *)&psdev->pdev->sh_info->flags)), 300*HZ); ++ ++ if (!ret) { ++ if (test_bit(_XEN_PCIB_active, ++ (unsigned long *)&psdev->pdev->sh_info->flags)) { ++ dev_err(&psdev->dev->dev, ++ "pcifront aer process not responding!\n"); ++ clear_bit(_XEN_PCIB_active, ++ (unsigned long *)&psdev->pdev->sh_info->flags); ++ aer_op->err = PCI_ERS_RESULT_NONE; ++ return res; ++ } ++ } ++ clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags); ++ ++ if ( test_bit( _XEN_PCIF_active, ++ (unsigned long*)&psdev->pdev->sh_info->flags)) { ++ dev_dbg(&psdev->dev->dev, ++ "schedule pci_conf service in pciback \n"); ++ test_and_schedule_op(psdev->pdev); ++ } ++ ++ res = (pci_ers_result_t)aer_op->err; ++ return res; ++} ++ ++/* ++* pciback_slot_reset: it will send the slot_reset request to pcifront in case ++* of the device driver could provide this service, and then wait for pcifront ++* ack. 
++* @dev: pointer to PCI devices ++* return value is used by aer_core do_recovery policy ++*/ ++static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev) ++{ ++ struct pcistub_device *psdev; ++ pci_ers_result_t result; ++ ++ result = PCI_ERS_RESULT_RECOVERED; ++ dev_dbg(&dev->dev, "pciback_slot_reset(bus:%x,devfn:%x)\n", ++ dev->bus->number, dev->devfn); ++ ++ down_write(&pcistub_sem); ++ psdev = pcistub_device_find(pci_domain_nr(dev->bus), ++ dev->bus->number, ++ PCI_SLOT(dev->devfn), ++ PCI_FUNC(dev->devfn)); ++ ++ if ( !psdev || !psdev->pdev ) ++ { ++ dev_err(&dev->dev, ++ "pciback device is not found/assigned\n"); ++ goto end; ++ } ++ ++ if ( !psdev->pdev->sh_info ) ++ { ++ dev_err(&dev->dev, "pciback device is not connected or owned" ++ " by HVM, kill it\n"); ++ kill_domain_by_device(psdev); ++ goto release; ++ } ++ ++ if ( !test_bit(_XEN_PCIB_AERHANDLER, ++ (unsigned long *)&psdev->pdev->sh_info->flags) ) { ++ dev_err(&dev->dev, ++ "guest with no AER driver should have been killed\n"); ++ goto release; ++ } ++ result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result); ++ ++ if (result == PCI_ERS_RESULT_NONE || ++ result == PCI_ERS_RESULT_DISCONNECT) { ++ dev_dbg(&dev->dev, ++ "No AER slot_reset service or disconnected!\n"); ++ kill_domain_by_device(psdev); ++ } ++release: ++ pcistub_device_put(psdev); ++end: ++ up_write(&pcistub_sem); ++ return result; ++ ++} ++ ++ ++/*pciback_mmio_enabled: it will send the mmio_enabled request to pcifront ++* in case of the device driver could provide this service, and then wait ++* for pcifront ack. ++* @dev: pointer to PCI devices ++* return value is used by aer_core do_recovery policy ++*/ ++ ++static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev) ++{ ++ struct pcistub_device *psdev; ++ pci_ers_result_t result; ++ ++ result = PCI_ERS_RESULT_RECOVERED; ++ dev_dbg(&dev->dev, "pciback_mmio_enabled(bus:%x,devfn:%x)\n", ++ dev->bus->number, dev->devfn); ++ ++ down_write(&pcistub_sem); ++ psdev = pcistub_device_find(pci_domain_nr(dev->bus), ++ dev->bus->number, ++ PCI_SLOT(dev->devfn), ++ PCI_FUNC(dev->devfn)); ++ ++ if ( !psdev || !psdev->pdev ) ++ { ++ dev_err(&dev->dev, ++ "pciback device is not found/assigned\n"); ++ goto end; ++ } ++ ++ if ( !psdev->pdev->sh_info ) ++ { ++ dev_err(&dev->dev, "pciback device is not connected or owned" ++ " by HVM, kill it\n"); ++ kill_domain_by_device(psdev); ++ goto release; ++ } ++ ++ if ( !test_bit(_XEN_PCIB_AERHANDLER, ++ (unsigned long *)&psdev->pdev->sh_info->flags) ) { ++ dev_err(&dev->dev, ++ "guest with no AER driver should have been killed\n"); ++ goto release; ++ } ++ result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result); ++ ++ if (result == PCI_ERS_RESULT_NONE || ++ result == PCI_ERS_RESULT_DISCONNECT) { ++ dev_dbg(&dev->dev, ++ "No AER mmio_enabled service or disconnected!\n"); ++ kill_domain_by_device(psdev); ++ } ++release: ++ pcistub_device_put(psdev); ++end: ++ up_write(&pcistub_sem); ++ return result; ++} ++ ++/*pciback_error_detected: it will send the error_detected request to pcifront ++* in case of the device driver could provide this service, and then wait ++* for pcifront ack. 
++* @dev: pointer to PCI devices ++* @error: the current PCI connection state ++* return value is used by aer_core do_recovery policy ++*/ ++ ++static pci_ers_result_t pciback_error_detected(struct pci_dev *dev, ++ pci_channel_state_t error) ++{ ++ struct pcistub_device *psdev; ++ pci_ers_result_t result; ++ ++ result = PCI_ERS_RESULT_CAN_RECOVER; ++ dev_dbg(&dev->dev, "pciback_error_detected(bus:%x,devfn:%x)\n", ++ dev->bus->number, dev->devfn); ++ ++ down_write(&pcistub_sem); ++ psdev = pcistub_device_find(pci_domain_nr(dev->bus), ++ dev->bus->number, ++ PCI_SLOT(dev->devfn), ++ PCI_FUNC(dev->devfn)); ++ ++ if ( !psdev || !psdev->pdev ) ++ { ++ dev_err(&dev->dev, ++ "pciback device is not found/assigned\n"); ++ goto end; ++ } ++ ++ if ( !psdev->pdev->sh_info ) ++ { ++ dev_err(&dev->dev, "pciback device is not connected or owned" ++ " by HVM, kill it\n"); ++ kill_domain_by_device(psdev); ++ goto release; ++ } ++ ++ /*Guest owns the device yet no aer handler regiested, kill guest*/ ++ if ( !test_bit(_XEN_PCIB_AERHANDLER, ++ (unsigned long *)&psdev->pdev->sh_info->flags) ) { ++ dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n"); ++ kill_domain_by_device(psdev); ++ goto release; ++ } ++ result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result); ++ ++ if (result == PCI_ERS_RESULT_NONE || ++ result == PCI_ERS_RESULT_DISCONNECT) { ++ dev_dbg(&dev->dev, ++ "No AER error_detected service or disconnected!\n"); ++ kill_domain_by_device(psdev); ++ } ++release: ++ pcistub_device_put(psdev); ++end: ++ up_write(&pcistub_sem); ++ return result; ++} ++ ++/*pciback_error_resume: it will send the error_resume request to pcifront ++* in case of the device driver could provide this service, and then wait ++* for pcifront ack. ++* @dev: pointer to PCI devices ++*/ ++ ++static void pciback_error_resume(struct pci_dev *dev) ++{ ++ struct pcistub_device *psdev; ++ ++ dev_dbg(&dev->dev, "pciback_error_resume(bus:%x,devfn:%x)\n", ++ dev->bus->number, dev->devfn); ++ ++ down_write(&pcistub_sem); ++ psdev = pcistub_device_find(pci_domain_nr(dev->bus), ++ dev->bus->number, ++ PCI_SLOT(dev->devfn), ++ PCI_FUNC(dev->devfn)); ++ ++ if ( !psdev || !psdev->pdev ) ++ { ++ dev_err(&dev->dev, ++ "pciback device is not found/assigned\n"); ++ goto end; ++ } ++ ++ if ( !psdev->pdev->sh_info ) ++ { ++ dev_err(&dev->dev, "pciback device is not connected or owned" ++ " by HVM, kill it\n"); ++ kill_domain_by_device(psdev); ++ goto release; ++ } ++ ++ if ( !test_bit(_XEN_PCIB_AERHANDLER, ++ (unsigned long *)&psdev->pdev->sh_info->flags) ) { ++ dev_err(&dev->dev, ++ "guest with no AER driver should have been killed\n"); ++ kill_domain_by_device(psdev); ++ goto release; ++ } ++ common_process(psdev, 1, XEN_PCI_OP_aer_resume, PCI_ERS_RESULT_RECOVERED); ++release: ++ pcistub_device_put(psdev); ++end: ++ up_write(&pcistub_sem); ++ return; ++} ++ ++/*add pciback AER handling*/ ++static struct pci_error_handlers pciback_error_handler = { ++ .error_detected = pciback_error_detected, ++ .mmio_enabled = pciback_mmio_enabled, ++ .slot_reset = pciback_slot_reset, ++ .resume = pciback_error_resume, ++}; ++ ++/* ++ * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't ++ * for a normal device. I don't want it to be loaded automatically. 
++ */ ++ ++static struct pci_driver pciback_pci_driver = { ++ .name = "pciback", ++ .id_table = pcistub_ids, ++ .probe = pcistub_probe, ++ .remove = pcistub_remove, ++ .err_handler = &pciback_error_handler, ++}; ++ ++static inline int str_to_slot(const char *buf, int *domain, int *bus, ++ int *slot, int *func) ++{ ++ int err; ++ ++ err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func); ++ if (err == 4) ++ return 0; ++ else if (err < 0) ++ return -EINVAL; ++ ++ /* try again without domain */ ++ *domain = 0; ++ err = sscanf(buf, " %x:%x.%x", bus, slot, func); ++ if (err == 3) ++ return 0; ++ ++ return -EINVAL; ++} ++ ++static inline int str_to_quirk(const char *buf, int *domain, int *bus, int ++ *slot, int *func, int *reg, int *size, int *mask) ++{ ++ int err; ++ ++ err = ++ sscanf(buf, " %04x:%02x:%02x.%1x-%08x:%1x:%08x", domain, bus, slot, ++ func, reg, size, mask); ++ if (err == 7) ++ return 0; ++ return -EINVAL; ++} ++ ++static int pcistub_device_id_add(int domain, int bus, int slot, int func) ++{ ++ struct pcistub_device_id *pci_dev_id; ++ unsigned long flags; ++ ++ pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL); ++ if (!pci_dev_id) ++ return -ENOMEM; ++ ++ pci_dev_id->domain = domain; ++ pci_dev_id->bus = bus; ++ pci_dev_id->devfn = PCI_DEVFN(slot, func); ++ ++ pr_debug("pciback: wants to seize %04x:%02x:%02x.%01x\n", ++ domain, bus, slot, func); ++ ++ spin_lock_irqsave(&device_ids_lock, flags); ++ list_add_tail(&pci_dev_id->slot_list, &pcistub_device_ids); ++ spin_unlock_irqrestore(&device_ids_lock, flags); ++ ++ return 0; ++} ++ ++static int pcistub_device_id_remove(int domain, int bus, int slot, int func) ++{ ++ struct pcistub_device_id *pci_dev_id, *t; ++ int devfn = PCI_DEVFN(slot, func); ++ int err = -ENOENT; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&device_ids_lock, flags); ++ list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, slot_list) { ++ ++ if (pci_dev_id->domain == domain ++ && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) { ++ /* Don't break; here because it's possible the same ++ * slot could be in the list more than once ++ */ ++ list_del(&pci_dev_id->slot_list); ++ kfree(pci_dev_id); ++ ++ err = 0; ++ ++ pr_debug("pciback: removed %04x:%02x:%02x.%01x from " ++ "seize list\n", domain, bus, slot, func); ++ } ++ } ++ spin_unlock_irqrestore(&device_ids_lock, flags); ++ ++ return err; ++} ++ ++static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg, ++ int size, int mask) ++{ ++ int err = 0; ++ struct pcistub_device *psdev; ++ struct pci_dev *dev; ++ struct config_field *field; ++ ++ psdev = pcistub_device_find(domain, bus, slot, func); ++ if (!psdev || !psdev->dev) { ++ err = -ENODEV; ++ goto out; ++ } ++ dev = psdev->dev; ++ ++ field = kzalloc(sizeof(*field), GFP_ATOMIC); ++ if (!field) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ field->offset = reg; ++ field->size = size; ++ field->mask = mask; ++ field->init = NULL; ++ field->reset = NULL; ++ field->release = NULL; ++ field->clean = pciback_config_field_free; ++ ++ err = pciback_config_quirks_add_field(dev, field); ++ if (err) ++ kfree(field); ++ out: ++ return err; ++} ++ ++static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf, ++ size_t count) ++{ ++ int domain, bus, slot, func; ++ int err; ++ ++ err = str_to_slot(buf, &domain, &bus, &slot, &func); ++ if (err) ++ goto out; ++ ++ err = pcistub_device_id_add(domain, bus, slot, func); ++ ++ out: ++ if (!err) ++ err = count; ++ return err; ++} ++ ++DRIVER_ATTR(new_slot, S_IWUSR, NULL, 
pcistub_slot_add); ++ ++static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf, ++ size_t count) ++{ ++ int domain, bus, slot, func; ++ int err; ++ ++ err = str_to_slot(buf, &domain, &bus, &slot, &func); ++ if (err) ++ goto out; ++ ++ err = pcistub_device_id_remove(domain, bus, slot, func); ++ ++ out: ++ if (!err) ++ err = count; ++ return err; ++} ++ ++DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove); ++ ++static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf) ++{ ++ struct pcistub_device_id *pci_dev_id; ++ size_t count = 0; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&device_ids_lock, flags); ++ list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) { ++ if (count >= PAGE_SIZE) ++ break; ++ ++ count += scnprintf(buf + count, PAGE_SIZE - count, ++ "%04x:%02x:%02x.%01x\n", ++ pci_dev_id->domain, pci_dev_id->bus, ++ PCI_SLOT(pci_dev_id->devfn), ++ PCI_FUNC(pci_dev_id->devfn)); ++ } ++ spin_unlock_irqrestore(&device_ids_lock, flags); ++ ++ return count; ++} ++ ++DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL); ++ ++static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf, ++ size_t count) ++{ ++ int domain, bus, slot, func, reg, size, mask; ++ int err; ++ ++ err = str_to_quirk(buf, &domain, &bus, &slot, &func, ®, &size, ++ &mask); ++ if (err) ++ goto out; ++ ++ err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask); ++ ++ out: ++ if (!err) ++ err = count; ++ return err; ++} ++ ++static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf) ++{ ++ int count = 0; ++ unsigned long flags; ++ extern struct list_head pciback_quirks; ++ struct pciback_config_quirk *quirk; ++ struct pciback_dev_data *dev_data; ++ const struct config_field *field; ++ const struct config_field_entry *cfg_entry; ++ ++ spin_lock_irqsave(&device_ids_lock, flags); ++ list_for_each_entry(quirk, &pciback_quirks, quirks_list) { ++ if (count >= PAGE_SIZE) ++ goto out; ++ ++ count += scnprintf(buf + count, PAGE_SIZE - count, ++ "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n", ++ quirk->pdev->bus->number, ++ PCI_SLOT(quirk->pdev->devfn), ++ PCI_FUNC(quirk->pdev->devfn), ++ quirk->devid.vendor, quirk->devid.device, ++ quirk->devid.subvendor, ++ quirk->devid.subdevice); ++ ++ dev_data = pci_get_drvdata(quirk->pdev); ++ ++ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { ++ field = cfg_entry->field; ++ if (count >= PAGE_SIZE) ++ goto out; ++ ++ count += scnprintf(buf + count, PAGE_SIZE - count, ++ "\t\t%08x:%01x:%08x\n", ++ cfg_entry->base_offset + field->offset, ++ field->size, field->mask); ++ } ++ } ++ ++ out: ++ spin_unlock_irqrestore(&device_ids_lock, flags); ++ ++ return count; ++} ++ ++DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add); ++ ++static ssize_t permissive_add(struct device_driver *drv, const char *buf, ++ size_t count) ++{ ++ int domain, bus, slot, func; ++ int err; ++ struct pcistub_device *psdev; ++ struct pciback_dev_data *dev_data; ++ err = str_to_slot(buf, &domain, &bus, &slot, &func); ++ if (err) ++ goto out; ++ psdev = pcistub_device_find(domain, bus, slot, func); ++ if (!psdev) { ++ err = -ENODEV; ++ goto out; ++ } ++ if (!psdev->dev) { ++ err = -ENODEV; ++ goto release; ++ } ++ dev_data = pci_get_drvdata(psdev->dev); ++ /* the driver data for a device should never be null at this point */ ++ if (!dev_data) { ++ err = -ENXIO; ++ goto release; ++ } ++ if (!dev_data->permissive) { ++ dev_data->permissive = 1; ++ /* Let user know that what they're doing could be 
unsafe */ ++ dev_warn(&psdev->dev->dev, ++ "enabling permissive mode configuration space accesses!\n"); ++ dev_warn(&psdev->dev->dev, ++ "permissive mode is potentially unsafe!\n"); ++ } ++ release: ++ pcistub_device_put(psdev); ++ out: ++ if (!err) ++ err = count; ++ return err; ++} ++ ++static ssize_t permissive_show(struct device_driver *drv, char *buf) ++{ ++ struct pcistub_device *psdev; ++ struct pciback_dev_data *dev_data; ++ size_t count = 0; ++ unsigned long flags; ++ spin_lock_irqsave(&pcistub_devices_lock, flags); ++ list_for_each_entry(psdev, &pcistub_devices, dev_list) { ++ if (count >= PAGE_SIZE) ++ break; ++ if (!psdev->dev) ++ continue; ++ dev_data = pci_get_drvdata(psdev->dev); ++ if (!dev_data || !dev_data->permissive) ++ continue; ++ count += ++ scnprintf(buf + count, PAGE_SIZE - count, "%s\n", ++ pci_name(psdev->dev)); ++ } ++ spin_unlock_irqrestore(&pcistub_devices_lock, flags); ++ return count; ++} ++ ++DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add); ++ ++#ifdef CONFIG_PCI_MSI ++ ++int pciback_get_owner(struct pci_dev *dev) ++{ ++ struct pcistub_device *psdev; ++ ++ psdev = pcistub_device_find(pci_domain_nr(dev->bus), dev->bus->number, ++ PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); ++ ++ if (!psdev || !psdev->pdev) ++ return -1; ++ ++ return psdev->pdev->xdev->otherend_id; ++} ++#endif ++ ++static void pcistub_exit(void) ++{ ++ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_new_slot); ++ driver_remove_file(&pciback_pci_driver.driver, ++ &driver_attr_remove_slot); ++ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_slots); ++ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_quirks); ++ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive); ++ ++ pci_unregister_driver(&pciback_pci_driver); ++ WARN_ON(unregister_msi_get_owner(pciback_get_owner)); ++} ++ ++static int __init pcistub_init(void) ++{ ++ int pos = 0; ++ int err = 0; ++ int domain, bus, slot, func; ++ int parsed; ++ ++ if (pci_devs_to_hide && *pci_devs_to_hide) { ++ do { ++ parsed = 0; ++ ++ err = sscanf(pci_devs_to_hide + pos, ++ " (%x:%x:%x.%x) %n", ++ &domain, &bus, &slot, &func, &parsed); ++ if (err != 4) { ++ domain = 0; ++ err = sscanf(pci_devs_to_hide + pos, ++ " (%x:%x.%x) %n", ++ &bus, &slot, &func, &parsed); ++ if (err != 3) ++ goto parse_error; ++ } ++ ++ err = pcistub_device_id_add(domain, bus, slot, func); ++ if (err) ++ goto out; ++ ++ /* if parsed<=0, we've reached the end of the string */ ++ pos += parsed; ++ } while (parsed > 0 && pci_devs_to_hide[pos]); ++ } ++ ++ /* If we're the first PCI Device Driver to register, we're the ++ * first one to get offered PCI devices as they become ++ * available (and thus we can be the first to grab them) ++ */ ++ err = pci_register_driver(&pciback_pci_driver); ++ if (err < 0) ++ goto out; ++ ++ err = driver_create_file(&pciback_pci_driver.driver, ++ &driver_attr_new_slot); ++ if (!err) ++ err = driver_create_file(&pciback_pci_driver.driver, ++ &driver_attr_remove_slot); ++ if (!err) ++ err = driver_create_file(&pciback_pci_driver.driver, ++ &driver_attr_slots); ++ if (!err) ++ err = driver_create_file(&pciback_pci_driver.driver, ++ &driver_attr_quirks); ++ if (!err) ++ err = driver_create_file(&pciback_pci_driver.driver, ++ &driver_attr_permissive); ++ ++ if (!err) ++ err = register_msi_get_owner(pciback_get_owner); ++ if (err) ++ pcistub_exit(); ++ ++ out: ++ return err; ++ ++ parse_error: ++ printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n", ++ 
pci_devs_to_hide + pos); ++ return -EINVAL; ++} ++ ++#ifndef MODULE ++/* ++ * fs_initcall happens before device_initcall ++ * so pciback *should* get called first (b/c we ++ * want to suck up any device before other drivers ++ * get a chance by being the first pci device ++ * driver to register) ++ */ ++fs_initcall(pcistub_init); ++#endif ++ ++static int __init pciback_init(void) ++{ ++ int err; ++ ++ err = pciback_config_init(); ++ if (err) ++ return err; ++ ++#ifdef MODULE ++ err = pcistub_init(); ++ if (err < 0) ++ return err; ++#endif ++ ++ pcistub_init_devices_late(); ++ err = pciback_xenbus_register(); ++ if (err) ++ pcistub_exit(); ++ ++ return err; ++} ++ ++static void __exit pciback_cleanup(void) ++{ ++ pciback_xenbus_unregister(); ++ pcistub_exit(); ++} ++ ++module_init(pciback_init); ++module_exit(pciback_cleanup); ++ ++MODULE_LICENSE("Dual BSD/GPL"); +diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h +new file mode 100644 +index 0000000..6744f45 +--- /dev/null ++++ b/drivers/xen/pciback/pciback.h +@@ -0,0 +1,126 @@ ++/* ++ * PCI Backend Common Data Structures & Function Declarations ++ * ++ * Author: Ryan Wilson ++ */ ++#ifndef __XEN_PCIBACK_H__ ++#define __XEN_PCIBACK_H__ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct pci_dev_entry { ++ struct list_head list; ++ struct pci_dev *dev; ++}; ++ ++#define _PDEVF_op_active (0) ++#define PDEVF_op_active (1<<(_PDEVF_op_active)) ++#define _PCIB_op_pending (1) ++#define PCIB_op_pending (1<<(_PCIB_op_pending)) ++ ++struct pciback_device { ++ void *pci_dev_data; ++ spinlock_t dev_lock; ++ ++ struct xenbus_device *xdev; ++ ++ struct xenbus_watch be_watch; ++ u8 be_watching; ++ ++ int evtchn_irq; ++ ++ struct vm_struct *sh_area; ++ struct xen_pci_sharedinfo *sh_info; ++ ++ unsigned long flags; ++ ++ struct work_struct op_work; ++}; ++ ++struct pciback_dev_data { ++ struct list_head config_fields; ++ int permissive; ++ int warned_on_write; ++}; ++ ++/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */ ++struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev, ++ int domain, int bus, ++ int slot, int func); ++struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev, ++ struct pci_dev *dev); ++void pcistub_put_pci_dev(struct pci_dev *dev); ++ ++/* Ensure a device is turned off or reset */ ++void pciback_reset_device(struct pci_dev *pdev); ++ ++/* Access a virtual configuration space for a PCI device */ ++int pciback_config_init(void); ++int pciback_config_init_dev(struct pci_dev *dev); ++void pciback_config_free_dyn_fields(struct pci_dev *dev); ++void pciback_config_reset_dev(struct pci_dev *dev); ++void pciback_config_free_dev(struct pci_dev *dev); ++int pciback_config_read(struct pci_dev *dev, int offset, int size, ++ u32 * ret_val); ++int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value); ++ ++/* Handle requests for specific devices from the frontend */ ++typedef int (*publish_pci_dev_cb) (struct pciback_device *pdev, ++ unsigned int domain, unsigned int bus, ++ unsigned int devfn, unsigned int devid); ++typedef int (*publish_pci_root_cb) (struct pciback_device * pdev, ++ unsigned int domain, unsigned int bus); ++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, ++ int devid, publish_pci_dev_cb publish_cb); ++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev); ++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, ++ unsigned int 
domain, unsigned int bus, ++ unsigned int devfn); ++ ++/** ++* Add for domain0 PCIE-AER handling. Get guest domain/bus/devfn in pciback ++* before sending aer request to pcifront, so that guest could identify ++* device, coopearte with pciback to finish aer recovery job if device driver ++* has the capability ++*/ ++ ++int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, ++ unsigned int *domain, unsigned int *bus, unsigned int *devfn); ++int pciback_init_devices(struct pciback_device *pdev); ++int pciback_publish_pci_roots(struct pciback_device *pdev, ++ publish_pci_root_cb cb); ++void pciback_release_devices(struct pciback_device *pdev); ++ ++/* Handles events from front-end */ ++irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs); ++void pciback_do_op(void *data); ++ ++int pciback_xenbus_register(void); ++void pciback_xenbus_unregister(void); ++ ++#ifdef CONFIG_PCI_MSI ++int pciback_enable_msi(struct pciback_device *pdev, ++ struct pci_dev *dev, struct xen_pci_op *op); ++ ++int pciback_disable_msi(struct pciback_device *pdev, ++ struct pci_dev *dev, struct xen_pci_op *op); ++ ++ ++int pciback_enable_msix(struct pciback_device *pdev, ++ struct pci_dev *dev, struct xen_pci_op *op); ++ ++int pciback_disable_msix(struct pciback_device *pdev, ++ struct pci_dev *dev, struct xen_pci_op *op); ++#endif ++extern int verbose_request; ++ ++void test_and_schedule_op(struct pciback_device *pdev); ++#endif ++ +diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c +new file mode 100644 +index 0000000..b85b2db +--- /dev/null ++++ b/drivers/xen/pciback/pciback_ops.c +@@ -0,0 +1,134 @@ ++/* ++ * PCI Backend Operations - respond to PCI requests from Frontend ++ * ++ * Author: Ryan Wilson ++ */ ++#include ++#include ++#include ++#include ++#include "pciback.h" ++ ++int verbose_request = 0; ++module_param(verbose_request, int, 0644); ++ ++/* Ensure a device is "turned off" and ready to be exported. ++ * (Also see pciback_config_reset to ensure virtual configuration space is ++ * ready to be re-exported) ++ */ ++void pciback_reset_device(struct pci_dev *dev) ++{ ++ u16 cmd; ++ ++ /* Disable devices (but not bridges) */ ++ if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) { ++ pci_disable_device(dev); ++ ++ pci_write_config_word(dev, PCI_COMMAND, 0); ++ ++ dev->is_enabled = 0; ++ dev->is_busmaster = 0; ++ } else { ++ pci_read_config_word(dev, PCI_COMMAND, &cmd); ++ if (cmd & (PCI_COMMAND_INVALIDATE)) { ++ cmd &= ~(PCI_COMMAND_INVALIDATE); ++ pci_write_config_word(dev, PCI_COMMAND, cmd); ++ ++ dev->is_busmaster = 0; ++ } ++ } ++} ++extern wait_queue_head_t aer_wait_queue; ++extern struct workqueue_struct *pciback_wq; ++/* ++* Now the same evtchn is used for both pcifront conf_read_write request ++* as well as pcie aer front end ack. We use a new work_queue to schedule ++* pciback conf_read_write service for avoiding confict with aer_core ++* do_recovery job which also use the system default work_queue ++*/ ++void test_and_schedule_op(struct pciback_device *pdev) ++{ ++ /* Check that frontend is requesting an operation and that we are not ++ * already processing a request */ ++ if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags) ++ && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) ++ { ++ queue_work(pciback_wq, &pdev->op_work); ++ } ++ /*_XEN_PCIB_active should have been cleared by pcifront. 
And also make ++ sure pciback is waiting for ack by checking _PCIB_op_pending*/ ++ if (!test_bit(_XEN_PCIB_active,(unsigned long *)&pdev->sh_info->flags) ++ &&test_bit(_PCIB_op_pending, &pdev->flags)) { ++ wake_up(&aer_wait_queue); ++ } ++} ++ ++/* Performing the configuration space reads/writes must not be done in atomic ++ * context because some of the pci_* functions can sleep (mostly due to ACPI ++ * use of semaphores). This function is intended to be called from a work ++ * queue in process context taking a struct pciback_device as a parameter */ ++void pciback_do_op(void *data) ++{ ++ struct pciback_device *pdev = data; ++ struct pci_dev *dev; ++ struct xen_pci_op *op = &pdev->sh_info->op; ++ ++ dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn); ++ ++ if (dev == NULL) ++ op->err = XEN_PCI_ERR_dev_not_found; ++ else ++ { ++ switch (op->cmd) ++ { ++ case XEN_PCI_OP_conf_read: ++ op->err = pciback_config_read(dev, ++ op->offset, op->size, &op->value); ++ break; ++ case XEN_PCI_OP_conf_write: ++ op->err = pciback_config_write(dev, ++ op->offset, op->size, op->value); ++ break; ++#ifdef CONFIG_PCI_MSI ++ case XEN_PCI_OP_enable_msi: ++ op->err = pciback_enable_msi(pdev, dev, op); ++ break; ++ case XEN_PCI_OP_disable_msi: ++ op->err = pciback_disable_msi(pdev, dev, op); ++ break; ++ case XEN_PCI_OP_enable_msix: ++ op->err = pciback_enable_msix(pdev, dev, op); ++ break; ++ case XEN_PCI_OP_disable_msix: ++ op->err = pciback_disable_msix(pdev, dev, op); ++ break; ++#endif ++ default: ++ op->err = XEN_PCI_ERR_not_implemented; ++ break; ++ } ++ } ++ /* Tell the driver domain that we're done. */ ++ wmb(); ++ clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); ++ notify_remote_via_irq(pdev->evtchn_irq); ++ ++ /* Mark that we're done. */ ++ smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */ ++ clear_bit(_PDEVF_op_active, &pdev->flags); ++ smp_mb__after_clear_bit(); /* /before/ final check for work */ ++ ++ /* Check to see if the driver domain tried to start another request in ++ * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. ++ */ ++ test_and_schedule_op(pdev); ++} ++ ++irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs) ++{ ++ struct pciback_device *pdev = dev_id; ++ ++ test_and_schedule_op(pdev); ++ ++ return IRQ_HANDLED; ++} +diff --git a/drivers/xen/pciback/slot.c b/drivers/xen/pciback/slot.c +new file mode 100644 +index 0000000..105a8b6 +--- /dev/null ++++ b/drivers/xen/pciback/slot.c +@@ -0,0 +1,187 @@ ++/* ++ * PCI Backend - Provides a Virtual PCI bus (with real devices) ++ * to the frontend ++ * ++ * Author: Ryan Wilson (vpci.c) ++ * Author: Tristan Gingold , from vpci.c ++ */ ++ ++#include ++#include ++#include ++#include ++#include "pciback.h" ++ ++/* There are at most 32 slots in a pci bus. 
*/ ++#define PCI_SLOT_MAX 32 ++ ++#define PCI_BUS_NBR 2 ++ ++struct slot_dev_data { ++ /* Access to dev_list must be protected by lock */ ++ struct pci_dev *slots[PCI_BUS_NBR][PCI_SLOT_MAX]; ++ spinlock_t lock; ++}; ++ ++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, ++ unsigned int domain, unsigned int bus, ++ unsigned int devfn) ++{ ++ struct pci_dev *dev = NULL; ++ struct slot_dev_data *slot_dev = pdev->pci_dev_data; ++ unsigned long flags; ++ ++ if (domain != 0 || PCI_FUNC(devfn) != 0) ++ return NULL; ++ ++ if (PCI_SLOT(devfn) >= PCI_SLOT_MAX || bus >= PCI_BUS_NBR) ++ return NULL; ++ ++ spin_lock_irqsave(&slot_dev->lock, flags); ++ dev = slot_dev->slots[bus][PCI_SLOT(devfn)]; ++ spin_unlock_irqrestore(&slot_dev->lock, flags); ++ ++ return dev; ++} ++ ++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, ++ int devid, publish_pci_dev_cb publish_cb) ++{ ++ int err = 0, slot, bus; ++ struct slot_dev_data *slot_dev = pdev->pci_dev_data; ++ unsigned long flags; ++ ++ if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) { ++ err = -EFAULT; ++ xenbus_dev_fatal(pdev->xdev, err, ++ "Can't export bridges on the virtual PCI bus"); ++ goto out; ++ } ++ ++ spin_lock_irqsave(&slot_dev->lock, flags); ++ ++ /* Assign to a new slot on the virtual PCI bus */ ++ for (bus = 0; bus < PCI_BUS_NBR; bus++) ++ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { ++ if (slot_dev->slots[bus][slot] == NULL) { ++ printk(KERN_INFO ++ "pciback: slot: %s: assign to virtual slot %d, bus %d\n", ++ pci_name(dev), slot, bus); ++ slot_dev->slots[bus][slot] = dev; ++ goto unlock; ++ } ++ } ++ ++ err = -ENOMEM; ++ xenbus_dev_fatal(pdev->xdev, err, ++ "No more space on root virtual PCI bus"); ++ ++ unlock: ++ spin_unlock_irqrestore(&slot_dev->lock, flags); ++ ++ /* Publish this device. 
*/ ++ if(!err) ++ err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, 0), devid); ++ ++ out: ++ return err; ++} ++ ++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) ++{ ++ int slot, bus; ++ struct slot_dev_data *slot_dev = pdev->pci_dev_data; ++ struct pci_dev *found_dev = NULL; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&slot_dev->lock, flags); ++ ++ for (bus = 0; bus < PCI_BUS_NBR; bus++) ++ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { ++ if (slot_dev->slots[bus][slot] == dev) { ++ slot_dev->slots[bus][slot] = NULL; ++ found_dev = dev; ++ goto out; ++ } ++ } ++ ++ out: ++ spin_unlock_irqrestore(&slot_dev->lock, flags); ++ ++ if (found_dev) ++ pcistub_put_pci_dev(found_dev); ++} ++ ++int pciback_init_devices(struct pciback_device *pdev) ++{ ++ int slot, bus; ++ struct slot_dev_data *slot_dev; ++ ++ slot_dev = kmalloc(sizeof(*slot_dev), GFP_KERNEL); ++ if (!slot_dev) ++ return -ENOMEM; ++ ++ spin_lock_init(&slot_dev->lock); ++ ++ for (bus = 0; bus < PCI_BUS_NBR; bus++) ++ for (slot = 0; slot < PCI_SLOT_MAX; slot++) ++ slot_dev->slots[bus][slot] = NULL; ++ ++ pdev->pci_dev_data = slot_dev; ++ ++ return 0; ++} ++ ++int pciback_publish_pci_roots(struct pciback_device *pdev, ++ publish_pci_root_cb publish_cb) ++{ ++ /* The Virtual PCI bus has only one root */ ++ return publish_cb(pdev, 0, 0); ++} ++ ++void pciback_release_devices(struct pciback_device *pdev) ++{ ++ int slot, bus; ++ struct slot_dev_data *slot_dev = pdev->pci_dev_data; ++ struct pci_dev *dev; ++ ++ for (bus = 0; bus < PCI_BUS_NBR; bus++) ++ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { ++ dev = slot_dev->slots[bus][slot]; ++ if (dev != NULL) ++ pcistub_put_pci_dev(dev); ++ } ++ ++ kfree(slot_dev); ++ pdev->pci_dev_data = NULL; ++} ++ ++int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, ++ unsigned int *domain, unsigned int *bus, unsigned int *devfn) ++{ ++ int slot, busnr; ++ struct slot_dev_data *slot_dev = pdev->pci_dev_data; ++ struct pci_dev *dev; ++ int found = 0; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&slot_dev->lock, flags); ++ ++ for (busnr = 0; busnr < PCI_BUS_NBR; bus++) ++ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { ++ dev = slot_dev->slots[busnr][slot]; ++ if (dev && dev->bus->number == pcidev->bus->number ++ && dev->devfn == pcidev->devfn ++ && pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus)) { ++ found = 1; ++ *domain = 0; ++ *bus = busnr; ++ *devfn = PCI_DEVFN(slot,0); ++ goto out; ++ } ++ } ++out: ++ spin_unlock_irqrestore(&slot_dev->lock, flags); ++ return found; ++ ++} +diff --git a/drivers/xen/pciback/vpci.c b/drivers/xen/pciback/vpci.c +new file mode 100644 +index 0000000..a5b7ece +--- /dev/null ++++ b/drivers/xen/pciback/vpci.c +@@ -0,0 +1,242 @@ ++/* ++ * PCI Backend - Provides a Virtual PCI bus (with real devices) ++ * to the frontend ++ * ++ * Author: Ryan Wilson ++ */ ++ ++#include ++#include ++#include ++#include ++#include "pciback.h" ++ ++#define PCI_SLOT_MAX 32 ++ ++struct vpci_dev_data { ++ /* Access to dev_list must be protected by lock */ ++ struct list_head dev_list[PCI_SLOT_MAX]; ++ spinlock_t lock; ++}; ++ ++static inline struct list_head *list_first(struct list_head *head) ++{ ++ return head->next; ++} ++ ++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, ++ unsigned int domain, unsigned int bus, ++ unsigned int devfn) ++{ ++ struct pci_dev_entry *entry; ++ struct pci_dev *dev = NULL; ++ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; ++ unsigned long flags; ++ ++ if (domain != 0 || bus != 0) ++ 
return NULL; ++ ++ if (PCI_SLOT(devfn) < PCI_SLOT_MAX) { ++ spin_lock_irqsave(&vpci_dev->lock, flags); ++ ++ list_for_each_entry(entry, ++ &vpci_dev->dev_list[PCI_SLOT(devfn)], ++ list) { ++ if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) { ++ dev = entry->dev; ++ break; ++ } ++ } ++ ++ spin_unlock_irqrestore(&vpci_dev->lock, flags); ++ } ++ return dev; ++} ++ ++static inline int match_slot(struct pci_dev *l, struct pci_dev *r) ++{ ++ if (pci_domain_nr(l->bus) == pci_domain_nr(r->bus) ++ && l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn)) ++ return 1; ++ ++ return 0; ++} ++ ++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, ++ int devid, publish_pci_dev_cb publish_cb) ++{ ++ int err = 0, slot, func; ++ struct pci_dev_entry *t, *dev_entry; ++ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; ++ unsigned long flags; ++ ++ if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) { ++ err = -EFAULT; ++ xenbus_dev_fatal(pdev->xdev, err, ++ "Can't export bridges on the virtual PCI bus"); ++ goto out; ++ } ++ ++ dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL); ++ if (!dev_entry) { ++ err = -ENOMEM; ++ xenbus_dev_fatal(pdev->xdev, err, ++ "Error adding entry to virtual PCI bus"); ++ goto out; ++ } ++ ++ dev_entry->dev = dev; ++ ++ spin_lock_irqsave(&vpci_dev->lock, flags); ++ ++ /* Keep multi-function devices together on the virtual PCI bus */ ++ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { ++ if (!list_empty(&vpci_dev->dev_list[slot])) { ++ t = list_entry(list_first(&vpci_dev->dev_list[slot]), ++ struct pci_dev_entry, list); ++ ++ if (match_slot(dev, t->dev)) { ++ pr_info("pciback: vpci: %s: " ++ "assign to virtual slot %d func %d\n", ++ pci_name(dev), slot, ++ PCI_FUNC(dev->devfn)); ++ list_add_tail(&dev_entry->list, ++ &vpci_dev->dev_list[slot]); ++ func = PCI_FUNC(dev->devfn); ++ goto unlock; ++ } ++ } ++ } ++ ++ /* Assign to a new slot on the virtual PCI bus */ ++ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { ++ if (list_empty(&vpci_dev->dev_list[slot])) { ++ printk(KERN_INFO ++ "pciback: vpci: %s: assign to virtual slot %d\n", ++ pci_name(dev), slot); ++ list_add_tail(&dev_entry->list, ++ &vpci_dev->dev_list[slot]); ++ func = PCI_FUNC(dev->devfn); ++ goto unlock; ++ } ++ } ++ ++ err = -ENOMEM; ++ xenbus_dev_fatal(pdev->xdev, err, ++ "No more space on root virtual PCI bus"); ++ ++ unlock: ++ spin_unlock_irqrestore(&vpci_dev->lock, flags); ++ ++ /* Publish this device. 
*/ ++ if(!err) ++ err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid); ++ ++ out: ++ return err; ++} ++ ++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) ++{ ++ int slot; ++ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; ++ struct pci_dev *found_dev = NULL; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&vpci_dev->lock, flags); ++ ++ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { ++ struct pci_dev_entry *e, *tmp; ++ list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot], ++ list) { ++ if (e->dev == dev) { ++ list_del(&e->list); ++ found_dev = e->dev; ++ kfree(e); ++ goto out; ++ } ++ } ++ } ++ ++ out: ++ spin_unlock_irqrestore(&vpci_dev->lock, flags); ++ ++ if (found_dev) ++ pcistub_put_pci_dev(found_dev); ++} ++ ++int pciback_init_devices(struct pciback_device *pdev) ++{ ++ int slot; ++ struct vpci_dev_data *vpci_dev; ++ ++ vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL); ++ if (!vpci_dev) ++ return -ENOMEM; ++ ++ spin_lock_init(&vpci_dev->lock); ++ ++ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { ++ INIT_LIST_HEAD(&vpci_dev->dev_list[slot]); ++ } ++ ++ pdev->pci_dev_data = vpci_dev; ++ ++ return 0; ++} ++ ++int pciback_publish_pci_roots(struct pciback_device *pdev, ++ publish_pci_root_cb publish_cb) ++{ ++ /* The Virtual PCI bus has only one root */ ++ return publish_cb(pdev, 0, 0); ++} ++ ++void pciback_release_devices(struct pciback_device *pdev) ++{ ++ int slot; ++ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; ++ ++ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { ++ struct pci_dev_entry *e, *tmp; ++ list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot], ++ list) { ++ list_del(&e->list); ++ pcistub_put_pci_dev(e->dev); ++ kfree(e); ++ } ++ } ++ ++ kfree(vpci_dev); ++ pdev->pci_dev_data = NULL; ++} ++ ++int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, ++ unsigned int *domain, unsigned int *bus, unsigned int *devfn) ++{ ++ struct pci_dev_entry *entry; ++ struct pci_dev *dev = NULL; ++ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; ++ unsigned long flags; ++ int found = 0, slot; ++ ++ spin_lock_irqsave(&vpci_dev->lock, flags); ++ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { ++ list_for_each_entry(entry, ++ &vpci_dev->dev_list[slot], ++ list) { ++ dev = entry->dev; ++ if (dev && dev->bus->number == pcidev->bus->number ++ && pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus) ++ && dev->devfn == pcidev->devfn) ++ { ++ found = 1; ++ *domain = 0; ++ *bus = 0; ++ *devfn = PCI_DEVFN(slot, PCI_FUNC(pcidev->devfn)); ++ } ++ } ++ } ++ spin_unlock_irqrestore(&vpci_dev->lock, flags); ++ return found; ++} +diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c +new file mode 100644 +index 0000000..4d56c45 +--- /dev/null ++++ b/drivers/xen/pciback/xenbus.c +@@ -0,0 +1,710 @@ ++/* ++ * PCI Backend Xenbus Setup - handles setup with frontend and xend ++ * ++ * Author: Ryan Wilson ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "pciback.h" ++ ++#define INVALID_EVTCHN_IRQ (-1) ++struct workqueue_struct *pciback_wq; ++ ++static struct pciback_device *alloc_pdev(struct xenbus_device *xdev) ++{ ++ struct pciback_device *pdev; ++ ++ pdev = kzalloc(sizeof(struct pciback_device), GFP_KERNEL); ++ if (pdev == NULL) ++ goto out; ++ dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev); ++ ++ pdev->xdev = xdev; ++ xdev->dev.driver_data = pdev; ++ ++ spin_lock_init(&pdev->dev_lock); ++ ++ pdev->sh_area = NULL; ++ pdev->sh_info = NULL; ++ pdev->evtchn_irq = 
INVALID_EVTCHN_IRQ; ++ pdev->be_watching = 0; ++ ++ INIT_WORK(&pdev->op_work, pciback_do_op, pdev); ++ ++ if (pciback_init_devices(pdev)) { ++ kfree(pdev); ++ pdev = NULL; ++ } ++ out: ++ return pdev; ++} ++ ++static void pciback_disconnect(struct pciback_device *pdev) ++{ ++ spin_lock(&pdev->dev_lock); ++ ++ /* Ensure the guest can't trigger our handler before removing devices */ ++ if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) { ++ unbind_from_irqhandler(pdev->evtchn_irq, pdev); ++ pdev->evtchn_irq = INVALID_EVTCHN_IRQ; ++ } ++ ++ /* If the driver domain started an op, make sure we complete it ++ * before releasing the shared memory */ ++ flush_workqueue(pciback_wq); ++ ++ if (pdev->sh_info != NULL) { ++ xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_area); ++ pdev->sh_info = NULL; ++ } ++ ++ spin_unlock(&pdev->dev_lock); ++} ++ ++static void free_pdev(struct pciback_device *pdev) ++{ ++ if (pdev->be_watching) ++ unregister_xenbus_watch(&pdev->be_watch); ++ ++ pciback_disconnect(pdev); ++ ++ pciback_release_devices(pdev); ++ ++ pdev->xdev->dev.driver_data = NULL; ++ pdev->xdev = NULL; ++ ++ kfree(pdev); ++} ++ ++static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref, ++ int remote_evtchn) ++{ ++ int err = 0; ++ struct vm_struct *area; ++ ++ dev_dbg(&pdev->xdev->dev, ++ "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n", ++ gnt_ref, remote_evtchn); ++ ++ area = xenbus_map_ring_valloc(pdev->xdev, gnt_ref); ++ if (IS_ERR(area)) { ++ err = PTR_ERR(area); ++ goto out; ++ } ++ pdev->sh_area = area; ++ pdev->sh_info = area->addr; ++ ++ err = bind_interdomain_evtchn_to_irqhandler( ++ pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event, ++ SA_SAMPLE_RANDOM, "pciback", pdev); ++ if (err < 0) { ++ xenbus_dev_fatal(pdev->xdev, err, ++ "Error binding event channel to IRQ"); ++ goto out; ++ } ++ pdev->evtchn_irq = err; ++ err = 0; ++ ++ dev_dbg(&pdev->xdev->dev, "Attached!\n"); ++ out: ++ return err; ++} ++ ++static int pciback_attach(struct pciback_device *pdev) ++{ ++ int err = 0; ++ int gnt_ref, remote_evtchn; ++ char *magic = NULL; ++ ++ spin_lock(&pdev->dev_lock); ++ ++ /* Make sure we only do this setup once */ ++ if (xenbus_read_driver_state(pdev->xdev->nodename) != ++ XenbusStateInitialised) ++ goto out; ++ ++ /* Wait for frontend to state that it has published the configuration */ ++ if (xenbus_read_driver_state(pdev->xdev->otherend) != ++ XenbusStateInitialised) ++ goto out; ++ ++ dev_dbg(&pdev->xdev->dev, "Reading frontend config\n"); ++ ++ err = xenbus_gather(XBT_NIL, pdev->xdev->otherend, ++ "pci-op-ref", "%u", &gnt_ref, ++ "event-channel", "%u", &remote_evtchn, ++ "magic", NULL, &magic, NULL); ++ if (err) { ++ /* If configuration didn't get read correctly, wait longer */ ++ xenbus_dev_fatal(pdev->xdev, err, ++ "Error reading configuration from frontend"); ++ goto out; ++ } ++ ++ if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) { ++ xenbus_dev_fatal(pdev->xdev, -EFAULT, ++ "version mismatch (%s/%s) with pcifront - " ++ "halting pciback", ++ magic, XEN_PCI_MAGIC); ++ goto out; ++ } ++ ++ err = pciback_do_attach(pdev, gnt_ref, remote_evtchn); ++ if (err) ++ goto out; ++ ++ dev_dbg(&pdev->xdev->dev, "Connecting...\n"); ++ ++ err = xenbus_switch_state(pdev->xdev, XenbusStateConnected); ++ if (err) ++ xenbus_dev_fatal(pdev->xdev, err, ++ "Error switching to connected state!"); ++ ++ dev_dbg(&pdev->xdev->dev, "Connected? 
%d\n", err); ++ out: ++ spin_unlock(&pdev->dev_lock); ++ ++ if (magic) ++ kfree(magic); ++ ++ return err; ++} ++ ++static int pciback_publish_pci_dev(struct pciback_device *pdev, ++ unsigned int domain, unsigned int bus, ++ unsigned int devfn, unsigned int devid) ++{ ++ int err; ++ int len; ++ char str[64]; ++ ++ len = snprintf(str, sizeof(str), "vdev-%d", devid); ++ if (unlikely(len >= (sizeof(str) - 1))) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, ++ "%04x:%02x:%02x.%02x", domain, bus, ++ PCI_SLOT(devfn), PCI_FUNC(devfn)); ++ ++ out: ++ return err; ++} ++ ++static int pciback_export_device(struct pciback_device *pdev, ++ int domain, int bus, int slot, int func, ++ int devid) ++{ ++ struct pci_dev *dev; ++ int err = 0; ++ ++ dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n", ++ domain, bus, slot, func); ++ ++ dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func); ++ if (!dev) { ++ err = -EINVAL; ++ xenbus_dev_fatal(pdev->xdev, err, ++ "Couldn't locate PCI device " ++ "(%04x:%02x:%02x.%01x)! " ++ "perhaps already in-use?", ++ domain, bus, slot, func); ++ goto out; ++ } ++ ++ err = pciback_add_pci_dev(pdev, dev, devid, pciback_publish_pci_dev); ++ if (err) ++ goto out; ++ ++ /* TODO: It'd be nice to export a bridge and have all of its children ++ * get exported with it. This may be best done in xend (which will ++ * have to calculate resource usage anyway) but we probably want to ++ * put something in here to ensure that if a bridge gets given to a ++ * driver domain, that all devices under that bridge are not given ++ * to other driver domains (as he who controls the bridge can disable ++ * it and stop the other devices from working). ++ */ ++ out: ++ return err; ++} ++ ++static int pciback_remove_device(struct pciback_device *pdev, ++ int domain, int bus, int slot, int func) ++{ ++ int err = 0; ++ struct pci_dev *dev; ++ ++ dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n", ++ domain, bus, slot, func); ++ ++ dev = pciback_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func)); ++ if (!dev) { ++ err = -EINVAL; ++ dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device " ++ "(%04x:%02x:%02x.%01x)! 
not owned by this domain\n", ++ domain, bus, slot, func); ++ goto out; ++ } ++ ++ pciback_release_pci_dev(pdev, dev); ++ ++ out: ++ return err; ++} ++ ++static int pciback_publish_pci_root(struct pciback_device *pdev, ++ unsigned int domain, unsigned int bus) ++{ ++ unsigned int d, b; ++ int i, root_num, len, err; ++ char str[64]; ++ ++ dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n"); ++ ++ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, ++ "root_num", "%d", &root_num); ++ if (err == 0 || err == -ENOENT) ++ root_num = 0; ++ else if (err < 0) ++ goto out; ++ ++ /* Verify that we haven't already published this pci root */ ++ for (i = 0; i < root_num; i++) { ++ len = snprintf(str, sizeof(str), "root-%d", i); ++ if (unlikely(len >= (sizeof(str) - 1))) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, ++ str, "%x:%x", &d, &b); ++ if (err < 0) ++ goto out; ++ if (err != 2) { ++ err = -EINVAL; ++ goto out; ++ } ++ ++ if (d == domain && b == bus) { ++ err = 0; ++ goto out; ++ } ++ } ++ ++ len = snprintf(str, sizeof(str), "root-%d", root_num); ++ if (unlikely(len >= (sizeof(str) - 1))) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n", ++ root_num, domain, bus); ++ ++ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, ++ "%04x:%02x", domain, bus); ++ if (err) ++ goto out; ++ ++ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, ++ "root_num", "%d", (root_num + 1)); ++ ++ out: ++ return err; ++} ++ ++static int pciback_reconfigure(struct pciback_device *pdev) ++{ ++ int err = 0; ++ int num_devs; ++ int domain, bus, slot, func; ++ int substate; ++ int i, len; ++ char state_str[64]; ++ char dev_str[64]; ++ ++ spin_lock(&pdev->dev_lock); ++ ++ dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n"); ++ ++ /* Make sure we only reconfigure once */ ++ if (xenbus_read_driver_state(pdev->xdev->nodename) != ++ XenbusStateReconfiguring) ++ goto out; ++ ++ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d", ++ &num_devs); ++ if (err != 1) { ++ if (err >= 0) ++ err = -EINVAL; ++ xenbus_dev_fatal(pdev->xdev, err, ++ "Error reading number of devices"); ++ goto out; ++ } ++ ++ for (i = 0; i < num_devs; i++) { ++ len = snprintf(state_str, sizeof(state_str), "state-%d", i); ++ if (unlikely(len >= (sizeof(state_str) - 1))) { ++ err = -ENOMEM; ++ xenbus_dev_fatal(pdev->xdev, err, ++ "String overflow while reading " ++ "configuration"); ++ goto out; ++ } ++ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str, ++ "%d", &substate); ++ if (err != 1) ++ substate = XenbusStateUnknown; ++ ++ switch (substate) { ++ case XenbusStateInitialising: ++ dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i); ++ ++ len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i); ++ if (unlikely(len >= (sizeof(dev_str) - 1))) { ++ err = -ENOMEM; ++ xenbus_dev_fatal(pdev->xdev, err, ++ "String overflow while " ++ "reading configuration"); ++ goto out; ++ } ++ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, ++ dev_str, "%x:%x:%x.%x", ++ &domain, &bus, &slot, &func); ++ if (err < 0) { ++ xenbus_dev_fatal(pdev->xdev, err, ++ "Error reading device " ++ "configuration"); ++ goto out; ++ } ++ if (err != 4) { ++ err = -EINVAL; ++ xenbus_dev_fatal(pdev->xdev, err, ++ "Error parsing pci device " ++ "configuration"); ++ goto out; ++ } ++ ++ err = pciback_export_device(pdev, domain, bus, slot, ++ func, i); ++ if (err) ++ goto out; ++ ++ /* Publish pci roots. 
*/ ++ err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root); ++ if (err) { ++ xenbus_dev_fatal(pdev->xdev, err, ++ "Error while publish PCI root" ++ "buses for frontend"); ++ goto out; ++ } ++ ++ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, ++ state_str, "%d", ++ XenbusStateInitialised); ++ if (err) { ++ xenbus_dev_fatal(pdev->xdev, err, ++ "Error switching substate of " ++ "dev-%d\n", i); ++ goto out; ++ } ++ break; ++ ++ case XenbusStateClosing: ++ dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i); ++ ++ len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i); ++ if (unlikely(len >= (sizeof(dev_str) - 1))) { ++ err = -ENOMEM; ++ xenbus_dev_fatal(pdev->xdev, err, ++ "String overflow while " ++ "reading configuration"); ++ goto out; ++ } ++ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, ++ dev_str, "%x:%x:%x.%x", ++ &domain, &bus, &slot, &func); ++ if (err < 0) { ++ xenbus_dev_fatal(pdev->xdev, err, ++ "Error reading device " ++ "configuration"); ++ goto out; ++ } ++ if (err != 4) { ++ err = -EINVAL; ++ xenbus_dev_fatal(pdev->xdev, err, ++ "Error parsing pci device " ++ "configuration"); ++ goto out; ++ } ++ ++ err = pciback_remove_device(pdev, domain, bus, slot, ++ func); ++ if(err) ++ goto out; ++ ++ /* TODO: If at some point we implement support for pci ++ * root hot-remove on pcifront side, we'll need to ++ * remove unnecessary xenstore nodes of pci roots here. ++ */ ++ ++ break; ++ ++ default: ++ break; ++ } ++ } ++ ++ err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured); ++ if (err) { ++ xenbus_dev_fatal(pdev->xdev, err, ++ "Error switching to reconfigured state!"); ++ goto out; ++ } ++ ++ out: ++ spin_unlock(&pdev->dev_lock); ++ ++ return 0; ++} ++ ++static void pciback_frontend_changed(struct xenbus_device *xdev, ++ enum xenbus_state fe_state) ++{ ++ struct pciback_device *pdev = xdev->dev.driver_data; ++ ++ dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state); ++ ++ switch (fe_state) { ++ case XenbusStateInitialised: ++ pciback_attach(pdev); ++ break; ++ ++ case XenbusStateReconfiguring: ++ pciback_reconfigure(pdev); ++ break; ++ ++ case XenbusStateConnected: ++ /* pcifront switched its state from reconfiguring to connected. ++ * Then switch to connected state. ++ */ ++ xenbus_switch_state(xdev, XenbusStateConnected); ++ break; ++ ++ case XenbusStateClosing: ++ pciback_disconnect(pdev); ++ xenbus_switch_state(xdev, XenbusStateClosing); ++ break; ++ ++ case XenbusStateClosed: ++ pciback_disconnect(pdev); ++ xenbus_switch_state(xdev, XenbusStateClosed); ++ if (xenbus_dev_is_online(xdev)) ++ break; ++ /* fall through if not online */ ++ case XenbusStateUnknown: ++ dev_dbg(&xdev->dev, "frontend is gone! unregister device\n"); ++ device_unregister(&xdev->dev); ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++static int pciback_setup_backend(struct pciback_device *pdev) ++{ ++ /* Get configuration from xend (if available now) */ ++ int domain, bus, slot, func; ++ int err = 0; ++ int i, num_devs; ++ char dev_str[64]; ++ char state_str[64]; ++ ++ spin_lock(&pdev->dev_lock); ++ ++ /* It's possible we could get the call to setup twice, so make sure ++ * we're not already connected. 
++ */ ++ if (xenbus_read_driver_state(pdev->xdev->nodename) != ++ XenbusStateInitWait) ++ goto out; ++ ++ dev_dbg(&pdev->xdev->dev, "getting be setup\n"); ++ ++ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d", ++ &num_devs); ++ if (err != 1) { ++ if (err >= 0) ++ err = -EINVAL; ++ xenbus_dev_fatal(pdev->xdev, err, ++ "Error reading number of devices"); ++ goto out; ++ } ++ ++ for (i = 0; i < num_devs; i++) { ++ int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i); ++ if (unlikely(l >= (sizeof(dev_str) - 1))) { ++ err = -ENOMEM; ++ xenbus_dev_fatal(pdev->xdev, err, ++ "String overflow while reading " ++ "configuration"); ++ goto out; ++ } ++ ++ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str, ++ "%x:%x:%x.%x", &domain, &bus, &slot, &func); ++ if (err < 0) { ++ xenbus_dev_fatal(pdev->xdev, err, ++ "Error reading device configuration"); ++ goto out; ++ } ++ if (err != 4) { ++ err = -EINVAL; ++ xenbus_dev_fatal(pdev->xdev, err, ++ "Error parsing pci device " ++ "configuration"); ++ goto out; ++ } ++ ++ err = pciback_export_device(pdev, domain, bus, slot, func, i); ++ if (err) ++ goto out; ++ ++ /* Switch substate of this device. */ ++ l = snprintf(state_str, sizeof(state_str), "state-%d", i); ++ if (unlikely(l >= (sizeof(state_str) - 1))) { ++ err = -ENOMEM; ++ xenbus_dev_fatal(pdev->xdev, err, ++ "String overflow while reading " ++ "configuration"); ++ goto out; ++ } ++ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str, ++ "%d", XenbusStateInitialised); ++ if (err) { ++ xenbus_dev_fatal(pdev->xdev, err, "Error switching " ++ "substate of dev-%d\n", i); ++ goto out; ++ } ++ } ++ ++ err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root); ++ if (err) { ++ xenbus_dev_fatal(pdev->xdev, err, ++ "Error while publish PCI root buses " ++ "for frontend"); ++ goto out; ++ } ++ ++ err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised); ++ if (err) ++ xenbus_dev_fatal(pdev->xdev, err, ++ "Error switching to initialised state!"); ++ ++ out: ++ spin_unlock(&pdev->dev_lock); ++ ++ if (!err) ++ /* see if pcifront is already configured (if not, we'll wait) */ ++ pciback_attach(pdev); ++ ++ return err; ++} ++ ++static void pciback_be_watch(struct xenbus_watch *watch, ++ const char **vec, unsigned int len) ++{ ++ struct pciback_device *pdev = ++ container_of(watch, struct pciback_device, be_watch); ++ ++ switch (xenbus_read_driver_state(pdev->xdev->nodename)) { ++ case XenbusStateInitWait: ++ pciback_setup_backend(pdev); ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++static int pciback_xenbus_probe(struct xenbus_device *dev, ++ const struct xenbus_device_id *id) ++{ ++ int err = 0; ++ struct pciback_device *pdev = alloc_pdev(dev); ++ ++ if (pdev == NULL) { ++ err = -ENOMEM; ++ xenbus_dev_fatal(dev, err, ++ "Error allocating pciback_device struct"); ++ goto out; ++ } ++ ++ /* wait for xend to configure us */ ++ err = xenbus_switch_state(dev, XenbusStateInitWait); ++ if (err) ++ goto out; ++ ++ /* watch the backend node for backend configuration information */ ++ err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch, ++ pciback_be_watch); ++ if (err) ++ goto out; ++ pdev->be_watching = 1; ++ ++ /* We need to force a call to our callback here in case ++ * xend already configured us! 
++ */ ++ pciback_be_watch(&pdev->be_watch, NULL, 0); ++ ++ out: ++ return err; ++} ++ ++static int pciback_xenbus_remove(struct xenbus_device *dev) ++{ ++ struct pciback_device *pdev = dev->dev.driver_data; ++ ++ if (pdev != NULL) ++ free_pdev(pdev); ++ ++ return 0; ++} ++ ++static const struct xenbus_device_id xenpci_ids[] = { ++ {"pci"}, ++ {{0}}, ++}; ++ ++static struct xenbus_driver xenbus_pciback_driver = { ++ .name = "pciback", ++ .owner = THIS_MODULE, ++ .ids = xenpci_ids, ++ .probe = pciback_xenbus_probe, ++ .remove = pciback_xenbus_remove, ++ .otherend_changed = pciback_frontend_changed, ++}; ++ ++int __init pciback_xenbus_register(void) ++{ ++ if (!is_running_on_xen()) ++ return -ENODEV; ++ pciback_wq = create_workqueue("pciback_workqueue"); ++ if (!pciback_wq) { ++ printk(KERN_ERR "pciback_xenbus_register: create" ++ "pciback_workqueue failed\n"); ++ return -EFAULT; ++ } ++ return xenbus_register_backend(&xenbus_pciback_driver); ++} ++ ++void __exit pciback_xenbus_unregister(void) ++{ ++ destroy_workqueue(pciback_wq); ++ xenbus_unregister_driver(&xenbus_pciback_driver); ++} +-- +1.7.3.4 + + +From cf2a64556286b762ce6a3a9b408ba7ecdcaea03a Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Tue, 13 Oct 2009 17:22:22 -0400 +Subject: [PATCH 008/139] xen-pciback: Fix include header name change (evtchn.h is now events.h) + +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/pciback/pci_stub.c | 2 +- + drivers/xen/pciback/pciback_ops.c | 2 +- + drivers/xen/pciback/xenbus.c | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c +index c481a73..c02f21f 100644 +--- a/drivers/xen/pciback/pci_stub.c ++++ b/drivers/xen/pciback/pci_stub.c +@@ -13,7 +13,7 @@ + #include + #include + #include +-#include ++#include + #include "pciback.h" + #include "conf_space.h" + #include "conf_space_quirks.h" +diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c +index b85b2db..58d09eb 100644 +--- a/drivers/xen/pciback/pciback_ops.c ++++ b/drivers/xen/pciback/pciback_ops.c +@@ -6,7 +6,7 @@ + #include + #include + #include +-#include ++#include + #include "pciback.h" + + int verbose_request = 0; +diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c +index 4d56c45..bbca3fe 100644 +--- a/drivers/xen/pciback/xenbus.c ++++ b/drivers/xen/pciback/xenbus.c +@@ -8,7 +8,7 @@ + #include + #include + #include +-#include ++#include + #include "pciback.h" + + #define INVALID_EVTCHN_IRQ (-1) +-- +1.7.3.4 + + +From f6222ae41f2fee3f67983f833ee8dcba2c7a1362 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Tue, 13 Oct 2009 17:22:24 -0400 +Subject: [PATCH 009/139] xen-pciback: Use pci_is_enabled() instead of is_enabled. 
+ +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/pciback/conf_space_header.c | 4 ++-- + drivers/xen/pciback/pciback_ops.c | 1 - + 2 files changed, 2 insertions(+), 3 deletions(-) + +diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c +index f794e12..5a9e028 100644 +--- a/drivers/xen/pciback/conf_space_header.c ++++ b/drivers/xen/pciback/conf_space_header.c +@@ -22,14 +22,14 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) + { + int err; + +- if (!dev->is_enabled && is_enable_cmd(value)) { ++ if (!pci_is_enabled(dev) && is_enable_cmd(value)) { + if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: enable\n", + pci_name(dev)); + err = pci_enable_device(dev); + if (err) + return err; +- } else if (dev->is_enabled && !is_enable_cmd(value)) { ++ } else if (pci_is_enabled(dev) && !is_enable_cmd(value)) { + if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: disable\n", + pci_name(dev)); +diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c +index 58d09eb..2d570e7 100644 +--- a/drivers/xen/pciback/pciback_ops.c ++++ b/drivers/xen/pciback/pciback_ops.c +@@ -26,7 +26,6 @@ void pciback_reset_device(struct pci_dev *dev) + + pci_write_config_word(dev, PCI_COMMAND, 0); + +- dev->is_enabled = 0; + dev->is_busmaster = 0; + } else { + pci_read_config_word(dev, PCI_COMMAND, &cmd); +-- +1.7.3.4 + + +From 0d379d03a3284e4b4d890b7e1b8163d485cc72e6 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Tue, 13 Oct 2009 17:22:25 -0400 +Subject: [PATCH 010/139] xen-pciback: Fix usage of INIT_WORK. + +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/pciback/pciback.h | 4 ++-- + drivers/xen/pciback/pciback_ops.c | 7 ++++--- + drivers/xen/pciback/xenbus.c | 3 ++- + 3 files changed, 8 insertions(+), 6 deletions(-) + +diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h +index 6744f45..4fb8c05 100644 +--- a/drivers/xen/pciback/pciback.h ++++ b/drivers/xen/pciback/pciback.h +@@ -99,8 +99,8 @@ int pciback_publish_pci_roots(struct pciback_device *pdev, + void pciback_release_devices(struct pciback_device *pdev); + + /* Handles events from front-end */ +-irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs); +-void pciback_do_op(void *data); ++irqreturn_t pciback_handle_event(int irq, void *dev_id); ++void pciback_do_op(struct work_struct *data); + + int pciback_xenbus_register(void); + void pciback_xenbus_unregister(void); +diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c +index 2d570e7..6624faf 100644 +--- a/drivers/xen/pciback/pciback_ops.c ++++ b/drivers/xen/pciback/pciback_ops.c +@@ -66,9 +66,10 @@ void test_and_schedule_op(struct pciback_device *pdev) + * context because some of the pci_* functions can sleep (mostly due to ACPI + * use of semaphores). 
This function is intended to be called from a work + * queue in process context taking a struct pciback_device as a parameter */ +-void pciback_do_op(void *data) ++ ++void pciback_do_op(struct work_struct *data) + { +- struct pciback_device *pdev = data; ++ struct pciback_device *pdev = container_of(data, struct pciback_device, op_work); + struct pci_dev *dev; + struct xen_pci_op *op = &pdev->sh_info->op; + +@@ -123,7 +124,7 @@ void pciback_do_op(void *data) + test_and_schedule_op(pdev); + } + +-irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs) ++irqreturn_t pciback_handle_event(int irq, void *dev_id) + { + struct pciback_device *pdev = dev_id; + +diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c +index bbca3fe..bd52289 100644 +--- a/drivers/xen/pciback/xenbus.c ++++ b/drivers/xen/pciback/xenbus.c +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + #include "pciback.h" + + #define INVALID_EVTCHN_IRQ (-1) +@@ -33,7 +34,7 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev) + pdev->evtchn_irq = INVALID_EVTCHN_IRQ; + pdev->be_watching = 0; + +- INIT_WORK(&pdev->op_work, pciback_do_op, pdev); ++ INIT_WORK(&pdev->op_work, pciback_do_op); + + if (pciback_init_devices(pdev)) { + kfree(pdev); +-- +1.7.3.4 + + +From 57f6c49d0f428f96cca49147d68b0bb6156613a6 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Tue, 13 Oct 2009 17:22:26 -0400 +Subject: [PATCH 011/139] xen-pciback: Update the calling mechanism for xenbus_[map|unmap]_ring_valloc functions. + +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/pciback/pciback.h | 1 - + drivers/xen/pciback/xenbus.c | 18 +++++++++--------- + 2 files changed, 9 insertions(+), 10 deletions(-) + +diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h +index 4fb8c05..5e8e14e 100644 +--- a/drivers/xen/pciback/pciback.h ++++ b/drivers/xen/pciback/pciback.h +@@ -36,7 +36,6 @@ struct pciback_device { + + int evtchn_irq; + +- struct vm_struct *sh_area; + struct xen_pci_sharedinfo *sh_info; + + unsigned long flags; +diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c +index bd52289..5be1350 100644 +--- a/drivers/xen/pciback/xenbus.c ++++ b/drivers/xen/pciback/xenbus.c +@@ -7,6 +7,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -29,7 +30,6 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev) + + spin_lock_init(&pdev->dev_lock); + +- pdev->sh_area = NULL; + pdev->sh_info = NULL; + pdev->evtchn_irq = INVALID_EVTCHN_IRQ; + pdev->be_watching = 0; +@@ -59,7 +59,7 @@ static void pciback_disconnect(struct pciback_device *pdev) + flush_workqueue(pciback_wq); + + if (pdev->sh_info != NULL) { +- xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_area); ++ xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info); + pdev->sh_info = NULL; + } + +@@ -85,23 +85,23 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref, + int remote_evtchn) + { + int err = 0; +- struct vm_struct *area; ++ void *vaddr; + + dev_dbg(&pdev->xdev->dev, + "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n", + gnt_ref, remote_evtchn); + +- area = xenbus_map_ring_valloc(pdev->xdev, gnt_ref); +- if (IS_ERR(area)) { +- err = PTR_ERR(area); ++ err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr); ++ if (err < 0) { ++ xenbus_dev_fatal(pdev->xdev, err, ++ "Error mapping other domain page in ours."); + goto out; + } +- pdev->sh_area = area; +- pdev->sh_info = area->addr; 
++ pdev->sh_info = vaddr; + + err = bind_interdomain_evtchn_to_irqhandler( + pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event, +- SA_SAMPLE_RANDOM, "pciback", pdev); ++ 0, "pciback", pdev); + if (err < 0) { + xenbus_dev_fatal(pdev->xdev, err, + "Error binding event channel to IRQ"); +-- +1.7.3.4 + + +From 6e86fcb926e41fb55f512972603e5aaf77e2efb8 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Tue, 13 Oct 2009 17:22:30 -0400 +Subject: [PATCH 012/139] xen-pciback: Add check to load only under priviliged domain. + +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/pciback/pci_stub.c | 5 +++++ + drivers/xen/pciback/xenbus.c | 2 -- + 2 files changed, 5 insertions(+), 2 deletions(-) + +diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c +index c02f21f..d97dac5 100644 +--- a/drivers/xen/pciback/pci_stub.c ++++ b/drivers/xen/pciback/pci_stub.c +@@ -14,6 +14,8 @@ + #include + #include + #include ++#include ++#include + #include "pciback.h" + #include "conf_space.h" + #include "conf_space_quirks.h" +@@ -1286,6 +1288,9 @@ static int __init pciback_init(void) + { + int err; + ++ if (!xen_initial_domain()) ++ return -ENODEV; ++ + err = pciback_config_init(); + if (err) + return err; +diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c +index 5be1350..a85c413 100644 +--- a/drivers/xen/pciback/xenbus.c ++++ b/drivers/xen/pciback/xenbus.c +@@ -693,8 +693,6 @@ static struct xenbus_driver xenbus_pciback_driver = { + + int __init pciback_xenbus_register(void) + { +- if (!is_running_on_xen()) +- return -ENODEV; + pciback_wq = create_workqueue("pciback_workqueue"); + if (!pciback_wq) { + printk(KERN_ERR "pciback_xenbus_register: create" +-- +1.7.3.4 + + +From c1139f912c1336538e51966d56e5905954052cba Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Tue, 13 Oct 2009 17:22:31 -0400 +Subject: [PATCH 013/139] xen-pciback: Remove usage of pci_restore_bars() as Linux handles the power-up states correctly now. + +Originally this code was pulled from the upstream kernel, and stuck +in the linux-2.6-sparse tree. At that point of time, the Linux tree (2.6.16?) +did not know how to handle this. Nowadays the pci_set_power_state routine +handles this case so we do not need this anymore. + +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/pciback/conf_space_capability_pm.c | 13 ------------- + 1 files changed, 0 insertions(+), 13 deletions(-) + +diff --git a/drivers/xen/pciback/conf_space_capability_pm.c b/drivers/xen/pciback/conf_space_capability_pm.c +index e2f99c7..e1d3af4 100644 +--- a/drivers/xen/pciback/conf_space_capability_pm.c ++++ b/drivers/xen/pciback/conf_space_capability_pm.c +@@ -58,19 +58,6 @@ static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value, + goto out; + } + +- /* +- * Device may lose PCI config info on D3->D0 transition. This +- * is a problem for some guests which will not reset BARs. Even +- * those that have a go will be foiled by our BAR-write handler +- * which will discard the write! Since Linux won't re-init +- * the config space automatically in all cases, we do it here. +- * Future: Should we re-initialise all first 64 bytes of config space? 
+- */ +- if (new_state == PCI_D0 && +- (old_state == PCI_D3hot || old_state == PCI_D3cold) && +- !(old_value & PCI_PM_CTRL_NO_SOFT_RESET)) +- pci_restore_bars(dev); +- + out: + return err; + } +-- +1.7.3.4 + + +From 721657d92623cfcf2f6f68c14abf97eb40fa6b20 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Tue, 13 Oct 2009 17:22:32 -0400 +Subject: [PATCH 014/139] xen-pciback: Enable Xen-PCI-back to be compiled. + +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/Kconfig | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++ + drivers/xen/Makefile | 1 + + 2 files changed, 66 insertions(+), 0 deletions(-) + +diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig +index 6e6180c..d874453 100644 +--- a/drivers/xen/Kconfig ++++ b/drivers/xen/Kconfig +@@ -29,6 +29,71 @@ config XEN_DEV_EVTCHN + Support for backend device drivers that provide I/O services + to other virtual machines. + ++config XEN_PCIDEV_BACKEND ++ tristate "PCI-device backend driver" ++ depends on PCI ++ depends on XEN_BACKEND ++ help ++ The PCI device backend driver allows the kernel to export arbitrary ++ PCI devices to other guests. If you select this to be a module, you ++ will need to make sure no other driver has bound to the device(s) ++ you want to make visible to other guests. ++ ++choice ++ prompt "PCI Backend Mode" ++ depends on XEN_PCIDEV_BACKEND ++ default XEN_PCIDEV_BACKEND_VPCI if !IA64 ++ default XEN_PCIDEV_BACKEND_CONTROLLER if IA64 ++ ++config XEN_PCIDEV_BACKEND_VPCI ++ bool "Virtual PCI" ++ ---help--- ++ This PCI Backend hides the true PCI topology and makes the frontend ++ think there is a single PCI bus with only the exported devices on it. ++ For example, a device at 03:05.0 will be re-assigned to 00:00.0. A ++ second device at 02:1a.1 will be re-assigned to 00:01.1. ++ ++config XEN_PCIDEV_BACKEND_PASS ++ bool "Passthrough" ++ ---help--- ++ This PCI Backend provides a real view of the PCI topology to the ++ frontend (for example, a device at 06:01.b will still appear at ++ 06:01.b to the frontend). This is similar to how Xen 2.0.x exposed ++ PCI devices to its driver domains. This may be required for drivers ++ which depend on finding their hardward in certain bus/slot ++ locations. ++ ++config XEN_PCIDEV_BACKEND_SLOT ++ bool "Slot" ++ ---help--- ++ This PCI Backend hides the true PCI topology and makes the frontend ++ think there is a single PCI bus with only the exported devices on it. ++ Contrary to the virtual PCI backend, a function becomes a new slot. ++ For example, a device at 03:05.2 will be re-assigned to 00:00.0. A ++ second device at 02:1a.1 will be re-assigned to 00:01.0. ++ ++config XEN_PCIDEV_BACKEND_CONTROLLER ++ bool "Controller" ++ depends on IA64 ++ ---help--- ++ This PCI backend virtualizes the PCI bus topology by providing a ++ virtual bus per PCI root device. Devices which are physically under ++ the same root bus will appear on the same virtual bus. For systems ++ with complex I/O addressing, this is the only backend which supports ++ extended I/O port spaces and MMIO translation offsets. This backend ++ also supports slot virtualization. For example, a device at ++ 0000:01:02.1 will be re-assigned to 0000:00:00.0. A second device ++ at 0000:02:05.0 (behind a P2P bridge on bus 0000:01) will be ++ re-assigned to 0000:00:01.0. A third device at 0000:16:05.0 (under ++ a different PCI root bus) will be re-assigned to 0000:01:00.0. 
++ ++endchoice ++ ++config XEN_PCIDEV_BE_DEBUG ++ bool "PCI Backend Debugging" ++ depends on XEN_PCIDEV_BACKEND ++ ++ + config XENFS + tristate "Xen filesystem" + default y +diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile +index eb8a78d..3737dee 100644 +--- a/drivers/xen/Makefile ++++ b/drivers/xen/Makefile +@@ -9,6 +9,7 @@ obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o + obj-$(CONFIG_XEN_BALLOON) += balloon.o + obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o + obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o ++obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/ + obj-$(CONFIG_XENFS) += xenfs/ + obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o + obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o +-- +1.7.3.4 + + +From c164cd8577017d1c4e001b475fadddc7d2ff5c78 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Thu, 5 Nov 2009 15:25:43 -0500 +Subject: [PATCH 015/139] xen-pciback: Return the physical IRQ number instead of the allocated IRQ number to pcifront. + +The allocation of IRQ numbers in Linux privileged domains is based +on finding the first unbound IRQ number. After the allocation is done +a HYPERCALL to Xen is done, which allocates a PIRQ globally. +That PIRQ->IRQ binding is saved in data structures that are used +during ISR executions. + +Before this patch, for non-privileged domains we would return the local +IRQ number instead of the PIRQ. The non-privileged domains require the +PIRQ so that they can attach the their own interrupt handler to it. +Fortunatly there is a function, 'xen_gsi_from_irq' that returns +that global IRQ number. + +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/pciback/conf_space_capability_msi.c | 12 ++++++++---- + 1 files changed, 8 insertions(+), 4 deletions(-) + +diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c +index 762e396..7fb5371 100644 +--- a/drivers/xen/pciback/conf_space_capability_msi.c ++++ b/drivers/xen/pciback/conf_space_capability_msi.c +@@ -6,6 +6,7 @@ + #include "conf_space.h" + #include "conf_space_capability.h" + #include ++#include + #include "pciback.h" + + int pciback_enable_msi(struct pciback_device *pdev, +@@ -22,7 +23,9 @@ int pciback_enable_msi(struct pciback_device *pdev, + return XEN_PCI_ERR_op_failed; + } + +- op->value = dev->irq; ++ /* The value the guest needs is actually the IDT vector, not the ++ * the local domain's IRQ number. */ ++ op->value = xen_gsi_from_irq(dev->irq); + return 0; + } + +@@ -31,7 +34,7 @@ int pciback_disable_msi(struct pciback_device *pdev, + { + pci_disable_msi(dev); + +- op->value = dev->irq; ++ op->value = xen_gsi_from_irq(dev->irq); + return 0; + } + +@@ -57,7 +60,8 @@ int pciback_enable_msix(struct pciback_device *pdev, + + for (i = 0; i < op->value; i++) { + op->msix_entries[i].entry = entries[i].entry; +- op->msix_entries[i].vector = entries[i].vector; ++ op->msix_entries[i].vector = ++ xen_gsi_from_irq(entries[i].vector); + } + + kfree(entries); +@@ -73,7 +77,7 @@ int pciback_disable_msix(struct pciback_device *pdev, + + pci_disable_msix(dev); + +- op->value = dev->irq; ++ op->value = xen_gsi_from_irq(dev->irq); + return 0; + } + +-- +1.7.3.4 + + +From b0b035f1de3282aa96a6dc28007d513e8fce793d Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Thu, 5 Nov 2009 15:25:44 -0500 +Subject: [PATCH 016/139] xen-pciback: Fix checkpatch warnings and errors for pciback/ directory. 
+ +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/pciback/conf_space.c | 34 +++--- + drivers/xen/pciback/conf_space.h | 30 ++-- + drivers/xen/pciback/conf_space_capability.c | 5 +- + drivers/xen/pciback/conf_space_capability.h | 3 + + drivers/xen/pciback/conf_space_capability_msi.c | 3 +- + drivers/xen/pciback/conf_space_capability_pm.c | 4 +- + drivers/xen/pciback/conf_space_capability_vpd.c | 2 +- + drivers/xen/pciback/conf_space_header.c | 7 +- + drivers/xen/pciback/conf_space_quirks.c | 16 ++- + drivers/xen/pciback/controller.c | 15 +- + drivers/xen/pciback/passthrough.c | 6 +- + drivers/xen/pciback/pci_stub.c | 165 +++++++++++------------ + drivers/xen/pciback/pciback.h | 28 +++-- + drivers/xen/pciback/pciback_ops.c | 74 +++++------ + drivers/xen/pciback/slot.c | 22 ++-- + drivers/xen/pciback/vpci.c | 28 ++-- + drivers/xen/pciback/xenbus.c | 42 +++--- + 17 files changed, 245 insertions(+), 239 deletions(-) + +diff --git a/drivers/xen/pciback/conf_space.c b/drivers/xen/pciback/conf_space.c +index 0c76db1..370c18e 100644 +--- a/drivers/xen/pciback/conf_space.c ++++ b/drivers/xen/pciback/conf_space.c +@@ -18,11 +18,11 @@ + static int permissive; + module_param(permissive, bool, 0644); + +-#define DEFINE_PCI_CONFIG(op,size,type) \ ++#define DEFINE_PCI_CONFIG(op, size, type) \ + int pciback_##op##_config_##size \ + (struct pci_dev *dev, int offset, type value, void *data) \ + { \ +- return pci_##op##_config_##size (dev, offset, value); \ ++ return pci_##op##_config_##size(dev, offset, value); \ + } + + DEFINE_PCI_CONFIG(read, byte, u8 *) +@@ -139,14 +139,15 @@ static int pcibios_err_to_errno(int err) + } + + int pciback_config_read(struct pci_dev *dev, int offset, int size, +- u32 * ret_val) ++ u32 *ret_val) + { + int err = 0; + struct pciback_dev_data *dev_data = pci_get_drvdata(dev); + const struct config_field_entry *cfg_entry; + const struct config_field *field; + int req_start, req_end, field_start, field_end; +- /* if read fails for any reason, return 0 (as if device didn't respond) */ ++ /* if read fails for any reason, return 0 ++ * (as if device didn't respond) */ + u32 value = 0, tmp_val; + + if (unlikely(verbose_request)) +@@ -161,10 +162,10 @@ int pciback_config_read(struct pci_dev *dev, int offset, int size, + /* Get the real value first, then modify as appropriate */ + switch (size) { + case 1: +- err = pci_read_config_byte(dev, offset, (u8 *) & value); ++ err = pci_read_config_byte(dev, offset, (u8 *) &value); + break; + case 2: +- err = pci_read_config_word(dev, offset, (u16 *) & value); ++ err = pci_read_config_word(dev, offset, (u16 *) &value); + break; + case 4: + err = pci_read_config_dword(dev, offset, &value); +@@ -192,7 +193,7 @@ int pciback_config_read(struct pci_dev *dev, int offset, int size, + } + } + +- out: ++out: + if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n", + pci_name(dev), size, offset, value); +@@ -276,8 +277,8 @@ int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value) + } else if (!dev_data->warned_on_write) { + dev_data->warned_on_write = 1; + dev_warn(&dev->dev, "Driver tried to write to a " +- "read-only configuration space field at offset " +- "0x%x, size %d. This may be harmless, but if " ++ "read-only configuration space field at offset" ++ " 0x%x, size %d. 
This may be harmless, but if " + "you have problems with your device:\n" + "1) see permissive attribute in sysfs\n" + "2) report problems to the xen-devel " +@@ -295,8 +296,8 @@ void pciback_config_free_dyn_fields(struct pci_dev *dev) + struct config_field_entry *cfg_entry, *t; + const struct config_field *field; + +- dev_dbg(&dev->dev, +- "free-ing dynamically allocated virtual configuration space fields\n"); ++ dev_dbg(&dev->dev, "free-ing dynamically allocated virtual " ++ "configuration space fields\n"); + if (!dev_data) + return; + +@@ -306,8 +307,7 @@ void pciback_config_free_dyn_fields(struct pci_dev *dev) + if (field->clean) { + field->clean((struct config_field *)field); + +- if (cfg_entry->data) +- kfree(cfg_entry->data); ++ kfree(cfg_entry->data); + + list_del(&cfg_entry->list); + kfree(cfg_entry); +@@ -376,7 +376,7 @@ int pciback_config_add_field_offset(struct pci_dev *dev, + cfg_entry->base_offset = base_offset; + + /* silently ignore duplicate fields */ +- err = pciback_field_is_dup(dev,OFFSET(cfg_entry)); ++ err = pciback_field_is_dup(dev, OFFSET(cfg_entry)); + if (err) + goto out; + +@@ -395,14 +395,14 @@ int pciback_config_add_field_offset(struct pci_dev *dev, + OFFSET(cfg_entry)); + list_add_tail(&cfg_entry->list, &dev_data->config_fields); + +- out: ++out: + if (err) + kfree(cfg_entry); + + return err; + } + +-/* This sets up the device's virtual configuration space to keep track of ++/* This sets up the device's virtual configuration space to keep track of + * certain registers (like the base address registers (BARs) so that we can + * keep the client from manipulating them directly. + */ +@@ -425,7 +425,7 @@ int pciback_config_init_dev(struct pci_dev *dev) + + err = pciback_config_quirks_init(dev); + +- out: ++out: + return err; + } + +diff --git a/drivers/xen/pciback/conf_space.h b/drivers/xen/pciback/conf_space.h +index fe746ef..50ebef2 100644 +--- a/drivers/xen/pciback/conf_space.h ++++ b/drivers/xen/pciback/conf_space.h +@@ -11,21 +11,21 @@ + #include + + /* conf_field_init can return an errno in a ptr with ERR_PTR() */ +-typedef void *(*conf_field_init) (struct pci_dev * dev, int offset); +-typedef void (*conf_field_reset) (struct pci_dev * dev, int offset, void *data); +-typedef void (*conf_field_free) (struct pci_dev * dev, int offset, void *data); ++typedef void *(*conf_field_init) (struct pci_dev *dev, int offset); ++typedef void (*conf_field_reset) (struct pci_dev *dev, int offset, void *data); ++typedef void (*conf_field_free) (struct pci_dev *dev, int offset, void *data); + +-typedef int (*conf_dword_write) (struct pci_dev * dev, int offset, u32 value, ++typedef int (*conf_dword_write) (struct pci_dev *dev, int offset, u32 value, + void *data); +-typedef int (*conf_word_write) (struct pci_dev * dev, int offset, u16 value, ++typedef int (*conf_word_write) (struct pci_dev *dev, int offset, u16 value, + void *data); +-typedef int (*conf_byte_write) (struct pci_dev * dev, int offset, u8 value, ++typedef int (*conf_byte_write) (struct pci_dev *dev, int offset, u8 value, + void *data); +-typedef int (*conf_dword_read) (struct pci_dev * dev, int offset, u32 * value, ++typedef int (*conf_dword_read) (struct pci_dev *dev, int offset, u32 *value, + void *data); +-typedef int (*conf_word_read) (struct pci_dev * dev, int offset, u16 * value, ++typedef int (*conf_word_read) (struct pci_dev *dev, int offset, u16 *value, + void *data); +-typedef int (*conf_byte_read) (struct pci_dev * dev, int offset, u8 * value, ++typedef int (*conf_byte_read) (struct pci_dev *dev, int 
offset, u8 *value, + void *data); + + /* These are the fields within the configuration space which we +@@ -39,7 +39,7 @@ struct config_field { + conf_field_init init; + conf_field_reset reset; + conf_field_free release; +- void (*clean) (struct config_field * field); ++ void (*clean) (struct config_field *field); + union { + struct { + conf_dword_write write; +@@ -92,8 +92,8 @@ static inline int pciback_config_add_fields(struct pci_dev *dev, + } + + static inline int pciback_config_add_fields_offset(struct pci_dev *dev, +- const struct config_field *field, +- unsigned int offset) ++ const struct config_field *field, ++ unsigned int offset) + { + int i, err = 0; + for (i = 0; field[i].size != 0; i++) { +@@ -105,11 +105,11 @@ static inline int pciback_config_add_fields_offset(struct pci_dev *dev, + } + + /* Read/Write the real configuration space */ +-int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 * value, ++int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 *value, + void *data); +-int pciback_read_config_word(struct pci_dev *dev, int offset, u16 * value, ++int pciback_read_config_word(struct pci_dev *dev, int offset, u16 *value, + void *data); +-int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 * value, ++int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 *value, + void *data); + int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value, + void *data); +diff --git a/drivers/xen/pciback/conf_space_capability.c b/drivers/xen/pciback/conf_space_capability.c +index 50efca4..0ea84d6 100644 +--- a/drivers/xen/pciback/conf_space_capability.c ++++ b/drivers/xen/pciback/conf_space_capability.c +@@ -53,13 +53,10 @@ int pciback_config_capability_add_fields(struct pci_dev *dev) + } + } + +- out: ++out: + return err; + } + +-extern struct pciback_config_capability pciback_config_capability_vpd; +-extern struct pciback_config_capability pciback_config_capability_pm; +- + int pciback_config_capability_init(void) + { + register_capability(&pciback_config_capability_vpd); +diff --git a/drivers/xen/pciback/conf_space_capability.h b/drivers/xen/pciback/conf_space_capability.h +index 823392e..8da3ac4 100644 +--- a/drivers/xen/pciback/conf_space_capability.h ++++ b/drivers/xen/pciback/conf_space_capability.h +@@ -20,4 +20,7 @@ struct pciback_config_capability { + const struct config_field *fields; + }; + ++extern struct pciback_config_capability pciback_config_capability_vpd; ++extern struct pciback_config_capability pciback_config_capability_pm; ++ + #endif +diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c +index 7fb5371..b70ea8b 100644 +--- a/drivers/xen/pciback/conf_space_capability_msi.c ++++ b/drivers/xen/pciback/conf_space_capability_msi.c +@@ -18,7 +18,8 @@ int pciback_enable_msi(struct pciback_device *pdev, + status = pci_enable_msi(dev); + + if (status) { +- printk("error enable msi for guest %x status %x\n", otherend, status); ++ printk(KERN_ERR "error enable msi for guest %x status %x\n", ++ otherend, status); + op->value = 0; + return XEN_PCI_ERR_op_failed; + } +diff --git a/drivers/xen/pciback/conf_space_capability_pm.c b/drivers/xen/pciback/conf_space_capability_pm.c +index e1d3af4..0442616 100644 +--- a/drivers/xen/pciback/conf_space_capability_pm.c ++++ b/drivers/xen/pciback/conf_space_capability_pm.c +@@ -20,7 +20,7 @@ static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value, + + *value = real_value & ~PCI_PM_CAP_PME_MASK; + +- out: ++out: + 
return err; + } + +@@ -77,7 +77,7 @@ static void *pm_ctrl_init(struct pci_dev *dev, int offset) + err = pci_write_config_word(dev, offset, value); + } + +- out: ++out: + return ERR_PTR(err); + } + +diff --git a/drivers/xen/pciback/conf_space_capability_vpd.c b/drivers/xen/pciback/conf_space_capability_vpd.c +index 920cb4a..e7b4d66 100644 +--- a/drivers/xen/pciback/conf_space_capability_vpd.c ++++ b/drivers/xen/pciback/conf_space_capability_vpd.c +@@ -33,7 +33,7 @@ static const struct config_field caplist_vpd[] = { + }, + {} + }; +- ++ + struct pciback_config_capability pciback_config_capability_vpd = { + .capability = PCI_CAP_ID_VPD, + .fields = caplist_vpd, +diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c +index 5a9e028..3ae7da1 100644 +--- a/drivers/xen/pciback/conf_space_header.c ++++ b/drivers/xen/pciback/conf_space_header.c +@@ -51,7 +51,8 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) + err = pci_set_mwi(dev); + if (err) { + printk(KERN_WARNING +- "pciback: %s: cannot enable memory-write-invalidate (%d)\n", ++ "pciback: %s: cannot enable " ++ "memory-write-invalidate (%d)\n", + pci_name(dev), err); + value &= ~PCI_COMMAND_INVALIDATE; + } +@@ -206,7 +207,7 @@ static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data) + || value == PCI_BIST_START) + err = pci_write_config_byte(dev, offset, value); + +- out: ++out: + return err; + } + +@@ -312,6 +313,6 @@ int pciback_config_header_add_fields(struct pci_dev *dev) + break; + } + +- out: ++out: + return err; + } +diff --git a/drivers/xen/pciback/conf_space_quirks.c b/drivers/xen/pciback/conf_space_quirks.c +index 244a438..45c31fb 100644 +--- a/drivers/xen/pciback/conf_space_quirks.c ++++ b/drivers/xen/pciback/conf_space_quirks.c +@@ -18,8 +18,10 @@ match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) + { + if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) && + (id->device == PCI_ANY_ID || id->device == dev->device) && +- (id->subvendor == PCI_ANY_ID || id->subvendor == dev->subsystem_vendor) && +- (id->subdevice == PCI_ANY_ID || id->subdevice == dev->subsystem_device) && ++ (id->subvendor == PCI_ANY_ID || ++ id->subvendor == dev->subsystem_vendor) && ++ (id->subdevice == PCI_ANY_ID || ++ id->subdevice == dev->subsystem_device) && + !((id->class ^ dev->class) & id->class_mask)) + return id; + return NULL; +@@ -35,7 +37,7 @@ struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev) + tmp_quirk = NULL; + printk(KERN_DEBUG + "quirk didn't match any device pciback knows about\n"); +- out: ++out: + return tmp_quirk; + } + +@@ -51,7 +53,7 @@ int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg) + struct config_field_entry *cfg_entry; + + list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { +- if ( OFFSET(cfg_entry) == reg) { ++ if (OFFSET(cfg_entry) == reg) { + ret = 1; + break; + } +@@ -84,7 +86,7 @@ int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field + + pciback_config_add_field(dev, field); + +- out: ++out: + return err; + } + +@@ -110,7 +112,7 @@ int pciback_config_quirks_init(struct pci_dev *dev) + quirk->pdev = dev; + + register_quirk(quirk); +- out: ++out: + return ret; + } + +@@ -133,6 +135,6 @@ int pciback_config_quirk_release(struct pci_dev *dev) + list_del(&quirk->quirks_list); + kfree(quirk); + +- out: ++out: + return ret; + } +diff --git a/drivers/xen/pciback/controller.c b/drivers/xen/pciback/controller.c +index 294e48f..7f04f11 100644 +--- 
a/drivers/xen/pciback/controller.c ++++ b/drivers/xen/pciback/controller.c +@@ -259,7 +259,7 @@ static acpi_status write_xenbus_resource(struct acpi_resource *res, void *data) + !(addr.resource_type == ACPI_IO_RANGE && + addr.info.io.translation)) + return AE_OK; +- ++ + /* Store the resource in xenbus for the guest */ + len = snprintf(str, sizeof(str), "root-%d-resource-%d", + info->root_num, info->resource_count); +@@ -314,7 +314,7 @@ int pciback_publish_pci_roots(struct pciback_device *pdev, + goto out; + + /* +- * Now figure out which root-%d this belongs to ++ * Now figure out which root-%d this belongs to + * so we can associate resources with it. + */ + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, +@@ -407,8 +407,8 @@ void pciback_release_devices(struct pciback_device *pdev) + pdev->pci_dev_data = NULL; + } + +-int pciback_get_pcifront_dev(struct pci_dev *pcidev, +- struct pciback_device *pdev, ++int pciback_get_pcifront_dev(struct pci_dev *pcidev, ++ struct pciback_device *pdev, + unsigned int *domain, unsigned int *bus, unsigned int *devfn) + { + struct controller_dev_data *dev_data = pdev->pci_dev_data; +@@ -420,13 +420,12 @@ int pciback_get_pcifront_dev(struct pci_dev *pcidev, + + list_for_each_entry(cntrl_entry, &dev_data->list, list) { + list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) { +- if ( (dev_entry->dev->bus->number == ++ if ((dev_entry->dev->bus->number == + pcidev->bus->number) && +- (dev_entry->dev->devfn == ++ (dev_entry->dev->devfn == + pcidev->devfn) && + (pci_domain_nr(dev_entry->dev->bus) == +- pci_domain_nr(pcidev->bus))) +- { ++ pci_domain_nr(pcidev->bus))) { + found = 1; + *domain = cntrl_entry->domain; + *bus = cntrl_entry->bus; +diff --git a/drivers/xen/pciback/passthrough.c b/drivers/xen/pciback/passthrough.c +index 9e7a0c4..5386bebf 100644 +--- a/drivers/xen/pciback/passthrough.c ++++ b/drivers/xen/pciback/passthrough.c +@@ -165,8 +165,10 @@ void pciback_release_devices(struct pciback_device *pdev) + pdev->pci_dev_data = NULL; + } + +-int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, +- unsigned int *domain, unsigned int *bus, unsigned int *devfn) ++int pciback_get_pcifront_dev(struct pci_dev *pcidev, ++ struct pciback_device *pdev, ++ unsigned int *domain, unsigned int *bus, ++ unsigned int *devfn) + + { + *domain = pci_domain_nr(pcidev->bus); +diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c +index d97dac5..28222ee 100644 +--- a/drivers/xen/pciback/pci_stub.c ++++ b/drivers/xen/pciback/pci_stub.c +@@ -20,7 +20,7 @@ + #include "conf_space.h" + #include "conf_space_quirks.h" + +-static char *pci_devs_to_hide = NULL; ++static char *pci_devs_to_hide; + wait_queue_head_t aer_wait_queue; + /*Add sem for sync AER handling and pciback remove/reconfigue ops, + * We want to avoid in middle of AER ops, pciback devices is being removed +@@ -43,7 +43,7 @@ struct pcistub_device { + spinlock_t lock; + + struct pci_dev *dev; +- struct pciback_device *pdev; /* non-NULL if struct pci_dev is in use */ ++ struct pciback_device *pdev;/* non-NULL if struct pci_dev is in use */ + }; + + /* Access to pcistub_devices & seized_devices lists and the initialize_devices +@@ -55,7 +55,7 @@ static LIST_HEAD(pcistub_devices); + /* wait for device_initcall before initializing our devices + * (see pcistub_init_devices_late) + */ +-static int initialize_devices = 0; ++static int initialize_devices; + static LIST_HEAD(seized_devices); + + static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev) 
+@@ -132,7 +132,7 @@ static struct pcistub_device *pcistub_device_find(int domain, int bus, + /* didn't find it */ + psdev = NULL; + +- out: ++out: + spin_unlock_irqrestore(&pcistub_devices_lock, flags); + return psdev; + } +@@ -321,10 +321,10 @@ static int __devinit pcistub_init_device(struct pci_dev *dev) + + return 0; + +- config_release: ++config_release: + pciback_config_free_dev(dev); + +- out: ++out: + pci_set_drvdata(dev, NULL); + kfree(dev_data); + return err; +@@ -443,7 +443,7 @@ static int __devinit pcistub_probe(struct pci_dev *dev, + /* Didn't find the device */ + err = -ENODEV; + +- out: ++out: + return err; + } + +@@ -511,26 +511,24 @@ static void kill_domain_by_device(struct pcistub_device *psdev) + int err; + char nodename[1024]; + +- if (!psdev) ++ if (!psdev) + dev_err(&psdev->dev->dev, + "device is NULL when do AER recovery/kill_domain\n"); +- sprintf(nodename, "/local/domain/0/backend/pci/%d/0", ++ sprintf(nodename, "/local/domain/0/backend/pci/%d/0", + psdev->pdev->xdev->otherend_id); + nodename[strlen(nodename)] = '\0'; + + again: + err = xenbus_transaction_start(&xbt); +- if (err) +- { ++ if (err) { + dev_err(&psdev->dev->dev, + "error %d when start xenbus transaction\n", err); + return; + } + /*PV AER handlers will set this flag*/ +- xenbus_printf(xbt, nodename, "aerState" , "aerfail" ); ++ xenbus_printf(xbt, nodename, "aerState" , "aerfail"); + err = xenbus_transaction_end(xbt, 0); +- if (err) +- { ++ if (err) { + if (err == -EAGAIN) + goto again; + dev_err(&psdev->dev->dev, +@@ -541,9 +539,9 @@ again: + + /* For each aer recovery step error_detected, mmio_enabled, etc, front_end and + * backend need to have cooperation. In pciback, those steps will do similar +- * jobs: send service request and waiting for front_end response. ++ * jobs: send service request and waiting for front_end response. + */ +-static pci_ers_result_t common_process(struct pcistub_device *psdev, ++static pci_ers_result_t common_process(struct pcistub_device *psdev, + pci_channel_state_t state, int aer_cmd, pci_ers_result_t result) + { + pci_ers_result_t res = result; +@@ -561,12 +559,12 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, + if (!ret) { + dev_err(&psdev->dev->dev, + "pciback: failed to get pcifront device\n"); +- return PCI_ERS_RESULT_NONE; ++ return PCI_ERS_RESULT_NONE; + } + wmb(); + +- dev_dbg(&psdev->dev->dev, +- "pciback: aer_op %x dom %x bus %x devfn %x\n", ++ dev_dbg(&psdev->dev->dev, ++ "pciback: aer_op %x dom %x bus %x devfn %x\n", + aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn); + /*local flag to mark there's aer request, pciback callback will use this + * flag to judge whether we need to check pci-front give aer service +@@ -575,21 +573,21 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, + set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags); + + /*It is possible that a pcifront conf_read_write ops request invokes +- * the callback which cause the spurious execution of wake_up. ++ * the callback which cause the spurious execution of wake_up. 
+ * Yet it is harmless and better than a spinlock here + */ +- set_bit(_XEN_PCIB_active, ++ set_bit(_XEN_PCIB_active, + (unsigned long *)&psdev->pdev->sh_info->flags); + wmb(); + notify_remote_via_irq(psdev->pdev->evtchn_irq); + + ret = wait_event_timeout(aer_wait_queue, !(test_bit(_XEN_PCIB_active, +- (unsigned long *)&psdev->pdev->sh_info->flags)), 300*HZ); ++ (unsigned long *)&psdev->pdev->sh_info->flags)), 300*HZ); + + if (!ret) { +- if (test_bit(_XEN_PCIB_active, ++ if (test_bit(_XEN_PCIB_active, + (unsigned long *)&psdev->pdev->sh_info->flags)) { +- dev_err(&psdev->dev->dev, ++ dev_err(&psdev->dev->dev, + "pcifront aer process not responding!\n"); + clear_bit(_XEN_PCIB_active, + (unsigned long *)&psdev->pdev->sh_info->flags); +@@ -599,16 +597,16 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, + } + clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags); + +- if ( test_bit( _XEN_PCIF_active, +- (unsigned long*)&psdev->pdev->sh_info->flags)) { +- dev_dbg(&psdev->dev->dev, ++ if (test_bit(_XEN_PCIF_active, ++ (unsigned long *)&psdev->pdev->sh_info->flags)) { ++ dev_dbg(&psdev->dev->dev, + "schedule pci_conf service in pciback \n"); + test_and_schedule_op(psdev->pdev); + } + + res = (pci_ers_result_t)aer_op->err; + return res; +-} ++} + + /* + * pciback_slot_reset: it will send the slot_reset request to pcifront in case +@@ -632,24 +630,22 @@ static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev) + PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn)); + +- if ( !psdev || !psdev->pdev ) +- { +- dev_err(&dev->dev, ++ if (!psdev || !psdev->pdev) { ++ dev_err(&dev->dev, + "pciback device is not found/assigned\n"); + goto end; + } + +- if ( !psdev->pdev->sh_info ) +- { ++ if (!psdev->pdev->sh_info) { + dev_err(&dev->dev, "pciback device is not connected or owned" + " by HVM, kill it\n"); + kill_domain_by_device(psdev); + goto release; + } + +- if ( !test_bit(_XEN_PCIB_AERHANDLER, +- (unsigned long *)&psdev->pdev->sh_info->flags) ) { +- dev_err(&dev->dev, ++ if (!test_bit(_XEN_PCIB_AERHANDLER, ++ (unsigned long *)&psdev->pdev->sh_info->flags)) { ++ dev_err(&dev->dev, + "guest with no AER driver should have been killed\n"); + goto release; + } +@@ -657,7 +653,7 @@ static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev) + + if (result == PCI_ERS_RESULT_NONE || + result == PCI_ERS_RESULT_DISCONNECT) { +- dev_dbg(&dev->dev, ++ dev_dbg(&dev->dev, + "No AER slot_reset service or disconnected!\n"); + kill_domain_by_device(psdev); + } +@@ -670,9 +666,9 @@ end: + } + + +-/*pciback_mmio_enabled: it will send the mmio_enabled request to pcifront +-* in case of the device driver could provide this service, and then wait +-* for pcifront ack. 
++/*pciback_mmio_enabled: it will send the mmio_enabled request to pcifront ++* in case of the device driver could provide this service, and then wait ++* for pcifront ack + * @dev: pointer to PCI devices + * return value is used by aer_core do_recovery policy + */ +@@ -692,24 +688,22 @@ static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev) + PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn)); + +- if ( !psdev || !psdev->pdev ) +- { +- dev_err(&dev->dev, ++ if (!psdev || !psdev->pdev) { ++ dev_err(&dev->dev, + "pciback device is not found/assigned\n"); + goto end; + } + +- if ( !psdev->pdev->sh_info ) +- { ++ if (!psdev->pdev->sh_info) { + dev_err(&dev->dev, "pciback device is not connected or owned" + " by HVM, kill it\n"); + kill_domain_by_device(psdev); + goto release; + } + +- if ( !test_bit(_XEN_PCIB_AERHANDLER, +- (unsigned long *)&psdev->pdev->sh_info->flags) ) { +- dev_err(&dev->dev, ++ if (!test_bit(_XEN_PCIB_AERHANDLER, ++ (unsigned long *)&psdev->pdev->sh_info->flags)) { ++ dev_err(&dev->dev, + "guest with no AER driver should have been killed\n"); + goto release; + } +@@ -717,7 +711,7 @@ static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev) + + if (result == PCI_ERS_RESULT_NONE || + result == PCI_ERS_RESULT_DISCONNECT) { +- dev_dbg(&dev->dev, ++ dev_dbg(&dev->dev, + "No AER mmio_enabled service or disconnected!\n"); + kill_domain_by_device(psdev); + } +@@ -728,8 +722,8 @@ end: + return result; + } + +-/*pciback_error_detected: it will send the error_detected request to pcifront +-* in case of the device driver could provide this service, and then wait ++/*pciback_error_detected: it will send the error_detected request to pcifront ++* in case of the device driver could provide this service, and then wait + * for pcifront ack. + * @dev: pointer to PCI devices + * @error: the current PCI connection state +@@ -752,15 +746,13 @@ static pci_ers_result_t pciback_error_detected(struct pci_dev *dev, + PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn)); + +- if ( !psdev || !psdev->pdev ) +- { +- dev_err(&dev->dev, ++ if (!psdev || !psdev->pdev) { ++ dev_err(&dev->dev, + "pciback device is not found/assigned\n"); + goto end; + } + +- if ( !psdev->pdev->sh_info ) +- { ++ if (!psdev->pdev->sh_info) { + dev_err(&dev->dev, "pciback device is not connected or owned" + " by HVM, kill it\n"); + kill_domain_by_device(psdev); +@@ -768,8 +760,8 @@ static pci_ers_result_t pciback_error_detected(struct pci_dev *dev, + } + + /*Guest owns the device yet no aer handler regiested, kill guest*/ +- if ( !test_bit(_XEN_PCIB_AERHANDLER, +- (unsigned long *)&psdev->pdev->sh_info->flags) ) { ++ if (!test_bit(_XEN_PCIB_AERHANDLER, ++ (unsigned long *)&psdev->pdev->sh_info->flags)) { + dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n"); + kill_domain_by_device(psdev); + goto release; +@@ -778,7 +770,7 @@ static pci_ers_result_t pciback_error_detected(struct pci_dev *dev, + + if (result == PCI_ERS_RESULT_NONE || + result == PCI_ERS_RESULT_DISCONNECT) { +- dev_dbg(&dev->dev, ++ dev_dbg(&dev->dev, + "No AER error_detected service or disconnected!\n"); + kill_domain_by_device(psdev); + } +@@ -789,8 +781,8 @@ end: + return result; + } + +-/*pciback_error_resume: it will send the error_resume request to pcifront +-* in case of the device driver could provide this service, and then wait ++/*pciback_error_resume: it will send the error_resume request to pcifront ++* in case of the device driver could provide this service, and then wait + * for pcifront ack. 
+ * @dev: pointer to PCI devices + */ +@@ -808,29 +800,28 @@ static void pciback_error_resume(struct pci_dev *dev) + PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn)); + +- if ( !psdev || !psdev->pdev ) +- { +- dev_err(&dev->dev, ++ if (!psdev || !psdev->pdev) { ++ dev_err(&dev->dev, + "pciback device is not found/assigned\n"); + goto end; + } + +- if ( !psdev->pdev->sh_info ) +- { ++ if (!psdev->pdev->sh_info) { + dev_err(&dev->dev, "pciback device is not connected or owned" + " by HVM, kill it\n"); + kill_domain_by_device(psdev); + goto release; + } + +- if ( !test_bit(_XEN_PCIB_AERHANDLER, +- (unsigned long *)&psdev->pdev->sh_info->flags) ) { +- dev_err(&dev->dev, ++ if (!test_bit(_XEN_PCIB_AERHANDLER, ++ (unsigned long *)&psdev->pdev->sh_info->flags)) { ++ dev_err(&dev->dev, + "guest with no AER driver should have been killed\n"); + kill_domain_by_device(psdev); + goto release; + } +- common_process(psdev, 1, XEN_PCI_OP_aer_resume, PCI_ERS_RESULT_RECOVERED); ++ common_process(psdev, 1, XEN_PCI_OP_aer_resume, ++ PCI_ERS_RESULT_RECOVERED); + release: + pcistub_device_put(psdev); + end: +@@ -923,8 +914,8 @@ static int pcistub_device_id_remove(int domain, int bus, int slot, int func) + unsigned long flags; + + spin_lock_irqsave(&device_ids_lock, flags); +- list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, slot_list) { +- ++ list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, ++ slot_list) { + if (pci_dev_id->domain == domain + && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) { + /* Don't break; here because it's possible the same +@@ -976,7 +967,7 @@ static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg, + err = pciback_config_quirks_add_field(dev, field); + if (err) + kfree(field); +- out: ++out: + return err; + } + +@@ -992,7 +983,7 @@ static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf, + + err = pcistub_device_id_add(domain, bus, slot, func); + +- out: ++out: + if (!err) + err = count; + return err; +@@ -1012,7 +1003,7 @@ static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf, + + err = pcistub_device_id_remove(domain, bus, slot, func); + +- out: ++out: + if (!err) + err = count; + return err; +@@ -1057,7 +1048,7 @@ static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf, + + err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask); + +- out: ++out: + if (!err) + err = count; + return err; +@@ -1067,7 +1058,6 @@ static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf) + { + int count = 0; + unsigned long flags; +- extern struct list_head pciback_quirks; + struct pciback_config_quirk *quirk; + struct pciback_dev_data *dev_data; + const struct config_field *field; +@@ -1096,12 +1086,13 @@ static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf) + + count += scnprintf(buf + count, PAGE_SIZE - count, + "\t\t%08x:%01x:%08x\n", +- cfg_entry->base_offset + field->offset, +- field->size, field->mask); ++ cfg_entry->base_offset + ++ field->offset, field->size, ++ field->mask); + } + } + +- out: ++out: + spin_unlock_irqrestore(&device_ids_lock, flags); + + return count; +@@ -1137,14 +1128,14 @@ static ssize_t permissive_add(struct device_driver *drv, const char *buf, + if (!dev_data->permissive) { + dev_data->permissive = 1; + /* Let user know that what they're doing could be unsafe */ +- dev_warn(&psdev->dev->dev, +- "enabling permissive mode configuration space accesses!\n"); ++ dev_warn(&psdev->dev->dev, "enabling permissive mode " ++ 
"configuration space accesses!\n"); + dev_warn(&psdev->dev->dev, + "permissive mode is potentially unsafe!\n"); + } +- release: ++release: + pcistub_device_put(psdev); +- out: ++out: + if (!err) + err = count; + return err; +@@ -1264,10 +1255,10 @@ static int __init pcistub_init(void) + if (err) + pcistub_exit(); + +- out: ++out: + return err; + +- parse_error: ++parse_error: + printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n", + pci_devs_to_hide + pos); + return -EINVAL; +@@ -1276,7 +1267,7 @@ static int __init pcistub_init(void) + #ifndef MODULE + /* + * fs_initcall happens before device_initcall +- * so pciback *should* get called first (b/c we ++ * so pciback *should* get called first (b/c we + * want to suck up any device before other drivers + * get a chance by being the first pci device + * driver to register) +diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h +index 5e8e14e..98e2912 100644 +--- a/drivers/xen/pciback/pciback.h ++++ b/drivers/xen/pciback/pciback.h +@@ -49,6 +49,12 @@ struct pciback_dev_data { + int warned_on_write; + }; + ++/* Used by XenBus and pciback_ops.c */ ++extern wait_queue_head_t aer_wait_queue; ++extern struct workqueue_struct *pciback_wq; ++/* Used by pcistub.c and conf_space_quirks.c */ ++extern struct list_head pciback_quirks; ++ + /* Get/Put PCI Devices that are hidden from the PCI Backend Domain */ + struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev, + int domain, int bus, +@@ -67,14 +73,14 @@ void pciback_config_free_dyn_fields(struct pci_dev *dev); + void pciback_config_reset_dev(struct pci_dev *dev); + void pciback_config_free_dev(struct pci_dev *dev); + int pciback_config_read(struct pci_dev *dev, int offset, int size, +- u32 * ret_val); ++ u32 *ret_val); + int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value); + + /* Handle requests for specific devices from the frontend */ + typedef int (*publish_pci_dev_cb) (struct pciback_device *pdev, + unsigned int domain, unsigned int bus, + unsigned int devfn, unsigned int devid); +-typedef int (*publish_pci_root_cb) (struct pciback_device * pdev, ++typedef int (*publish_pci_root_cb) (struct pciback_device *pdev, + unsigned int domain, unsigned int bus); + int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, + int devid, publish_pci_dev_cb publish_cb); +@@ -83,15 +89,17 @@ struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, + unsigned int domain, unsigned int bus, + unsigned int devfn); + +-/** ++/** + * Add for domain0 PCIE-AER handling. 
Get guest domain/bus/devfn in pciback +-* before sending aer request to pcifront, so that guest could identify ++* before sending aer request to pcifront, so that guest could identify + * device, coopearte with pciback to finish aer recovery job if device driver + * has the capability + */ + +-int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, +- unsigned int *domain, unsigned int *bus, unsigned int *devfn); ++int pciback_get_pcifront_dev(struct pci_dev *pcidev, ++ struct pciback_device *pdev, ++ unsigned int *domain, unsigned int *bus, ++ unsigned int *devfn); + int pciback_init_devices(struct pciback_device *pdev); + int pciback_publish_pci_roots(struct pciback_device *pdev, + publish_pci_root_cb cb); +@@ -106,17 +114,17 @@ void pciback_xenbus_unregister(void); + + #ifdef CONFIG_PCI_MSI + int pciback_enable_msi(struct pciback_device *pdev, +- struct pci_dev *dev, struct xen_pci_op *op); ++ struct pci_dev *dev, struct xen_pci_op *op); + + int pciback_disable_msi(struct pciback_device *pdev, +- struct pci_dev *dev, struct xen_pci_op *op); ++ struct pci_dev *dev, struct xen_pci_op *op); + + + int pciback_enable_msix(struct pciback_device *pdev, +- struct pci_dev *dev, struct xen_pci_op *op); ++ struct pci_dev *dev, struct xen_pci_op *op); + + int pciback_disable_msix(struct pciback_device *pdev, +- struct pci_dev *dev, struct xen_pci_op *op); ++ struct pci_dev *dev, struct xen_pci_op *op); + #endif + extern int verbose_request; + +diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c +index 6624faf..bf83dca 100644 +--- a/drivers/xen/pciback/pciback_ops.c ++++ b/drivers/xen/pciback/pciback_ops.c +@@ -5,11 +5,11 @@ + */ + #include + #include +-#include ++#include + #include + #include "pciback.h" + +-int verbose_request = 0; ++int verbose_request; + module_param(verbose_request, int, 0644); + + /* Ensure a device is "turned off" and ready to be exported. +@@ -37,12 +37,10 @@ void pciback_reset_device(struct pci_dev *dev) + } + } + } +-extern wait_queue_head_t aer_wait_queue; +-extern struct workqueue_struct *pciback_wq; + /* + * Now the same evtchn is used for both pcifront conf_read_write request + * as well as pcie aer front end ack. We use a new work_queue to schedule +-* pciback conf_read_write service for avoiding confict with aer_core ++* pciback conf_read_write service for avoiding confict with aer_core + * do_recovery job which also use the system default work_queue + */ + void test_and_schedule_op(struct pciback_device *pdev) +@@ -50,14 +48,13 @@ void test_and_schedule_op(struct pciback_device *pdev) + /* Check that frontend is requesting an operation and that we are not + * already processing a request */ + if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags) +- && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) +- { ++ && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) { + queue_work(pciback_wq, &pdev->op_work); + } + /*_XEN_PCIB_active should have been cleared by pcifront. 
And also make + sure pciback is waiting for ack by checking _PCIB_op_pending*/ +- if (!test_bit(_XEN_PCIB_active,(unsigned long *)&pdev->sh_info->flags) +- &&test_bit(_PCIB_op_pending, &pdev->flags)) { ++ if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) ++ && test_bit(_PCIB_op_pending, &pdev->flags)) { + wake_up(&aer_wait_queue); + } + } +@@ -69,7 +66,8 @@ void test_and_schedule_op(struct pciback_device *pdev) + + void pciback_do_op(struct work_struct *data) + { +- struct pciback_device *pdev = container_of(data, struct pciback_device, op_work); ++ struct pciback_device *pdev = ++ container_of(data, struct pciback_device, op_work); + struct pci_dev *dev; + struct xen_pci_op *op = &pdev->sh_info->op; + +@@ -77,38 +75,36 @@ void pciback_do_op(struct work_struct *data) + + if (dev == NULL) + op->err = XEN_PCI_ERR_dev_not_found; +- else +- { +- switch (op->cmd) +- { +- case XEN_PCI_OP_conf_read: +- op->err = pciback_config_read(dev, +- op->offset, op->size, &op->value); +- break; +- case XEN_PCI_OP_conf_write: +- op->err = pciback_config_write(dev, +- op->offset, op->size, op->value); +- break; ++ else { ++ switch (op->cmd) { ++ case XEN_PCI_OP_conf_read: ++ op->err = pciback_config_read(dev, ++ op->offset, op->size, &op->value); ++ break; ++ case XEN_PCI_OP_conf_write: ++ op->err = pciback_config_write(dev, ++ op->offset, op->size, op->value); ++ break; + #ifdef CONFIG_PCI_MSI +- case XEN_PCI_OP_enable_msi: +- op->err = pciback_enable_msi(pdev, dev, op); +- break; +- case XEN_PCI_OP_disable_msi: +- op->err = pciback_disable_msi(pdev, dev, op); +- break; +- case XEN_PCI_OP_enable_msix: +- op->err = pciback_enable_msix(pdev, dev, op); +- break; +- case XEN_PCI_OP_disable_msix: +- op->err = pciback_disable_msix(pdev, dev, op); +- break; ++ case XEN_PCI_OP_enable_msi: ++ op->err = pciback_enable_msi(pdev, dev, op); ++ break; ++ case XEN_PCI_OP_disable_msi: ++ op->err = pciback_disable_msi(pdev, dev, op); ++ break; ++ case XEN_PCI_OP_enable_msix: ++ op->err = pciback_enable_msix(pdev, dev, op); ++ break; ++ case XEN_PCI_OP_disable_msix: ++ op->err = pciback_disable_msix(pdev, dev, op); ++ break; + #endif +- default: +- op->err = XEN_PCI_ERR_not_implemented; +- break; ++ default: ++ op->err = XEN_PCI_ERR_not_implemented; ++ break; + } + } +- /* Tell the driver domain that we're done. */ ++ /* Tell the driver domain that we're done. */ + wmb(); + clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); + notify_remote_via_irq(pdev->evtchn_irq); +@@ -119,7 +115,7 @@ void pciback_do_op(struct work_struct *data) + smp_mb__after_clear_bit(); /* /before/ final check for work */ + + /* Check to see if the driver domain tried to start another request in +- * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. ++ * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. 
+ */ + test_and_schedule_op(pdev); + } +diff --git a/drivers/xen/pciback/slot.c b/drivers/xen/pciback/slot.c +index 105a8b6..efb922d 100644 +--- a/drivers/xen/pciback/slot.c ++++ b/drivers/xen/pciback/slot.c +@@ -65,7 +65,8 @@ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { + if (slot_dev->slots[bus][slot] == NULL) { + printk(KERN_INFO +- "pciback: slot: %s: assign to virtual slot %d, bus %d\n", ++ "pciback: slot: %s: assign to virtual " ++ "slot %d, bus %d\n", + pci_name(dev), slot, bus); + slot_dev->slots[bus][slot] = dev; + goto unlock; +@@ -76,14 +77,14 @@ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, + xenbus_dev_fatal(pdev->xdev, err, + "No more space on root virtual PCI bus"); + +- unlock: ++unlock: + spin_unlock_irqrestore(&slot_dev->lock, flags); + + /* Publish this device. */ +- if(!err) ++ if (!err) + err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, 0), devid); + +- out: ++out: + return err; + } + +@@ -105,7 +106,7 @@ void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) + } + } + +- out: ++out: + spin_unlock_irqrestore(&slot_dev->lock, flags); + + if (found_dev) +@@ -156,8 +157,10 @@ void pciback_release_devices(struct pciback_device *pdev) + pdev->pci_dev_data = NULL; + } + +-int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, +- unsigned int *domain, unsigned int *bus, unsigned int *devfn) ++int pciback_get_pcifront_dev(struct pci_dev *pcidev, ++ struct pciback_device *pdev, ++ unsigned int *domain, unsigned int *bus, ++ unsigned int *devfn) + { + int slot, busnr; + struct slot_dev_data *slot_dev = pdev->pci_dev_data; +@@ -172,11 +175,12 @@ int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev + dev = slot_dev->slots[busnr][slot]; + if (dev && dev->bus->number == pcidev->bus->number + && dev->devfn == pcidev->devfn +- && pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus)) { ++ && pci_domain_nr(dev->bus) == ++ pci_domain_nr(pcidev->bus)) { + found = 1; + *domain = 0; + *bus = busnr; +- *devfn = PCI_DEVFN(slot,0); ++ *devfn = PCI_DEVFN(slot, 0); + goto out; + } + } +diff --git a/drivers/xen/pciback/vpci.c b/drivers/xen/pciback/vpci.c +index a5b7ece..721b81b 100644 +--- a/drivers/xen/pciback/vpci.c ++++ b/drivers/xen/pciback/vpci.c +@@ -125,14 +125,14 @@ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, + xenbus_dev_fatal(pdev->xdev, err, + "No more space on root virtual PCI bus"); + +- unlock: ++unlock: + spin_unlock_irqrestore(&vpci_dev->lock, flags); + + /* Publish this device. 
*/ +- if(!err) ++ if (!err) + err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid); + +- out: ++out: + return err; + } + +@@ -158,7 +158,7 @@ void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) + } + } + +- out: ++out: + spin_unlock_irqrestore(&vpci_dev->lock, flags); + + if (found_dev) +@@ -176,9 +176,8 @@ int pciback_init_devices(struct pciback_device *pdev) + + spin_lock_init(&vpci_dev->lock); + +- for (slot = 0; slot < PCI_SLOT_MAX; slot++) { ++ for (slot = 0; slot < PCI_SLOT_MAX; slot++) + INIT_LIST_HEAD(&vpci_dev->dev_list[slot]); +- } + + pdev->pci_dev_data = vpci_dev; + +@@ -211,8 +210,10 @@ void pciback_release_devices(struct pciback_device *pdev) + pdev->pci_dev_data = NULL; + } + +-int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, +- unsigned int *domain, unsigned int *bus, unsigned int *devfn) ++int pciback_get_pcifront_dev(struct pci_dev *pcidev, ++ struct pciback_device *pdev, ++ unsigned int *domain, unsigned int *bus, ++ unsigned int *devfn) + { + struct pci_dev_entry *entry; + struct pci_dev *dev = NULL; +@@ -227,15 +228,16 @@ int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev + list) { + dev = entry->dev; + if (dev && dev->bus->number == pcidev->bus->number +- && pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus) +- && dev->devfn == pcidev->devfn) +- { ++ && pci_domain_nr(dev->bus) == ++ pci_domain_nr(pcidev->bus) ++ && dev->devfn == pcidev->devfn) { + found = 1; + *domain = 0; + *bus = 0; +- *devfn = PCI_DEVFN(slot, PCI_FUNC(pcidev->devfn)); ++ *devfn = PCI_DEVFN(slot, ++ PCI_FUNC(pcidev->devfn)); + } +- } ++ } + } + spin_unlock_irqrestore(&vpci_dev->lock, flags); + return found; +diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c +index a85c413..efec585 100644 +--- a/drivers/xen/pciback/xenbus.c ++++ b/drivers/xen/pciback/xenbus.c +@@ -40,7 +40,7 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev) + kfree(pdev); + pdev = NULL; + } +- out: ++out: + return pdev; + } + +@@ -111,7 +111,7 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref, + err = 0; + + dev_dbg(&pdev->xdev->dev, "Attached!\n"); +- out: ++out: + return err; + } + +@@ -166,11 +166,10 @@ static int pciback_attach(struct pciback_device *pdev) + "Error switching to connected state!"); + + dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err); +- out: ++out: + spin_unlock(&pdev->dev_lock); + +- if (magic) +- kfree(magic); ++ kfree(magic); + + return err; + } +@@ -193,7 +192,7 @@ static int pciback_publish_pci_dev(struct pciback_device *pdev, + "%04x:%02x:%02x.%02x", domain, bus, + PCI_SLOT(devfn), PCI_FUNC(devfn)); + +- out: ++out: + return err; + } + +@@ -230,7 +229,7 @@ static int pciback_export_device(struct pciback_device *pdev, + * to other driver domains (as he who controls the bridge can disable + * it and stop the other devices from working). 
+ */ +- out: ++out: + return err; + } + +@@ -253,8 +252,8 @@ static int pciback_remove_device(struct pciback_device *pdev, + } + + pciback_release_pci_dev(pdev, dev); +- +- out: ++ ++out: + return err; + } + +@@ -314,7 +313,7 @@ static int pciback_publish_pci_root(struct pciback_device *pdev, + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, + "root_num", "%d", (root_num + 1)); + +- out: ++out: + return err; + } + +@@ -358,7 +357,7 @@ static int pciback_reconfigure(struct pciback_device *pdev) + } + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str, + "%d", &substate); +- if (err != 1) ++ if (err != 1) + substate = XenbusStateUnknown; + + switch (substate) { +@@ -389,14 +388,15 @@ static int pciback_reconfigure(struct pciback_device *pdev) + "configuration"); + goto out; + } +- ++ + err = pciback_export_device(pdev, domain, bus, slot, + func, i); + if (err) + goto out; + + /* Publish pci roots. */ +- err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root); ++ err = pciback_publish_pci_roots(pdev, ++ pciback_publish_pci_root); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error while publish PCI root" +@@ -412,7 +412,7 @@ static int pciback_reconfigure(struct pciback_device *pdev) + "Error switching substate of " + "dev-%d\n", i); + goto out; +- } ++ } + break; + + case XenbusStateClosing: +@@ -445,7 +445,7 @@ static int pciback_reconfigure(struct pciback_device *pdev) + + err = pciback_remove_device(pdev, domain, bus, slot, + func); +- if(err) ++ if (err) + goto out; + + /* TODO: If at some point we implement support for pci +@@ -466,8 +466,8 @@ static int pciback_reconfigure(struct pciback_device *pdev) + "Error switching to reconfigured state!"); + goto out; + } +- +- out: ++ ++out: + spin_unlock(&pdev->dev_lock); + + return 0; +@@ -591,7 +591,7 @@ static int pciback_setup_backend(struct pciback_device *pdev) + xenbus_dev_fatal(pdev->xdev, err, "Error switching " + "substate of dev-%d\n", i); + goto out; +- } ++ } + } + + err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root); +@@ -607,7 +607,7 @@ static int pciback_setup_backend(struct pciback_device *pdev) + xenbus_dev_fatal(pdev->xdev, err, + "Error switching to initialised state!"); + +- out: ++out: + spin_unlock(&pdev->dev_lock); + + if (!err) +@@ -663,7 +663,7 @@ static int pciback_xenbus_probe(struct xenbus_device *dev, + */ + pciback_be_watch(&pdev->be_watch, NULL, 0); + +- out: ++out: + return err; + } + +@@ -679,7 +679,7 @@ static int pciback_xenbus_remove(struct xenbus_device *dev) + + static const struct xenbus_device_id xenpci_ids[] = { + {"pci"}, +- {{0}}, ++ {""}, + }; + + static struct xenbus_driver xenbus_pciback_driver = { +-- +1.7.3.4 + + +From ca1ee0c25b425d9739b1a24cf911de2e041a2514 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Mon, 8 Mar 2010 18:39:15 -0500 +Subject: [PATCH 017/139] xen-pciback: remove driver_data direct access to struct device + +The driver core is going to not allow direct access to the +driver_data pointer in struct device. Instead, the functions +dev_get_drvdata() and dev_set_drvdata() should be used. 
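For readers who have not met the accessor idiom this patch converts to, here is a minimal stand-alone sketch of it. The names (my_state, my_attach, my_detach) are invented purely for illustration and are not part of this patch; only dev_get_drvdata()/dev_set_drvdata() are the real driver-core helpers.

#include <linux/device.h>
#include <linux/errno.h>
#include <linux/slab.h>

struct my_state {			/* hypothetical per-device private data */
	int example;
};

static int my_attach(struct device *dev)
{
	struct my_state *st = kzalloc(sizeof(*st), GFP_KERNEL);

	if (!st)
		return -ENOMEM;
	/* was: dev->driver_data = st; */
	dev_set_drvdata(dev, st);
	return 0;
}

static void my_detach(struct device *dev)
{
	/* was: struct my_state *st = dev->driver_data; */
	struct my_state *st = dev_get_drvdata(dev);

	dev_set_drvdata(dev, NULL);
	kfree(st);
}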
+ +Signed-off-by: Konrad Rzeszutek Wilk +--- + drivers/xen/pciback/xenbus.c | 8 ++++---- + 1 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c +index efec585..af6c25a 100644 +--- a/drivers/xen/pciback/xenbus.c ++++ b/drivers/xen/pciback/xenbus.c +@@ -26,7 +26,7 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev) + dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev); + + pdev->xdev = xdev; +- xdev->dev.driver_data = pdev; ++ dev_set_drvdata(&xdev->dev, pdev); + + spin_lock_init(&pdev->dev_lock); + +@@ -75,7 +75,7 @@ static void free_pdev(struct pciback_device *pdev) + + pciback_release_devices(pdev); + +- pdev->xdev->dev.driver_data = NULL; ++ dev_set_drvdata(&pdev->xdev->dev, NULL); + pdev->xdev = NULL; + + kfree(pdev); +@@ -476,7 +476,7 @@ out: + static void pciback_frontend_changed(struct xenbus_device *xdev, + enum xenbus_state fe_state) + { +- struct pciback_device *pdev = xdev->dev.driver_data; ++ struct pciback_device *pdev = dev_get_drvdata(&xdev->dev); + + dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state); + +@@ -669,7 +669,7 @@ out: + + static int pciback_xenbus_remove(struct xenbus_device *dev) + { +- struct pciback_device *pdev = dev->dev.driver_data; ++ struct pciback_device *pdev = dev_get_drvdata(&dev->dev); + + if (pdev != NULL) + free_pdev(pdev); +-- +1.7.3.4 + + +From 585f088e6aec3e4514ac2563852961f71c74e47e Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Mon, 8 Mar 2010 18:47:55 -0500 +Subject: [PATCH 018/139] xen-pciback: Fix compile error: 'TASK_NORMAL' undeclared. + +Both files were missing the #include + +Signed-off-by: Konrad Rzeszutek Wilk +--- + drivers/xen/pciback/pci_stub.c | 1 + + drivers/xen/pciback/pciback_ops.c | 1 + + 2 files changed, 2 insertions(+), 0 deletions(-) + +diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c +index 28222ee..6fc0b6e 100644 +--- a/drivers/xen/pciback/pci_stub.c ++++ b/drivers/xen/pciback/pci_stub.c +@@ -12,6 +12,7 @@ + #include + #include + #include ++#include + #include + #include + #include +diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c +index bf83dca..2b9a93e 100644 +--- a/drivers/xen/pciback/pciback_ops.c ++++ b/drivers/xen/pciback/pciback_ops.c +@@ -7,6 +7,7 @@ + #include + #include + #include ++#include + #include "pciback.h" + + int verbose_request; +-- +1.7.3.4 + + +From 03dd111c81bad8e69cdb8b5d67381702adb24593 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Wed, 9 Dec 2009 17:43:16 -0500 +Subject: [PATCH 019/139] xen-pciback: Remove the vestiges of CONFIG_PCI_GUESTDEV. + +The same functionality for this (that used to be called +pci_is_guestdev) is now via: "pci=resource_alignment=" +command line argument. 
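In case the replacement mechanism is unfamiliar: it is a boot parameter rather than a code change. A hypothetical example (the device address is made up; see Documentation/kernel-parameters.txt for the full [<align order>@]<domain>:<bus>:<slot>.<func> syntax):

	pci=resource_alignment=0000:03:00.0

This asks the PCI core to reassign that device's memory resources with the requested alignment, covering the role pci_is_guestdev() served here.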
+ +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/pciback/pci_stub.c | 10 ---------- + 1 files changed, 0 insertions(+), 10 deletions(-) + +diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c +index 6fc0b6e..d30aa7c 100644 +--- a/drivers/xen/pciback/pci_stub.c ++++ b/drivers/xen/pciback/pci_stub.c +@@ -430,16 +430,6 @@ static int __devinit pcistub_probe(struct pci_dev *dev, + + dev_info(&dev->dev, "seizing device\n"); + err = pcistub_seize(dev); +-#ifdef CONFIG_PCI_GUESTDEV +- } else if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) { +- if (!pci_is_guestdev(dev)) { +- err = -ENODEV; +- goto out; +- } +- +- dev_info(&dev->dev, "seizing device\n"); +- err = pcistub_seize(dev); +-#endif /* CONFIG_PCI_GUESTDEV */ + } else + /* Didn't find the device */ + err = -ENODEV; +-- +1.7.3.4 + + +From 30acb3491495a43b59a64612ad92a7a290c59e82 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Wed, 9 Dec 2009 17:43:17 -0500 +Subject: [PATCH 020/139] xen-pciback: Remove deprecated routine to find domain owner of PCI device. + +In linux-2.6.18.hg tree the mechanism to find the domain owner was +for the MSI driver (msi-xen.c) to call in this function to retrieve +the domain number. This is not the way anymore. + +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/pciback/pci_stub.c | 19 ------------------- + 1 files changed, 0 insertions(+), 19 deletions(-) + +diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c +index d30aa7c..30e7b59 100644 +--- a/drivers/xen/pciback/pci_stub.c ++++ b/drivers/xen/pciback/pci_stub.c +@@ -1157,22 +1157,6 @@ static ssize_t permissive_show(struct device_driver *drv, char *buf) + + DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add); + +-#ifdef CONFIG_PCI_MSI +- +-int pciback_get_owner(struct pci_dev *dev) +-{ +- struct pcistub_device *psdev; +- +- psdev = pcistub_device_find(pci_domain_nr(dev->bus), dev->bus->number, +- PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); +- +- if (!psdev || !psdev->pdev) +- return -1; +- +- return psdev->pdev->xdev->otherend_id; +-} +-#endif +- + static void pcistub_exit(void) + { + driver_remove_file(&pciback_pci_driver.driver, &driver_attr_new_slot); +@@ -1183,7 +1167,6 @@ static void pcistub_exit(void) + driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive); + + pci_unregister_driver(&pciback_pci_driver); +- WARN_ON(unregister_msi_get_owner(pciback_get_owner)); + } + + static int __init pcistub_init(void) +@@ -1241,8 +1224,6 @@ static int __init pcistub_init(void) + err = driver_create_file(&pciback_pci_driver.driver, + &driver_attr_permissive); + +- if (!err) +- err = register_msi_get_owner(pciback_get_owner); + if (err) + pcistub_exit(); + +-- +1.7.3.4 + + +From da36c7662d9738ce44c37b4f1f41c045c64d6914 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Tue, 22 Dec 2009 13:53:41 -0500 +Subject: [PATCH 021/139] xen-pciback: Fix compiler warning in pci_stub.c. 
+ +warning: the frame size of 1036 bytes is larger than 1024 bytes + +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/pciback/pci_stub.c | 5 +++-- + 1 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c +index 30e7b59..0b5a16b 100644 +--- a/drivers/xen/pciback/pci_stub.c ++++ b/drivers/xen/pciback/pci_stub.c +@@ -496,16 +496,17 @@ static const struct pci_device_id pcistub_ids[] = { + {0,}, + }; + ++#define PCI_NODENAME_MAX 40 + static void kill_domain_by_device(struct pcistub_device *psdev) + { + struct xenbus_transaction xbt; + int err; +- char nodename[1024]; ++ char nodename[PCI_NODENAME_MAX]; + + if (!psdev) + dev_err(&psdev->dev->dev, + "device is NULL when do AER recovery/kill_domain\n"); +- sprintf(nodename, "/local/domain/0/backend/pci/%d/0", ++ snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0", + psdev->pdev->xdev->otherend_id); + nodename[strlen(nodename)] = '\0'; + +-- +1.7.3.4 + + +From 83484f34b2cc42807c71514fbabbd40e281ec094 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Tue, 22 Dec 2009 13:53:42 -0500 +Subject: [PATCH 022/139] xen-pciback: Fix compile warning in vpci.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +warning: ‘func’ may be used uninitialized in this function + +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/pciback/vpci.c | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + +diff --git a/drivers/xen/pciback/vpci.c b/drivers/xen/pciback/vpci.c +index 721b81b..2857ab8 100644 +--- a/drivers/xen/pciback/vpci.c ++++ b/drivers/xen/pciback/vpci.c +@@ -65,7 +65,7 @@ static inline int match_slot(struct pci_dev *l, struct pci_dev *r) + int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, + int devid, publish_pci_dev_cb publish_cb) + { +- int err = 0, slot, func; ++ int err = 0, slot, func = -1; + struct pci_dev_entry *t, *dev_entry; + struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; + unsigned long flags; +-- +1.7.3.4 + + +From 5612e6358835700c49d8be5671823614ace30c94 Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Thu, 3 Dec 2009 21:56:20 +0000 +Subject: [PATCH 023/139] xen: rename pciback module to xen-pciback. + +pciback is rather generic for a modular distro style kernel. 
+ +Signed-off-by: Ian Campbell +Cc: Jeremy Fitzhardinge +Cc: Konrad Rzeszutek Wilk +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/pciback/Makefile | 24 ++++++++++++------------ + 1 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/drivers/xen/pciback/Makefile b/drivers/xen/pciback/Makefile +index 106dae7..38bc123 100644 +--- a/drivers/xen/pciback/Makefile ++++ b/drivers/xen/pciback/Makefile +@@ -1,16 +1,16 @@ +-obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback.o ++obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o + +-pciback-y := pci_stub.o pciback_ops.o xenbus.o +-pciback-y += conf_space.o conf_space_header.o \ +- conf_space_capability.o \ +- conf_space_capability_vpd.o \ +- conf_space_capability_pm.o \ +- conf_space_quirks.o +-pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o +-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o +-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o +-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o +-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o ++xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o ++xen-pciback-y += conf_space.o conf_space_header.o \ ++ conf_space_capability.o \ ++ conf_space_capability_vpd.o \ ++ conf_space_capability_pm.o \ ++ conf_space_quirks.o ++xen-pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o ++xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o ++xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o ++xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o ++xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o + + ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y) + EXTRA_CFLAGS += -DDEBUG +-- +1.7.3.4 + + +From 245a9ec5ef1f9c8a6bc6b5c0ac1bb616c3c8c979 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Wed, 9 Dec 2009 17:43:15 -0500 +Subject: [PATCH 024/139] xen-pciback: Register the owner (domain) of the PCI device. + +When the front-end and back-end start negotiating we register +the domain that will use the PCI device. Furthermore during shutdown +of guest or unbinding of the PCI device (and unloading of module) +from pciback we unregister the domain owner. + +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/pciback/pci_stub.c | 2 ++ + drivers/xen/pciback/xenbus.c | 13 +++++++++++++ + 2 files changed, 15 insertions(+), 0 deletions(-) + +diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c +index 0b5a16b..02178e2 100644 +--- a/drivers/xen/pciback/pci_stub.c ++++ b/drivers/xen/pciback/pci_stub.c +@@ -90,6 +90,8 @@ static void pcistub_device_release(struct kref *kref) + + dev_dbg(&psdev->dev->dev, "pcistub_device_release\n"); + ++ xen_unregister_device_domain_owner(psdev->dev); ++ + /* Clean-up the device */ + pciback_reset_device(psdev->dev); + pciback_config_free_dyn_fields(psdev->dev); +diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c +index af6c25a..d448bf5 100644 +--- a/drivers/xen/pciback/xenbus.c ++++ b/drivers/xen/pciback/xenbus.c +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + #include + #include "pciback.h" + +@@ -221,6 +222,15 @@ static int pciback_export_device(struct pciback_device *pdev, + if (err) + goto out; + ++ dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id); ++ if (xen_register_device_domain_owner(dev, ++ pdev->xdev->otherend_id) != 0) { ++ dev_err(&dev->dev, "device has been assigned to another " \ ++ "domain! 
Over-writting the ownership, but beware.\n"); ++ xen_unregister_device_domain_owner(dev); ++ xen_register_device_domain_owner(dev, pdev->xdev->otherend_id); ++ } ++ + /* TODO: It'd be nice to export a bridge and have all of its children + * get exported with it. This may be best done in xend (which will + * have to calculate resource usage anyway) but we probably want to +@@ -251,6 +261,9 @@ static int pciback_remove_device(struct pciback_device *pdev, + goto out; + } + ++ dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id); ++ xen_unregister_device_domain_owner(dev); ++ + pciback_release_pci_dev(pdev, dev); + + out: +-- +1.7.3.4 + + +From cb6c976606d16119e8608c8bcc1ef9265881dd7f Mon Sep 17 00:00:00 2001 +From: Zhao, Yu +Date: Wed, 3 Mar 2010 13:27:55 -0500 +Subject: [PATCH 025/139] xen-pciback: guest SR-IOV support for PV guest + +These changes are for PV guest to use Virtual Function. Because the VF's +vendor, device registers in cfg space are 0xffff, which are invalid and +ignored by PCI device scan. Values in 'struct pci_dev' are fixed up by +SR-IOV code, and using these values will present correct VID and DID to +PV guest kernel. + +And command registers in the cfg space are read only 0, which means we +have to emulate MMIO enable bit (VF only uses MMIO resource) so PV +kernel can work properly. + +Acked-by: jbeulich@novell.com + +Signed-off-by: Konrad Rzeszutek Wilk +--- + drivers/xen/pciback/conf_space_header.c | 71 ++++++++++++++++++++++++++++-- + 1 files changed, 66 insertions(+), 5 deletions(-) + +diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c +index 3ae7da1..1f4f86e 100644 +--- a/drivers/xen/pciback/conf_space_header.c ++++ b/drivers/xen/pciback/conf_space_header.c +@@ -18,6 +18,25 @@ struct pci_bar_info { + #define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO)) + #define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER) + ++static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data) ++{ ++ int i; ++ int ret; ++ ++ ret = pciback_read_config_word(dev, offset, value, data); ++ if (!atomic_read(&dev->enable_cnt)) ++ return ret; ++ ++ for (i = 0; i < PCI_ROM_RESOURCE; i++) { ++ if (dev->resource[i].flags & IORESOURCE_IO) ++ *value |= PCI_COMMAND_IO; ++ if (dev->resource[i].flags & IORESOURCE_MEM) ++ *value |= PCI_COMMAND_MEMORY; ++ } ++ ++ return ret; ++} ++ + static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) + { + int err; +@@ -142,10 +161,26 @@ static inline void read_dev_bar(struct pci_dev *dev, + struct pci_bar_info *bar_info, int offset, + u32 len_mask) + { +- pci_read_config_dword(dev, offset, &bar_info->val); +- pci_write_config_dword(dev, offset, len_mask); +- pci_read_config_dword(dev, offset, &bar_info->len_val); +- pci_write_config_dword(dev, offset, bar_info->val); ++ int pos; ++ struct resource *res = dev->resource; ++ ++ if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1) ++ pos = PCI_ROM_RESOURCE; ++ else { ++ pos = (offset - PCI_BASE_ADDRESS_0) / 4; ++ if (pos && ((res[pos - 1].flags & (PCI_BASE_ADDRESS_SPACE | ++ PCI_BASE_ADDRESS_MEM_TYPE_MASK)) == ++ (PCI_BASE_ADDRESS_SPACE_MEMORY | ++ PCI_BASE_ADDRESS_MEM_TYPE_64))) { ++ bar_info->val = res[pos - 1].start >> 32; ++ bar_info->len_val = res[pos - 1].end >> 32; ++ return; ++ } ++ } ++ ++ bar_info->val = res[pos].start | ++ (res[pos].flags & PCI_REGION_FLAG_MASK); ++ bar_info->len_val = res[pos].end - res[pos].start + 1; + } + + static void *bar_init(struct pci_dev *dev, 
int offset) +@@ -186,6 +221,22 @@ static void bar_release(struct pci_dev *dev, int offset, void *data) + kfree(data); + } + ++static int pciback_read_vendor(struct pci_dev *dev, int offset, ++ u16 *value, void *data) ++{ ++ *value = dev->vendor; ++ ++ return 0; ++} ++ ++static int pciback_read_device(struct pci_dev *dev, int offset, ++ u16 *value, void *data) ++{ ++ *value = dev->device; ++ ++ return 0; ++} ++ + static int interrupt_read(struct pci_dev *dev, int offset, u8 * value, + void *data) + { +@@ -213,9 +264,19 @@ out: + + static const struct config_field header_common[] = { + { ++ .offset = PCI_VENDOR_ID, ++ .size = 2, ++ .u.w.read = pciback_read_vendor, ++ }, ++ { ++ .offset = PCI_DEVICE_ID, ++ .size = 2, ++ .u.w.read = pciback_read_device, ++ }, ++ { + .offset = PCI_COMMAND, + .size = 2, +- .u.w.read = pciback_read_config_word, ++ .u.w.read = command_read, + .u.w.write = command_write, + }, + { +-- +1.7.3.4 + + +From 1d77305c7900f3b6ec5d403d9aba6f0034b0112e Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Wed, 3 Mar 2010 13:38:43 -0500 +Subject: [PATCH 026/139] xen-pciback: Disable MSI/MSI-X when reseting device + +In cases where the guest is abruptly killed and has not disabled +MSI/MSI-X interrupts we want to do that. + +Otherwise when the guest is started up and enables MSI, we would +get a WARN() that the device already had been enabled. + +Signed-off-by: Konrad Rzeszutek Wilk +--- + drivers/xen/pciback/pciback_ops.c | 8 ++++++++ + 1 files changed, 8 insertions(+), 0 deletions(-) + +diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c +index 2b9a93e..011db67 100644 +--- a/drivers/xen/pciback/pciback_ops.c ++++ b/drivers/xen/pciback/pciback_ops.c +@@ -23,6 +23,14 @@ void pciback_reset_device(struct pci_dev *dev) + + /* Disable devices (but not bridges) */ + if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) { ++#ifdef CONFIG_PCI_MSI ++ /* The guest could have been abruptly killed without ++ * disabling MSI/MSI-X interrupts.*/ ++ if (dev->msix_enabled) ++ pci_disable_msix(dev); ++ if (dev->msi_enabled) ++ pci_disable_msi(dev); ++#endif + pci_disable_device(dev); + + pci_write_config_word(dev, PCI_COMMAND, 0); +-- +1.7.3.4 + + +From c89edb63b60166fe354493dd465cf5662b2c077d Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Mon, 12 Apr 2010 11:46:00 -0400 +Subject: [PATCH 027/139] xen-pciback: Allocate IRQ handler for device that is shared with guest. + +If the pciback module is loaded with fake_irq_handler=1 we install +for all devices that are to be passed to the guest domain a IRQ handler. +The IRQ handler will return IRQ_HANDLED or IRQ_NONE depending on +on the ack_intr flag. + +The trigger to install this IRQ handler is when the enable_isr flag +is set. 
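The mechanism leans on the ordinary shared-IRQ contract, which the hunks below implement for pciback. As a reminder of that contract, here is a minimal stand-alone sketch with invented names (demo_state, demo_irq, demo_setup); it is not code taken from this patch.

#include <linux/interrupt.h>

struct demo_state {			/* hypothetical per-device bookkeeping */
	int claim;			/* roughly the role ack_intr plays below */
};

static irqreturn_t demo_irq(int irq, void *dev_id)
{
	struct demo_state *st = dev_id;

	/* With IRQF_SHARED every handler on the line is called; returning
	 * IRQ_NONE means "not mine", while IRQ_HANDLED claims the interrupt
	 * so the kernel does not treat the line as spurious/unhandled. */
	return st->claim ? IRQ_HANDLED : IRQ_NONE;
}

static int demo_setup(unsigned int irq, struct demo_state *st)
{
	return request_irq(irq, demo_irq, IRQF_SHARED, "demo-fake-irq", st);
}

In the pciback code below, isr_on tracks whether such a handler is currently installed and ack_intr whether it should claim interrupts.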
+ +Signed-off-by: Konrad Rzeszutek Wilk +--- + drivers/xen/pciback/pci_stub.c | 13 ++++- + drivers/xen/pciback/pciback.h | 12 ++++- + drivers/xen/pciback/pciback_ops.c | 95 ++++++++++++++++++++++++++++++++++++- + 3 files changed, 115 insertions(+), 5 deletions(-) + +diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c +index 02178e2..45bbe99 100644 +--- a/drivers/xen/pciback/pci_stub.c ++++ b/drivers/xen/pciback/pci_stub.c +@@ -21,6 +21,8 @@ + #include "conf_space.h" + #include "conf_space_quirks.h" + ++#define DRV_NAME "pciback" ++ + static char *pci_devs_to_hide; + wait_queue_head_t aer_wait_queue; + /*Add sem for sync AER handling and pciback remove/reconfigue ops, +@@ -290,13 +292,20 @@ static int __devinit pcistub_init_device(struct pci_dev *dev) + * would need to be called somewhere to free the memory allocated + * here and then to call kfree(pci_get_drvdata(psdev->dev)). + */ +- dev_data = kzalloc(sizeof(*dev_data), GFP_ATOMIC); ++ dev_data = kzalloc(sizeof(*dev_data) + strlen(DRV_NAME "[]") ++ + strlen(pci_name(dev)) + 1, GFP_ATOMIC); + if (!dev_data) { + err = -ENOMEM; + goto out; + } + pci_set_drvdata(dev, dev_data); + ++ /* ++ * Setup name for fake IRQ handler. It will only be enabled ++ * once the device is turned on by the guest. ++ */ ++ sprintf(dev_data->irq_name, DRV_NAME "[%s]", pci_name(dev)); ++ + dev_dbg(&dev->dev, "initializing config\n"); + + init_waitqueue_head(&aer_wait_queue); +@@ -837,7 +846,7 @@ static struct pci_error_handlers pciback_error_handler = { + */ + + static struct pci_driver pciback_pci_driver = { +- .name = "pciback", ++ .name = DRV_NAME, + .id_table = pcistub_ids, + .probe = pcistub_probe, + .remove = pcistub_remove, +diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h +index 98e2912..9d1b0a6 100644 +--- a/drivers/xen/pciback/pciback.h ++++ b/drivers/xen/pciback/pciback.h +@@ -45,8 +45,13 @@ struct pciback_device { + + struct pciback_dev_data { + struct list_head config_fields; +- int permissive; +- int warned_on_write; ++ unsigned int permissive : 1; ++ unsigned int warned_on_write : 1; ++ unsigned int enable_intx : 1; ++ unsigned int isr_on : 1; /* Whether the IRQ handler is installed. */ ++ unsigned int ack_intr : 1; /* .. and ACK-ing */ ++ unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */ ++ char irq_name[0]; /* pciback[000:04:00.0] */ + }; + + /* Used by XenBus and pciback_ops.c */ +@@ -131,3 +136,6 @@ extern int verbose_request; + void test_and_schedule_op(struct pciback_device *pdev); + #endif + ++/* Handles shared IRQs that can to device domain and control domain. */ ++void pciback_irq_handler(struct pci_dev *dev, int reset); ++irqreturn_t pciback_guest_interrupt(int irq, void *dev_id); +diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c +index 011db67..cb54893 100644 +--- a/drivers/xen/pciback/pciback_ops.c ++++ b/drivers/xen/pciback/pciback_ops.c +@@ -13,6 +13,78 @@ + int verbose_request; + module_param(verbose_request, int, 0644); + ++/* Ensure a device is has the fake IRQ handler "turned on/off" and is ++ * ready to be exported. This MUST be run after pciback_reset_device ++ * which does the actual PCI device enable/disable. 
++ */ ++void pciback_control_isr(struct pci_dev *dev, int reset) ++{ ++ struct pciback_dev_data *dev_data; ++ int rc; ++ int enable = 0; ++ ++ dev_data = pci_get_drvdata(dev); ++ if (!dev_data) ++ return; ++ ++ /* We don't deal with bridges */ ++ if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ++ return; ++ ++ if (reset) { ++ dev_data->enable_intx = 0; ++ dev_data->ack_intr = 0; ++ } ++ enable = dev_data->enable_intx; ++ ++ /* Asked to disable, but ISR isn't runnig */ ++ if (!enable && !dev_data->isr_on) ++ return; ++ ++ /* Squirrel away the IRQs in the dev_data. We need this ++ * b/c when device transitions to MSI, the dev->irq is ++ * overwritten with the MSI vector. ++ */ ++ if (enable) ++ dev_data->irq = dev->irq; ++ ++ dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n", ++ dev_data->irq_name, ++ dev_data->irq, ++ pci_is_enabled(dev) ? "on" : "off", ++ dev->msi_enabled ? "MSI" : "", ++ dev->msix_enabled ? "MSI/X" : "", ++ dev_data->isr_on ? "enable" : "disable", ++ enable ? "enable" : "disable"); ++ ++ if (enable) { ++ rc = request_irq(dev_data->irq, ++ pciback_guest_interrupt, IRQF_SHARED, ++ dev_data->irq_name, dev); ++ if (rc) { ++ dev_err(&dev->dev, "%s: failed to install fake IRQ " \ ++ "handler for IRQ %d! (rc:%d)\n", dev_data->irq_name, ++ dev_data->irq, rc); ++ goto out; ++ } ++ } ++ else { ++ free_irq(dev_data->irq, dev); ++ dev_data->irq = 0; ++ } ++ dev_data->isr_on = enable; ++ dev_data->ack_intr = enable; ++out: ++ dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n", ++ dev_data->irq_name, ++ dev_data->irq, ++ pci_is_enabled(dev) ? "on" : "off", ++ dev->msi_enabled ? "MSI" : "", ++ dev->msix_enabled ? "MSI/X" : "", ++ enable ? (dev_data->isr_on ? "enabled" : "failed to enable") : ++ (dev_data->isr_on ? "failed to disable" : "disabled")); ++} ++ + /* Ensure a device is "turned off" and ready to be exported. + * (Also see pciback_config_reset to ensure virtual configuration space is + * ready to be re-exported) +@@ -21,6 +93,8 @@ void pciback_reset_device(struct pci_dev *dev) + { + u16 cmd; + ++ pciback_control_isr(dev, 1 /* reset device */); ++ + /* Disable devices (but not bridges) */ + if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) { + #ifdef CONFIG_PCI_MSI +@@ -78,13 +152,18 @@ void pciback_do_op(struct work_struct *data) + struct pciback_device *pdev = + container_of(data, struct pciback_device, op_work); + struct pci_dev *dev; ++ struct pciback_dev_data *dev_data = NULL; + struct xen_pci_op *op = &pdev->sh_info->op; ++ int test_intx = 0; + + dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn); + + if (dev == NULL) + op->err = XEN_PCI_ERR_dev_not_found; + else { ++ dev_data = pci_get_drvdata(dev); ++ if (dev_data) ++ test_intx = dev_data->enable_intx; + switch (op->cmd) { + case XEN_PCI_OP_conf_read: + op->err = pciback_config_read(dev, +@@ -109,10 +188,15 @@ void pciback_do_op(struct work_struct *data) + break; + #endif + default: +- op->err = XEN_PCI_ERR_not_implemented; ++ op->err = XEN_PCI_ERR_not_implemented; + break; + } + } ++ if (!op->err && dev && dev_data) { ++ /* Transition detected */ ++ if ((dev_data->enable_intx != test_intx)) ++ pciback_control_isr(dev, 0 /* no reset */); ++ } + /* Tell the driver domain that we're done. 
*/ + wmb(); + clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); +@@ -137,3 +221,12 @@ irqreturn_t pciback_handle_event(int irq, void *dev_id) + + return IRQ_HANDLED; + } ++irqreturn_t pciback_guest_interrupt(int irq, void *dev_id) ++{ ++ struct pci_dev *dev = (struct pci_dev *)dev_id; ++ struct pciback_dev_data *dev_data = pci_get_drvdata(dev); ++ ++ if (dev_data->isr_on && dev_data->ack_intr) ++ return IRQ_HANDLED; ++ return IRQ_NONE; ++} +-- +1.7.3.4 + + +From 29a451f41647deedc2fa535520e648c76755568c Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Mon, 12 Apr 2010 11:47:15 -0400 +Subject: [PATCH 028/139] xen-pciback: Add SysFS instrumentation for the fake IRQ handler. + +Signed-off-by: Konrad Rzeszutek Wilk +--- + drivers/xen/pciback/pci_stub.c | 75 +++++++++++++++++++++++++++++++++++++++- + 1 files changed, 74 insertions(+), 1 deletions(-) + +diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c +index 45bbe99..ee2cd68 100644 +--- a/drivers/xen/pciback/pci_stub.c ++++ b/drivers/xen/pciback/pci_stub.c +@@ -1038,6 +1038,70 @@ static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf) + + DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL); + ++static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf) ++{ ++ struct pcistub_device *psdev; ++ struct pciback_dev_data *dev_data; ++ size_t count = 0; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&pcistub_devices_lock, flags); ++ list_for_each_entry(psdev, &pcistub_devices, dev_list) { ++ if (count >= PAGE_SIZE) ++ break; ++ if (!psdev->dev) ++ continue; ++ dev_data = pci_get_drvdata(psdev->dev); ++ if (!dev_data) ++ continue; ++ count += ++ scnprintf(buf + count, PAGE_SIZE - count, "%s:%s:%sing\n", ++ pci_name(psdev->dev), ++ dev_data->isr_on ? "on" : "off", ++ dev_data->ack_intr ? 
"ack" : "not ack"); ++ } ++ spin_unlock_irqrestore(&pcistub_devices_lock, flags); ++ return count; ++} ++ ++DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL); ++ ++static ssize_t pcistub_irq_handler_switch(struct device_driver *drv, ++ const char *buf, ++ size_t count) ++{ ++ struct pcistub_device *psdev; ++ struct pciback_dev_data *dev_data; ++ int domain, bus, slot, func; ++ int err = -ENOENT; ++ ++ err = str_to_slot(buf, &domain, &bus, &slot, &func); ++ if (err) ++ goto out; ++ ++ psdev = pcistub_device_find(domain, bus, slot, func); ++ ++ if (!psdev) ++ goto out; ++ ++ dev_data = pci_get_drvdata(psdev->dev); ++ if (!dev_data) ++ goto out; ++ ++ dev_dbg(&psdev->dev->dev, "%s fake irq handler: %d->%d\n", ++ dev_data->irq_name, dev_data->isr_on, ++ !dev_data->isr_on); ++ ++ dev_data->isr_on = !(dev_data->isr_on); ++ if (dev_data->isr_on) ++ dev_data->ack_intr = 1; ++out: ++ if (!err) ++ err = count; ++ return err; ++} ++DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch); ++ + static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf, + size_t count) + { +@@ -1177,7 +1241,10 @@ static void pcistub_exit(void) + driver_remove_file(&pciback_pci_driver.driver, &driver_attr_slots); + driver_remove_file(&pciback_pci_driver.driver, &driver_attr_quirks); + driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive); +- ++ driver_remove_file(&pciback_pci_driver.driver, ++ &driver_attr_irq_handlers); ++ driver_remove_file(&pciback_pci_driver.driver, ++ &driver_attr_irq_handler_state); + pci_unregister_driver(&pciback_pci_driver); + } + +@@ -1236,6 +1303,12 @@ static int __init pcistub_init(void) + err = driver_create_file(&pciback_pci_driver.driver, + &driver_attr_permissive); + ++ if (!err) ++ err = driver_create_file(&pciback_pci_driver.driver, ++ &driver_attr_irq_handlers); ++ if (!err) ++ err = driver_create_file(&pciback_pci_driver.driver, ++ &driver_attr_irq_handler_state); + if (err) + pcistub_exit(); + +-- +1.7.3.4 + + +From 6c7c36d411eeab67192fe0ed96ac1e048b4a1755 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Mon, 19 Apr 2010 14:39:10 -0400 +Subject: [PATCH 029/139] xen-pciback: When device transitions to MSI/MSI-X stop ACK-ing on the + legacy interrupt. + +But don't remove the irq handler from the legacy interrupt. The device +might still transition back to the legacy interrupts. + +Signed-off-by: Konrad Rzeszutek Wilk +--- + drivers/xen/pciback/conf_space_capability_msi.c | 17 ++++++++++++++++- + 1 files changed, 16 insertions(+), 1 deletions(-) + +diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c +index b70ea8b..a236e2d 100644 +--- a/drivers/xen/pciback/conf_space_capability_msi.c ++++ b/drivers/xen/pciback/conf_space_capability_msi.c +@@ -12,6 +12,7 @@ + int pciback_enable_msi(struct pciback_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op) + { ++ struct pciback_dev_data *dev_data; + int otherend = pdev->xdev->otherend_id; + int status; + +@@ -27,21 +28,29 @@ int pciback_enable_msi(struct pciback_device *pdev, + /* The value the guest needs is actually the IDT vector, not the + * the local domain's IRQ number. 
*/ + op->value = xen_gsi_from_irq(dev->irq); ++ dev_data = pci_get_drvdata(dev); ++ if (dev_data) ++ dev_data->ack_intr = 0; + return 0; + } + + int pciback_disable_msi(struct pciback_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op) + { ++ struct pciback_dev_data *dev_data; + pci_disable_msi(dev); + + op->value = xen_gsi_from_irq(dev->irq); ++ dev_data = pci_get_drvdata(dev); ++ if (dev_data) ++ dev_data->ack_intr = 1; + return 0; + } + + int pciback_enable_msix(struct pciback_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op) + { ++ struct pciback_dev_data *dev_data; + int i, result; + struct msix_entry *entries; + +@@ -68,6 +77,9 @@ int pciback_enable_msix(struct pciback_device *pdev, + kfree(entries); + + op->value = result; ++ dev_data = pci_get_drvdata(dev); ++ if (dev_data) ++ dev_data->ack_intr = 0; + + return result; + } +@@ -75,10 +87,13 @@ int pciback_enable_msix(struct pciback_device *pdev, + int pciback_disable_msix(struct pciback_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op) + { +- ++ struct pciback_dev_data *dev_data; + pci_disable_msix(dev); + + op->value = xen_gsi_from_irq(dev->irq); ++ dev_data = pci_get_drvdata(dev); ++ if (dev_data) ++ dev_data->ack_intr = 1; + return 0; + } + +-- +1.7.3.4 + + +From c1cc36c68f096f2b1e796ba84d9c583009939d91 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Mon, 19 Apr 2010 14:40:38 -0400 +Subject: [PATCH 030/139] xen-pciback: Enable interrupt handler when device is enabled. + +And also request it to be disabled when the device has been +disabled. + +Signed-off-by: Konrad Rzeszutek Wilk +--- + drivers/xen/pciback/conf_space_header.c | 6 ++++++ + 1 files changed, 6 insertions(+), 0 deletions(-) + +diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c +index 1f4f86e..cb450f4 100644 +--- a/drivers/xen/pciback/conf_space_header.c ++++ b/drivers/xen/pciback/conf_space_header.c +@@ -39,8 +39,10 @@ static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data) + + static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) + { ++ struct pciback_dev_data *dev_data; + int err; + ++ dev_data = pci_get_drvdata(dev); + if (!pci_is_enabled(dev) && is_enable_cmd(value)) { + if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: enable\n", +@@ -48,11 +50,15 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) + err = pci_enable_device(dev); + if (err) + return err; ++ if (dev_data) ++ dev_data->enable_intx = 1; + } else if (pci_is_enabled(dev) && !is_enable_cmd(value)) { + if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: disable\n", + pci_name(dev)); + pci_disable_device(dev); ++ if (dev_data) ++ dev_data->enable_intx = 0; + } + + if (!dev->is_busmaster && is_master_cmd(value)) { +-- +1.7.3.4 + + +From a732e3d6ed4831c460586bd7a16ef7f6b7d28936 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Mon, 19 Apr 2010 16:23:06 -0400 +Subject: [PATCH 031/139] xen-pciback: Probe the IRQ line to check if it is not shared. + +If it is not shared, we stop ACK-ing the IRQ line as there is +no need for this irq handler to return IRQ_HANDLED. + +We have to this check once much much later than the pciback +and pcifront have started talking as guests doing the hypercall +that would notify the other guest that the IRQ line is shared +is done asynchronously. 
+ +Signed-off-by: Konrad Rzeszutek Wilk +--- + drivers/xen/pciback/pci_stub.c | 5 +++-- + drivers/xen/pciback/pciback.h | 1 + + drivers/xen/pciback/pciback_ops.c | 12 +++++++++++- + 3 files changed, 15 insertions(+), 3 deletions(-) + +diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c +index ee2cd68..88c7ca1 100644 +--- a/drivers/xen/pciback/pci_stub.c ++++ b/drivers/xen/pciback/pci_stub.c +@@ -1055,10 +1055,11 @@ static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf) + if (!dev_data) + continue; + count += +- scnprintf(buf + count, PAGE_SIZE - count, "%s:%s:%sing\n", ++ scnprintf(buf + count, PAGE_SIZE - count, "%s:%s:%sing:%ld\n", + pci_name(psdev->dev), + dev_data->isr_on ? "on" : "off", +- dev_data->ack_intr ? "ack" : "not ack"); ++ dev_data->ack_intr ? "ack" : "not ack", ++ dev_data->handled); + } + spin_unlock_irqrestore(&pcistub_devices_lock, flags); + return count; +diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h +index 9d1b0a6..fc31052 100644 +--- a/drivers/xen/pciback/pciback.h ++++ b/drivers/xen/pciback/pciback.h +@@ -50,6 +50,7 @@ struct pciback_dev_data { + unsigned int enable_intx : 1; + unsigned int isr_on : 1; /* Whether the IRQ handler is installed. */ + unsigned int ack_intr : 1; /* .. and ACK-ing */ ++ unsigned long handled; + unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */ + char irq_name[0]; /* pciback[000:04:00.0] */ + }; +diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c +index cb54893..5543881 100644 +--- a/drivers/xen/pciback/pciback_ops.c ++++ b/drivers/xen/pciback/pciback_ops.c +@@ -226,7 +226,17 @@ irqreturn_t pciback_guest_interrupt(int irq, void *dev_id) + struct pci_dev *dev = (struct pci_dev *)dev_id; + struct pciback_dev_data *dev_data = pci_get_drvdata(dev); + +- if (dev_data->isr_on && dev_data->ack_intr) ++ if (dev_data->isr_on && dev_data->ack_intr) { ++ dev_data->handled++; ++ if ((dev_data->handled % 1000) == 0) { ++ if (xen_ignore_irq(irq)) { ++ printk(KERN_INFO "%s IRQ line is not shared " ++ "with other domains. Turning ISR off\n", ++ dev_data->irq_name); ++ dev_data->ack_intr = 0; ++ } ++ } + return IRQ_HANDLED; ++ } + return IRQ_NONE; + } +-- +1.7.3.4 + + +From 3312c11c3f9c857b2457c293e6b6e15928a32f32 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Tue, 20 Apr 2010 20:22:40 -0400 +Subject: [PATCH 032/139] xen-pciback: Add debug statements for the MSI/MSI-X configuration module. 
+ +Signed-off-by: Konrad Rzeszutek Wilk +--- + drivers/xen/pciback/conf_space_capability_msi.c | 11 +++++++++++ + 1 files changed, 11 insertions(+), 0 deletions(-) + +diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c +index a236e2d..b15131e 100644 +--- a/drivers/xen/pciback/conf_space_capability_msi.c ++++ b/drivers/xen/pciback/conf_space_capability_msi.c +@@ -16,6 +16,9 @@ int pciback_enable_msi(struct pciback_device *pdev, + int otherend = pdev->xdev->otherend_id; + int status; + ++ if (unlikely(verbose_request)) ++ printk(KERN_DEBUG "pciback: %s: enable MSI\n", pci_name(dev)); ++ + status = pci_enable_msi(dev); + + if (status) { +@@ -31,6 +34,7 @@ int pciback_enable_msi(struct pciback_device *pdev, + dev_data = pci_get_drvdata(dev); + if (dev_data) + dev_data->ack_intr = 0; ++ + return 0; + } + +@@ -38,6 +42,9 @@ int pciback_disable_msi(struct pciback_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op) + { + struct pciback_dev_data *dev_data; ++ ++ if (unlikely(verbose_request)) ++ printk(KERN_DEBUG "pciback: %s: disable MSI\n", pci_name(dev)); + pci_disable_msi(dev); + + op->value = xen_gsi_from_irq(dev->irq); +@@ -54,6 +61,8 @@ int pciback_enable_msix(struct pciback_device *pdev, + int i, result; + struct msix_entry *entries; + ++ if (unlikely(verbose_request)) ++ printk(KERN_DEBUG "pciback: %s: enable MSI-X\n", pci_name(dev)); + if (op->value > SH_INFO_MAX_VEC) + return -EINVAL; + +@@ -88,6 +97,8 @@ int pciback_disable_msix(struct pciback_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op) + { + struct pciback_dev_data *dev_data; ++ if (unlikely(verbose_request)) ++ printk(KERN_DEBUG "pciback: %s: disable MSI-X\n", pci_name(dev)); + pci_disable_msix(dev); + + op->value = xen_gsi_from_irq(dev->irq); +-- +1.7.3.4 + + +From 52257d7ad18bd91fd614df5ef960a88af3ed5200 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Fri, 23 Jul 2010 14:35:47 -0400 +Subject: [PATCH 033/139] xen-pciback: Redo spinlock usage. + +We were using coarse spinlocks that could end up with a deadlock. +This patch fixes that and makes the spinlocks much more fine-grained. 
+ +Signed-off-by: Konrad Rzeszutek Wilk +--- + drivers/xen/pciback/xenbus.c | 34 +++++++++++++++++++++------------- + 1 files changed, 21 insertions(+), 13 deletions(-) + +diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c +index d448bf5..f0d5426 100644 +--- a/drivers/xen/pciback/xenbus.c ++++ b/drivers/xen/pciback/xenbus.c +@@ -54,23 +54,31 @@ static void pciback_disconnect(struct pciback_device *pdev) + unbind_from_irqhandler(pdev->evtchn_irq, pdev); + pdev->evtchn_irq = INVALID_EVTCHN_IRQ; + } ++ spin_unlock(&pdev->dev_lock); + + /* If the driver domain started an op, make sure we complete it + * before releasing the shared memory */ ++ ++ /* Note, the workqueue does not use spinlocks at all.*/ + flush_workqueue(pciback_wq); + ++ spin_lock(&pdev->dev_lock); + if (pdev->sh_info != NULL) { + xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info); + pdev->sh_info = NULL; + } +- + spin_unlock(&pdev->dev_lock); ++ + } + + static void free_pdev(struct pciback_device *pdev) + { +- if (pdev->be_watching) ++ spin_lock(&pdev->dev_lock); ++ if (pdev->be_watching) { + unregister_xenbus_watch(&pdev->be_watch); ++ pdev->be_watching = 0; ++ } ++ spin_unlock(&pdev->dev_lock); + + pciback_disconnect(pdev); + +@@ -98,7 +106,10 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref, + "Error mapping other domain page in ours."); + goto out; + } ++ ++ spin_lock(&pdev->dev_lock); + pdev->sh_info = vaddr; ++ spin_unlock(&pdev->dev_lock); + + err = bind_interdomain_evtchn_to_irqhandler( + pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event, +@@ -108,7 +119,10 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref, + "Error binding event channel to IRQ"); + goto out; + } ++ ++ spin_lock(&pdev->dev_lock); + pdev->evtchn_irq = err; ++ spin_unlock(&pdev->dev_lock); + err = 0; + + dev_dbg(&pdev->xdev->dev, "Attached!\n"); +@@ -122,7 +136,6 @@ static int pciback_attach(struct pciback_device *pdev) + int gnt_ref, remote_evtchn; + char *magic = NULL; + +- spin_lock(&pdev->dev_lock); + + /* Make sure we only do this setup once */ + if (xenbus_read_driver_state(pdev->xdev->nodename) != +@@ -168,7 +181,6 @@ static int pciback_attach(struct pciback_device *pdev) + + dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err); + out: +- spin_unlock(&pdev->dev_lock); + + kfree(magic); + +@@ -340,7 +352,6 @@ static int pciback_reconfigure(struct pciback_device *pdev) + char state_str[64]; + char dev_str[64]; + +- spin_lock(&pdev->dev_lock); + + dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n"); + +@@ -481,8 +492,6 @@ static int pciback_reconfigure(struct pciback_device *pdev) + } + + out: +- spin_unlock(&pdev->dev_lock); +- + return 0; + } + +@@ -539,8 +548,6 @@ static int pciback_setup_backend(struct pciback_device *pdev) + char dev_str[64]; + char state_str[64]; + +- spin_lock(&pdev->dev_lock); +- + /* It's possible we could get the call to setup twice, so make sure + * we're not already connected. 
+ */ +@@ -621,8 +628,6 @@ static int pciback_setup_backend(struct pciback_device *pdev) + "Error switching to initialised state!"); + + out: +- spin_unlock(&pdev->dev_lock); +- + if (!err) + /* see if pcifront is already configured (if not, we'll wait) */ + pciback_attach(pdev); +@@ -669,7 +674,10 @@ static int pciback_xenbus_probe(struct xenbus_device *dev, + pciback_be_watch); + if (err) + goto out; ++ ++ spin_lock(&pdev->dev_lock); + pdev->be_watching = 1; ++ spin_unlock(&pdev->dev_lock); + + /* We need to force a call to our callback here in case + * xend already configured us! +@@ -708,8 +716,8 @@ int __init pciback_xenbus_register(void) + { + pciback_wq = create_workqueue("pciback_workqueue"); + if (!pciback_wq) { +- printk(KERN_ERR "pciback_xenbus_register: create" +- "pciback_workqueue failed\n"); ++ printk(KERN_ERR "%s: create" ++ "pciback_workqueue failed\n",__FUNCTION__); + return -EFAULT; + } + return xenbus_register_backend(&xenbus_pciback_driver); +-- +1.7.3.4 + + +From a9e0cfab0577730e74787b701edc727756a52b11 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Wed, 28 Jul 2010 13:28:34 -0400 +Subject: [PATCH 034/139] xen-pciback: Remove spinlock for be->watching state. + +There is no need to guard this with a spinlock. It +is already guarded by the xenwatch_thread against multiple +customers. + +Signed-off-by: Konrad Rzeszutek Wilk +--- + drivers/xen/pciback/xenbus.c | 4 ---- + 1 files changed, 0 insertions(+), 4 deletions(-) + +diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c +index f0d5426..993b659 100644 +--- a/drivers/xen/pciback/xenbus.c ++++ b/drivers/xen/pciback/xenbus.c +@@ -73,12 +73,10 @@ static void pciback_disconnect(struct pciback_device *pdev) + + static void free_pdev(struct pciback_device *pdev) + { +- spin_lock(&pdev->dev_lock); + if (pdev->be_watching) { + unregister_xenbus_watch(&pdev->be_watch); + pdev->be_watching = 0; + } +- spin_unlock(&pdev->dev_lock); + + pciback_disconnect(pdev); + +@@ -675,9 +673,7 @@ static int pciback_xenbus_probe(struct xenbus_device *dev, + if (err) + goto out; + +- spin_lock(&pdev->dev_lock); + pdev->be_watching = 1; +- spin_unlock(&pdev->dev_lock); + + /* We need to force a call to our callback here in case + * xend already configured us! +-- +1.7.3.4 + + +From c0cae0b36c43e75d4d69c60f5319e6ba802b2233 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Mon, 13 Dec 2010 11:06:36 -0500 +Subject: [PATCH 035/139] xen/pciback: Fix checkpatch warnings and errors. + +Checkpatch found some extra warnings and errors. This mega +patch fixes all of them in one swoop. 
+ +Signed-off-by: Konrad Rzeszutek Wilk +--- + arch/x86/include/asm/xen/pci.h | 2 +- + drivers/xen/events.c | 38 ++++++++++---------- + drivers/xen/pciback/conf_space.c | 4 +- + drivers/xen/pciback/conf_space_capability_msi.c | 11 +++--- + drivers/xen/pciback/conf_space_header.c | 42 +++++++++++----------- + drivers/xen/pciback/controller.c | 2 +- + drivers/xen/pciback/pci_stub.c | 7 ++-- + drivers/xen/pciback/pciback.h | 16 ++++---- + drivers/xen/pciback/pciback_ops.c | 9 ++--- + drivers/xen/pciback/xenbus.c | 14 ++++---- + 10 files changed, 73 insertions(+), 72 deletions(-) + +diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h +index 8474b4b..7e61d78 100644 +--- a/arch/x86/include/asm/xen/pci.h ++++ b/arch/x86/include/asm/xen/pci.h +@@ -27,7 +27,7 @@ static inline int xen_find_device_domain_owner(struct pci_dev *dev) + return -1; + } + static inline int xen_register_device_domain_owner(struct pci_dev *dev, +- uint16_t domain) ++ uint16_t domain) + { + return -1; + } +diff --git a/drivers/xen/events.c b/drivers/xen/events.c +index 95eea13..3929c20 100644 +--- a/drivers/xen/events.c ++++ b/drivers/xen/events.c +@@ -698,7 +698,7 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type) + domid = rc = xen_find_device_domain_owner(dev); + if (rc < 0) + domid = DOMID_SELF; +- ++ + memset(&map_irq, 0, sizeof(map_irq)); + map_irq.domid = domid; + map_irq.type = MAP_PIRQ_TYPE_MSI; +@@ -850,18 +850,18 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) + } + + static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, +- unsigned int remote_port) ++ unsigned int remote_port) + { +- struct evtchn_bind_interdomain bind_interdomain; +- int err; ++ struct evtchn_bind_interdomain bind_interdomain; ++ int err; + +- bind_interdomain.remote_dom = remote_domain; +- bind_interdomain.remote_port = remote_port; ++ bind_interdomain.remote_dom = remote_domain; ++ bind_interdomain.remote_port = remote_port; + +- err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, +- &bind_interdomain); ++ err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, ++ &bind_interdomain); + +- return err ? : bind_evtchn_to_irq(bind_interdomain.local_port); ++ return err ? 
: bind_evtchn_to_irq(bind_interdomain.local_port); + } + + +@@ -966,19 +966,19 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, + const char *devname, + void *dev_id) + { +- int irq, retval; ++ int irq, retval; + +- irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port); +- if (irq < 0) +- return irq; ++ irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port); ++ if (irq < 0) ++ return irq; + +- retval = request_irq(irq, handler, irqflags, devname, dev_id); +- if (retval != 0) { +- unbind_from_irq(irq); +- return retval; +- } ++ retval = request_irq(irq, handler, irqflags, devname, dev_id); ++ if (retval != 0) { ++ unbind_from_irq(irq); ++ return retval; ++ } + +- return irq; ++ return irq; + } + EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler); + +diff --git a/drivers/xen/pciback/conf_space.c b/drivers/xen/pciback/conf_space.c +index 370c18e..eb6bba0 100644 +--- a/drivers/xen/pciback/conf_space.c ++++ b/drivers/xen/pciback/conf_space.c +@@ -18,8 +18,8 @@ + static int permissive; + module_param(permissive, bool, 0644); + +-#define DEFINE_PCI_CONFIG(op, size, type) \ +-int pciback_##op##_config_##size \ ++#define DEFINE_PCI_CONFIG(op, size, type) \ ++int pciback_##op##_config_##size \ + (struct pci_dev *dev, int offset, type value, void *data) \ + { \ + return pci_##op##_config_##size(dev, offset, value); \ +diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c +index b15131e..3acda69 100644 +--- a/drivers/xen/pciback/conf_space_capability_msi.c ++++ b/drivers/xen/pciback/conf_space_capability_msi.c +@@ -16,7 +16,7 @@ int pciback_enable_msi(struct pciback_device *pdev, + int otherend = pdev->xdev->otherend_id; + int status; + +- if (unlikely(verbose_request)) ++ if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: enable MSI\n", pci_name(dev)); + + status = pci_enable_msi(dev); +@@ -43,7 +43,7 @@ int pciback_disable_msi(struct pciback_device *pdev, + { + struct pciback_dev_data *dev_data; + +- if (unlikely(verbose_request)) ++ if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: disable MSI\n", pci_name(dev)); + pci_disable_msi(dev); + +@@ -61,7 +61,7 @@ int pciback_enable_msix(struct pciback_device *pdev, + int i, result; + struct msix_entry *entries; + +- if (unlikely(verbose_request)) ++ if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: enable MSI-X\n", pci_name(dev)); + if (op->value > SH_INFO_MAX_VEC) + return -EINVAL; +@@ -97,8 +97,9 @@ int pciback_disable_msix(struct pciback_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op) + { + struct pciback_dev_data *dev_data; +- if (unlikely(verbose_request)) +- printk(KERN_DEBUG "pciback: %s: disable MSI-X\n", pci_name(dev)); ++ if (unlikely(verbose_request)) ++ printk(KERN_DEBUG "pciback: %s: disable MSI-X\n", ++ pci_name(dev)); + pci_disable_msix(dev); + + op->value = xen_gsi_from_irq(dev->irq); +diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c +index cb450f4..22ad0f5 100644 +--- a/drivers/xen/pciback/conf_space_header.c ++++ b/drivers/xen/pciback/conf_space_header.c +@@ -316,27 +316,27 @@ static const struct config_field header_common[] = { + {} + }; + +-#define CFG_FIELD_BAR(reg_offset) \ +- { \ +- .offset = reg_offset, \ +- .size = 4, \ +- .init = bar_init, \ +- .reset = bar_reset, \ +- .release = bar_release, \ +- .u.dw.read = bar_read, \ +- .u.dw.write = bar_write, \ +- } +- +-#define CFG_FIELD_ROM(reg_offset) \ +- { \ +- .offset = 
reg_offset, \ +- .size = 4, \ +- .init = rom_init, \ +- .reset = bar_reset, \ +- .release = bar_release, \ +- .u.dw.read = bar_read, \ +- .u.dw.write = rom_write, \ +- } ++#define CFG_FIELD_BAR(reg_offset) \ ++ { \ ++ .offset = reg_offset, \ ++ .size = 4, \ ++ .init = bar_init, \ ++ .reset = bar_reset, \ ++ .release = bar_release, \ ++ .u.dw.read = bar_read, \ ++ .u.dw.write = bar_write, \ ++ } ++ ++#define CFG_FIELD_ROM(reg_offset) \ ++ { \ ++ .offset = reg_offset, \ ++ .size = 4, \ ++ .init = rom_init, \ ++ .reset = bar_reset, \ ++ .release = bar_release, \ ++ .u.dw.read = bar_read, \ ++ .u.dw.write = rom_write, \ ++ } + + static const struct config_field header_0[] = { + CFG_FIELD_BAR(PCI_BASE_ADDRESS_0), +diff --git a/drivers/xen/pciback/controller.c b/drivers/xen/pciback/controller.c +index 7f04f11..5a7e4cc 100644 +--- a/drivers/xen/pciback/controller.c ++++ b/drivers/xen/pciback/controller.c +@@ -378,7 +378,7 @@ int pciback_publish_pci_roots(struct pciback_device *pdev, + } + + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, +- "%lx", (sizeof(struct acpi_resource) * 2) + 1); ++ "%lx", (sizeof(struct acpi_resource) *2) + 1); + + out: + spin_unlock(&dev_data->lock); +diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c +index 88c7ca1..c8f6f29 100644 +--- a/drivers/xen/pciback/pci_stub.c ++++ b/drivers/xen/pciback/pci_stub.c +@@ -13,7 +13,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + #include +@@ -603,7 +603,7 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, + if (test_bit(_XEN_PCIF_active, + (unsigned long *)&psdev->pdev->sh_info->flags)) { + dev_dbg(&psdev->dev->dev, +- "schedule pci_conf service in pciback \n"); ++ "schedule pci_conf service in pciback\n"); + test_and_schedule_op(psdev->pdev); + } + +@@ -1055,7 +1055,8 @@ static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf) + if (!dev_data) + continue; + count += +- scnprintf(buf + count, PAGE_SIZE - count, "%s:%s:%sing:%ld\n", ++ scnprintf(buf + count, PAGE_SIZE - count, ++ "%s:%s:%sing:%ld\n", + pci_name(psdev->dev), + dev_data->isr_on ? "on" : "off", + dev_data->ack_intr ? "ack" : "not ack", +diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h +index fc31052..5c14020 100644 +--- a/drivers/xen/pciback/pciback.h ++++ b/drivers/xen/pciback/pciback.h +@@ -12,7 +12,7 @@ + #include + #include + #include +-#include ++#include + #include + + struct pci_dev_entry { +@@ -20,8 +20,8 @@ struct pci_dev_entry { + struct pci_dev *dev; + }; + +-#define _PDEVF_op_active (0) +-#define PDEVF_op_active (1<<(_PDEVF_op_active)) ++#define _PDEVF_op_active (0) ++#define PDEVF_op_active (1<<(_PDEVF_op_active)) + #define _PCIB_op_pending (1) + #define PCIB_op_pending (1<<(_PCIB_op_pending)) + +@@ -45,11 +45,11 @@ struct pciback_device { + + struct pciback_dev_data { + struct list_head config_fields; +- unsigned int permissive : 1; +- unsigned int warned_on_write : 1; +- unsigned int enable_intx : 1; +- unsigned int isr_on : 1; /* Whether the IRQ handler is installed. */ +- unsigned int ack_intr : 1; /* .. and ACK-ing */ ++ unsigned int permissive:1; ++ unsigned int warned_on_write:1; ++ unsigned int enable_intx:1; ++ unsigned int isr_on:1; /* Whether the IRQ handler is installed. */ ++ unsigned int ack_intr:1; /* .. 
and ACK-ing */ + unsigned long handled; + unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */ + char irq_name[0]; /* pciback[000:04:00.0] */ +diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c +index 5543881..9a465e9 100644 +--- a/drivers/xen/pciback/pciback_ops.c ++++ b/drivers/xen/pciback/pciback_ops.c +@@ -63,12 +63,11 @@ void pciback_control_isr(struct pci_dev *dev, int reset) + dev_data->irq_name, dev); + if (rc) { + dev_err(&dev->dev, "%s: failed to install fake IRQ " \ +- "handler for IRQ %d! (rc:%d)\n", dev_data->irq_name, +- dev_data->irq, rc); ++ "handler for IRQ %d! (rc:%d)\n", ++ dev_data->irq_name, dev_data->irq, rc); + goto out; + } +- } +- else { ++ } else { + free_irq(dev_data->irq, dev); + dev_data->irq = 0; + } +@@ -188,7 +187,7 @@ void pciback_do_op(struct work_struct *data) + break; + #endif + default: +- op->err = XEN_PCI_ERR_not_implemented; ++ op->err = XEN_PCI_ERR_not_implemented; + break; + } + } +diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c +index 993b659..70030c4 100644 +--- a/drivers/xen/pciback/xenbus.c ++++ b/drivers/xen/pciback/xenbus.c +@@ -700,12 +700,12 @@ static const struct xenbus_device_id xenpci_ids[] = { + }; + + static struct xenbus_driver xenbus_pciback_driver = { +- .name = "pciback", +- .owner = THIS_MODULE, +- .ids = xenpci_ids, +- .probe = pciback_xenbus_probe, +- .remove = pciback_xenbus_remove, +- .otherend_changed = pciback_frontend_changed, ++ .name = "pciback", ++ .owner = THIS_MODULE, ++ .ids = xenpci_ids, ++ .probe = pciback_xenbus_probe, ++ .remove = pciback_xenbus_remove, ++ .otherend_changed = pciback_frontend_changed, + }; + + int __init pciback_xenbus_register(void) +@@ -713,7 +713,7 @@ int __init pciback_xenbus_register(void) + pciback_wq = create_workqueue("pciback_workqueue"); + if (!pciback_wq) { + printk(KERN_ERR "%s: create" +- "pciback_workqueue failed\n",__FUNCTION__); ++ "pciback_workqueue failed\n", __func__); + return -EFAULT; + } + return xenbus_register_backend(&xenbus_pciback_driver); +-- +1.7.3.4 + + +From 83d24d8dbd9e52a7ac94deae2d9fff6681ce8761 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Mon, 13 Dec 2010 11:30:29 -0500 +Subject: [PATCH 036/139] xen/xen-pciback: Swap over to DEFINE_PCI_DEVICE_TABLE + +Signed-off-by: Konrad Rzeszutek Wilk +--- + drivers/xen/pciback/pci_stub.c | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + +diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c +index c8f6f29..09dd60c 100644 +--- a/drivers/xen/pciback/pci_stub.c ++++ b/drivers/xen/pciback/pci_stub.c +@@ -497,7 +497,7 @@ static void pcistub_remove(struct pci_dev *dev) + } + } + +-static const struct pci_device_id pcistub_ids[] = { ++DEFINE_PCI_DEVICE_TABLE(pcistub_ids) = { + { + .vendor = PCI_ANY_ID, + .device = PCI_ANY_ID, +-- +1.7.3.4 + + +From 105aad363797212fbd2a4c887b723407c5851175 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Thu, 9 Dec 2010 15:01:11 -0500 +Subject: [PATCH 038/139] xen/irq: Don't fall over when nr_irqs_gsi > nr_irqs. + +This scenario where the nr_irq_gsi is greater than nr_irqs +is rather strange but lets still try to survive. Make sure +to print a warning so the user wouldn't be surprised in case +things don't work. + +Solves a bootup-crash when booting Xen and Linux under QEMU. 
+ +Signed-off-by: Konrad Rzeszutek Wilk +--- + drivers/xen/events.c | 9 +++++++++ + 1 files changed, 9 insertions(+), 0 deletions(-) + +diff --git a/drivers/xen/events.c b/drivers/xen/events.c +index 4d4a23d..98b7220 100644 +--- a/drivers/xen/events.c ++++ b/drivers/xen/events.c +@@ -411,6 +411,7 @@ static int find_unbound_irq(void) + if (bottom == nr_irqs) + goto no_irqs; + ++retry: + /* This loop starts from the top of IRQ space and goes down. + * We need this b/c if we have a PCI device in a Xen PV guest + * we do not have an IO-APIC (though the backend might have them) +@@ -434,6 +435,14 @@ static int find_unbound_irq(void) + goto no_irqs; + + res = irq_alloc_desc_at(irq, -1); ++ if (res == -EEXIST) { ++ top--; ++ if (bottom > top) ++ printk(KERN_ERR "Eating in GSI/MSI space (%d)!" \ ++ " Your PCI device might not work!\n", top); ++ if (top > NR_IRQS_LEGACY) ++ goto retry; ++ } + + if (WARN_ON(res != irq)) + return -1; +-- +1.7.3.4 + + +From bfb56cd0b9304ddc6cfb411315bf7e5fea3e8bc7 Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Tue, 11 Jan 2011 17:20:13 +0000 +Subject: [PATCH 039/139] xen: handled remapped IRQs when enabling a pcifront PCI device. + +This happens to not be an issue currently because we take pains to try +to ensure that the GSI-IRQ mapping is 1-1 in a PV guest and that +regular event channels do not clash. However a subsequent patch is +going to break this 1-1 mapping. + +Signed-off-by: Ian Campbell +Signed-off-by: Konrad Rzeszutek Wilk +Cc: Stefano Stabellini +Cc: Jeremy Fitzhardinge +--- + arch/x86/pci/xen.c | 22 ++++++++++++++-------- + 1 files changed, 14 insertions(+), 8 deletions(-) + +diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c +index 25cd4a0..2a12f3d 100644 +--- a/arch/x86/pci/xen.c ++++ b/arch/x86/pci/xen.c +@@ -226,21 +226,27 @@ static int xen_pcifront_enable_irq(struct pci_dev *dev) + { + int rc; + int share = 1; ++ u8 gsi; + +- dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq); +- +- if (dev->irq < 0) +- return -EINVAL; ++ rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi); ++ if (rc < 0) { ++ dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n", ++ rc); ++ return rc; ++ } + +- if (dev->irq < NR_IRQS_LEGACY) ++ if (gsi < NR_IRQS_LEGACY) + share = 0; + +- rc = xen_allocate_pirq(dev->irq, share, "pcifront"); ++ rc = xen_allocate_pirq(gsi, share, "pcifront"); + if (rc < 0) { +- dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n", +- dev->irq, rc); ++ dev_warn(&dev->dev, "Xen PCI: failed to register GSI%d: %d\n", ++ gsi, rc); + return rc; + } ++ ++ dev->irq = rc; ++ dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq); + return 0; + } + +-- +1.7.3.4 + + +From 80b3b503bba489dcbdd808c5dd50a6be3aa06949 Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Tue, 11 Jan 2011 17:20:14 +0000 +Subject: [PATCH 040/139] xen:events: move find_unbound_irq inside CONFIG_PCI_MSI + +The only caller is xen_allocate_pirq_msi which is also under this +ifdef so this fixes: + drivers/xen/events.c:377: warning: 'find_unbound_pirq' defined but not used +when CONFIG_PCI_MSI=n + +Signed-off-by: Ian Campbell +Signed-off-by: Konrad Rzeszutek Wilk +Cc: Stefano Stabellini +Cc: Jeremy Fitzhardinge +--- + drivers/xen/events.c | 34 +++++++++++++++++----------------- + 1 files changed, 17 insertions(+), 17 deletions(-) + +diff --git a/drivers/xen/events.c b/drivers/xen/events.c +index 98b7220..ae8d45d 100644 +--- a/drivers/xen/events.c ++++ b/drivers/xen/events.c +@@ -384,23 +384,6 @@ static int get_nr_hw_irqs(void) + return 
ret; + } + +-static int find_unbound_pirq(int type) +-{ +- int rc, i; +- struct physdev_get_free_pirq op_get_free_pirq; +- op_get_free_pirq.type = type; +- +- rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq); +- if (!rc) +- return op_get_free_pirq.pirq; +- +- for (i = 0; i < nr_irqs; i++) { +- if (pirq_to_irq[i] < 0) +- return i; +- } +- return -1; +-} +- + static int find_unbound_irq(void) + { + struct irq_data *data; +@@ -683,6 +666,23 @@ out: + #include + #include "../pci/msi.h" + ++static int find_unbound_pirq(int type) ++{ ++ int rc, i; ++ struct physdev_get_free_pirq op_get_free_pirq; ++ op_get_free_pirq.type = type; ++ ++ rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq); ++ if (!rc) ++ return op_get_free_pirq.pirq; ++ ++ for (i = 0; i < nr_irqs; i++) { ++ if (pirq_to_irq[i] < 0) ++ return i; ++ } ++ return -1; ++} ++ + void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc) + { + spin_lock(&irq_mapping_update_lock); +-- +1.7.3.4 + + +From c514d00c80574e839d34c239363153b90bb8efcc Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Tue, 11 Jan 2011 17:20:15 +0000 +Subject: [PATCH 041/139] xen: events: add xen_allocate_irq_{dynamic, gsi} and xen_free_irq + +This is neater than open-coded calls to irq_alloc_desc_at and +irq_free_desc. + +No intended behavioural change. + +Note that we previously were not checking the return value of +irq_alloc_desc_at which would be failing for GSI +Signed-off-by: Konrad Rzeszutek Wilk +Cc: Stefano Stabellini +Cc: Jeremy Fitzhardinge +--- + drivers/xen/events.c | 53 +++++++++++++++++++++++++++++++++----------------- + 1 files changed, 35 insertions(+), 18 deletions(-) + +diff --git a/drivers/xen/events.c b/drivers/xen/events.c +index ae8d45d..74fb216 100644 +--- a/drivers/xen/events.c ++++ b/drivers/xen/events.c +@@ -384,7 +384,7 @@ static int get_nr_hw_irqs(void) + return ret; + } + +-static int find_unbound_irq(void) ++static int xen_allocate_irq_dynamic(void) + { + struct irq_data *data; + int irq, res; +@@ -442,6 +442,30 @@ static bool identity_mapped_irq(unsigned irq) + return irq < get_nr_hw_irqs(); + } + ++static int xen_allocate_irq_gsi(unsigned gsi) ++{ ++ int irq; ++ ++ if (!identity_mapped_irq(gsi) && ++ (xen_initial_domain() || !xen_pv_domain())) ++ return xen_allocate_irq_dynamic(); ++ ++ /* Legacy IRQ descriptors are already allocated by the arch. */ ++ if (gsi < NR_IRQS_LEGACY) ++ return gsi; ++ ++ irq = irq_alloc_desc_at(gsi, -1); ++ if (irq < 0) ++ panic("Unable to allocate to IRQ%d (%d)\n", gsi, irq); ++ ++ return irq; ++} ++ ++static void xen_free_irq(unsigned irq) ++{ ++ irq_free_desc(irq); ++} ++ + static void pirq_unmask_notify(int irq) + { + struct physdev_eoi eoi = { .irq = pirq_from_irq(irq) }; +@@ -627,14 +651,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name) + goto out; /* XXX need refcount? */ + } + +- /* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore +- * we are using the !xen_initial_domain() to drop in the function.*/ +- if (identity_mapped_irq(gsi) || (!xen_initial_domain() && +- xen_pv_domain())) { +- irq = gsi; +- irq_alloc_desc_at(irq, -1); +- } else +- irq = find_unbound_irq(); ++ irq = xen_allocate_irq_gsi(gsi); + + set_irq_chip_and_handler_name(irq, &xen_pirq_chip, + handle_level_irq, name); +@@ -647,7 +664,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name) + * this in the priv domain. 
*/ + if (xen_initial_domain() && + HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { +- irq_free_desc(irq); ++ xen_free_irq(irq); + irq = -ENOSPC; + goto out; + } +@@ -688,7 +705,7 @@ void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc) + spin_lock(&irq_mapping_update_lock); + + if (alloc & XEN_ALLOC_IRQ) { +- *irq = find_unbound_irq(); ++ *irq = xen_allocate_irq_dynamic(); + if (*irq == -1) + goto out; + } +@@ -738,7 +755,7 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type) + + spin_lock(&irq_mapping_update_lock); + +- irq = find_unbound_irq(); ++ irq = xen_allocate_irq_dynamic(); + + if (irq == -1) + goto out; +@@ -747,7 +764,7 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type) + if (rc) { + printk(KERN_WARNING "xen map irq failed %d\n", rc); + +- irq_free_desc(irq); ++ xen_free_irq(irq); + + irq = -1; + goto out; +@@ -789,7 +806,7 @@ int xen_destroy_irq(int irq) + } + irq_info[irq] = mk_unbound_info(); + +- irq_free_desc(irq); ++ xen_free_irq(irq); + + out: + spin_unlock(&irq_mapping_update_lock); +@@ -820,7 +837,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) + irq = evtchn_to_irq[evtchn]; + + if (irq == -1) { +- irq = find_unbound_irq(); ++ irq = xen_allocate_irq_dynamic(); + + set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, + handle_fasteoi_irq, "event"); +@@ -845,7 +862,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) + irq = per_cpu(ipi_to_irq, cpu)[ipi]; + + if (irq == -1) { +- irq = find_unbound_irq(); ++ irq = xen_allocate_irq_dynamic(); + if (irq < 0) + goto out; + +@@ -881,7 +898,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu) + irq = per_cpu(virq_to_irq, cpu)[virq]; + + if (irq == -1) { +- irq = find_unbound_irq(); ++ irq = xen_allocate_irq_dynamic(); + + set_irq_chip_and_handler_name(irq, &xen_percpu_chip, + handle_percpu_irq, "virq"); +@@ -940,7 +957,7 @@ static void unbind_from_irq(unsigned int irq) + if (irq_info[irq].type != IRQT_UNBOUND) { + irq_info[irq] = mk_unbound_info(); + +- irq_free_desc(irq); ++ xen_free_irq(irq); + } + + spin_unlock(&irq_mapping_update_lock); +-- +1.7.3.4 + + +From 323430f2697268e6261b673fa2b86d6f3f3c7cff Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Tue, 11 Jan 2011 17:20:16 +0000 +Subject: [PATCH 042/139] xen: events: allocate GSIs and dynamic IRQs from separate IRQ ranges. + +There are three cases which we need to care about, PV guest, PV domain +0 and HVM guest. + +The PV guest case is simple since it has no access to ACPI or real +APICs and therefore has no GSIs therefore we simply dynamically +allocate all IRQs. The potentially interesting case here is PIRQ type +event channels associated with passed through PCI devices. However +even in this case the guest has no direct interaction with the +physical GSI since that happens in the PCI backend. + +The PV domain 0 and HVM guest cases are actually the same. In domain 0 +case the kernel sees the host ACPI and GSIs (although it only sees the +APIC indirectly via the hypervisor) and in the HVM guest case it sees +the virtualised ACPI and emulated APICs. In these cases we start +allocating dynamic IRQs at nr_irqs_gsi so that they cannot clash with +any GSI. + +Currently xen_allocate_irq_dynamic starts at nr_irqs and works +backwards looking for a free IRQ in order to (try and) avoid clashing +with GSIs used in domain 0 and in HVM guests. 
This change avoids that +although we retain the behaviour of allowing dynamic IRQs to encroach +on the GSI range if no suitable IRQs are available since a future IRQ +clash is deemed preferable to failure right now. + +Signed-off-by: Ian Campbell +Signed-off-by: Konrad Rzeszutek Wilk +Cc: Stefano Stabellini +Cc: Jeremy Fitzhardinge +--- + drivers/xen/events.c | 84 +++++++++++++++---------------------------------- + 1 files changed, 26 insertions(+), 58 deletions(-) + +diff --git a/drivers/xen/events.c b/drivers/xen/events.c +index 74fb216..a7b60f6 100644 +--- a/drivers/xen/events.c ++++ b/drivers/xen/events.c +@@ -373,81 +373,49 @@ static void unmask_evtchn(int port) + put_cpu(); + } + +-static int get_nr_hw_irqs(void) ++static int xen_allocate_irq_dynamic(void) + { +- int ret = 1; ++ int first = 0; ++ int irq; + + #ifdef CONFIG_X86_IO_APIC +- ret = get_nr_irqs_gsi(); ++ /* ++ * For an HVM guest or domain 0 which see "real" (emulated or ++ * actual repectively) GSIs we allocate dynamic IRQs ++ * e.g. those corresponding to event channels or MSIs ++ * etc. from the range above those "real" GSIs to avoid ++ * collisions. ++ */ ++ if (xen_initial_domain() || xen_hvm_domain()) ++ first = get_nr_irqs_gsi(); + #endif + +- return ret; +-} +- +-static int xen_allocate_irq_dynamic(void) +-{ +- struct irq_data *data; +- int irq, res; +- int bottom = get_nr_hw_irqs(); +- int top = nr_irqs-1; +- +- if (bottom == nr_irqs) +- goto no_irqs; +- + retry: +- /* This loop starts from the top of IRQ space and goes down. +- * We need this b/c if we have a PCI device in a Xen PV guest +- * we do not have an IO-APIC (though the backend might have them) +- * mapped in. To not have a collision of physical IRQs with the Xen +- * event channels start at the top of the IRQ space for virtual IRQs. +- */ +- for (irq = top; irq > bottom; irq--) { +- data = irq_get_irq_data(irq); +- /* only 15->0 have init'd desc; handle irq > 16 */ +- if (!data) +- break; +- if (data->chip == &no_irq_chip) +- break; +- if (data->chip != &xen_dynamic_chip) +- continue; +- if (irq_info[irq].type == IRQT_UNBOUND) +- return irq; +- } ++ irq = irq_alloc_desc_from(first, -1); + +- if (irq == bottom) +- goto no_irqs; +- +- res = irq_alloc_desc_at(irq, -1); +- if (res == -EEXIST) { +- top--; +- if (bottom > top) +- printk(KERN_ERR "Eating in GSI/MSI space (%d)!" \ +- " Your PCI device might not work!\n", top); +- if (top > NR_IRQS_LEGACY) +- goto retry; ++ if (irq == -ENOMEM && first > NR_IRQS_LEGACY) { ++ printk(KERN_ERR "Out of dynamic IRQ space and eating into GSI space. You should increase nr_irqs\n"); ++ first = max(NR_IRQS_LEGACY, first - NR_IRQS_LEGACY); ++ goto retry; + } + +- if (WARN_ON(res != irq)) +- return -1; ++ if (irq < 0) ++ panic("No available IRQ to bind to: increase nr_irqs!\n"); + + return irq; +- +-no_irqs: +- panic("No available IRQ to bind to: increase nr_irqs!\n"); +-} +- +-static bool identity_mapped_irq(unsigned irq) +-{ +- /* identity map all the hardware irqs */ +- return irq < get_nr_hw_irqs(); + } + + static int xen_allocate_irq_gsi(unsigned gsi) + { + int irq; + +- if (!identity_mapped_irq(gsi) && +- (xen_initial_domain() || !xen_pv_domain())) ++ /* ++ * A PV guest has no concept of a GSI (since it has no ACPI ++ * nor access to/knowledge of the physical APICs). Therefore ++ * all IRQs are dynamically allocated from the entire IRQ ++ * space. ++ */ ++ if (xen_pv_domain() && !xen_initial_domain()) + return xen_allocate_irq_dynamic(); + + /* Legacy IRQ descriptors are already allocated by the arch. 
*/ +-- +1.7.3.4 + + +From c986ab83cede3fc02d9f73c65dd83c20ebdf3d0e Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Mon, 29 Nov 2010 13:52:18 -0500 +Subject: [PATCH 043/139] ttm: Introduce a placeholder for DMA (bus) addresses. + +This is right now limited to only non-pool constructs. + +Signed-off-by: Konrad Rzeszutek Wilk +Tested-by: Ian Campbell +--- + drivers/gpu/drm/ttm/ttm_page_alloc.c | 9 ++++++--- + drivers/gpu/drm/ttm/ttm_tt.c | 10 ++++++++-- + include/drm/ttm/ttm_bo_driver.h | 2 ++ + include/drm/ttm/ttm_page_alloc.h | 8 ++++++-- + 4 files changed, 22 insertions(+), 7 deletions(-) + +diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c +index b1e02ff..6859288 100644 +--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c ++++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c +@@ -38,6 +38,7 @@ + #include + #include /* for seq_printf */ + #include ++#include + + #include + +@@ -662,7 +663,8 @@ out: + * cached pages. + */ + int ttm_get_pages(struct list_head *pages, int flags, +- enum ttm_caching_state cstate, unsigned count) ++ enum ttm_caching_state cstate, unsigned count, ++ dma_addr_t *dma_address) + { + struct ttm_page_pool *pool = ttm_get_pool(flags, cstate); + struct page *p = NULL; +@@ -720,7 +722,7 @@ int ttm_get_pages(struct list_head *pages, int flags, + printk(KERN_ERR TTM_PFX + "Failed to allocate extra pages " + "for large request."); +- ttm_put_pages(pages, 0, flags, cstate); ++ ttm_put_pages(pages, 0, flags, cstate, NULL); + return r; + } + } +@@ -731,7 +733,8 @@ int ttm_get_pages(struct list_head *pages, int flags, + + /* Put all pages in pages list to correct pool to wait for reuse */ + void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags, +- enum ttm_caching_state cstate) ++ enum ttm_caching_state cstate, ++ dma_addr_t *dma_address) + { + unsigned long irq_flags; + struct ttm_page_pool *pool = ttm_get_pool(flags, cstate); +diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c +index af789dc..0d39001 100644 +--- a/drivers/gpu/drm/ttm/ttm_tt.c ++++ b/drivers/gpu/drm/ttm/ttm_tt.c +@@ -49,12 +49,16 @@ static int ttm_tt_swapin(struct ttm_tt *ttm); + static void ttm_tt_alloc_page_directory(struct ttm_tt *ttm) + { + ttm->pages = drm_calloc_large(ttm->num_pages, sizeof(*ttm->pages)); ++ ttm->dma_address = drm_calloc_large(ttm->num_pages, ++ sizeof(*ttm->dma_address)); + } + + static void ttm_tt_free_page_directory(struct ttm_tt *ttm) + { + drm_free_large(ttm->pages); + ttm->pages = NULL; ++ drm_free_large(ttm->dma_address); ++ ttm->dma_address = NULL; + } + + static void ttm_tt_free_user_pages(struct ttm_tt *ttm) +@@ -105,7 +109,8 @@ static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index) + + INIT_LIST_HEAD(&h); + +- ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1); ++ ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1, ++ &ttm->dma_address[index]); + + if (ret != 0) + return NULL; +@@ -298,7 +303,8 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm) + count++; + } + } +- ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state); ++ ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state, ++ ttm->dma_address); + ttm->state = tt_unpopulated; + ttm->first_himem_page = ttm->num_pages; + ttm->last_lomem_page = -1; +diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h +index 8e0c848..6dc4fcc 100644 +--- a/include/drm/ttm/ttm_bo_driver.h ++++ b/include/drm/ttm/ttm_bo_driver.h +@@ -149,6 +149,7 @@ enum ttm_caching_state { + * 
@swap_storage: Pointer to shmem struct file for swap storage. + * @caching_state: The current caching state of the pages. + * @state: The current binding state of the pages. ++ * @dma_address: The DMA (bus) addresses of the pages (if TTM_PAGE_FLAG_DMA32) + * + * This is a structure holding the pages, caching- and aperture binding + * status for a buffer object that isn't backed by fixed (VRAM / AGP) +@@ -173,6 +174,7 @@ struct ttm_tt { + tt_unbound, + tt_unpopulated, + } state; ++ dma_addr_t *dma_address; + }; + + #define TTM_MEMTYPE_FLAG_FIXED (1 << 0) /* Fixed (on-card) PCI memory */ +diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h +index 1168214..8062890 100644 +--- a/include/drm/ttm/ttm_page_alloc.h ++++ b/include/drm/ttm/ttm_page_alloc.h +@@ -36,11 +36,13 @@ + * @flags: ttm flags for page allocation. + * @cstate: ttm caching state for the page. + * @count: number of pages to allocate. ++ * @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set). + */ + int ttm_get_pages(struct list_head *pages, + int flags, + enum ttm_caching_state cstate, +- unsigned count); ++ unsigned count, ++ dma_addr_t *dma_address); + /** + * Put linked list of pages to pool. + * +@@ -49,11 +51,13 @@ int ttm_get_pages(struct list_head *pages, + * count. + * @flags: ttm flags for page allocation. + * @cstate: ttm caching state. ++ * @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set). + */ + void ttm_put_pages(struct list_head *pages, + unsigned page_count, + int flags, +- enum ttm_caching_state cstate); ++ enum ttm_caching_state cstate, ++ dma_addr_t *dma_address); + /** + * Initialize pool allocator. + */ +-- +1.7.3.4 + + +From ed805774fd3e3a8a0baa003d6419c5bd07d8cc5a Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Mon, 29 Nov 2010 14:03:30 -0500 +Subject: [PATCH 044/139] tm: Utilize the dma_addr_t array for pages that are to in DMA32 pool. + +We only use the "if (pool == NULL)" path for right now. 
+ +Signed-off-by: Konrad Rzeszutek Wilk +Tested-by: Ian Campbell +--- + drivers/gpu/drm/ttm/ttm_page_alloc.c | 26 +++++++++++++++++++++++--- + 1 files changed, 23 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c +index 6859288..5d09677 100644 +--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c ++++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c +@@ -683,14 +683,22 @@ int ttm_get_pages(struct list_head *pages, int flags, + gfp_flags |= GFP_HIGHUSER; + + for (r = 0; r < count; ++r) { +- p = alloc_page(gfp_flags); ++ if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address) { ++ void *addr; ++ addr = dma_alloc_coherent(NULL, PAGE_SIZE, ++ &dma_address[r], ++ gfp_flags); ++ if (addr == NULL) ++ return -ENOMEM; ++ p = virt_to_page(addr); ++ } else ++ p = alloc_page(gfp_flags); + if (!p) { + + printk(KERN_ERR TTM_PFX + "Unable to allocate page."); + return -ENOMEM; + } +- + list_add(&p->lru, pages); + } + return 0; +@@ -739,12 +747,24 @@ void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags, + unsigned long irq_flags; + struct ttm_page_pool *pool = ttm_get_pool(flags, cstate); + struct page *p, *tmp; ++ unsigned r; + + if (pool == NULL) { + /* No pool for this memory type so free the pages */ + ++ r = page_count-1; + list_for_each_entry_safe(p, tmp, pages, lru) { +- __free_page(p); ++ if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address) { ++ void *addr = page_address(p); ++ WARN_ON(!addr || !dma_address[r]); ++ if (addr) ++ dma_free_coherent(NULL, PAGE_SIZE, ++ addr, ++ dma_address[r]); ++ dma_address[r] = 0; ++ } else ++ __free_page(p); ++ r--; + } + /* Make the pages list empty */ + INIT_LIST_HEAD(pages); +-- +1.7.3.4 + + +From c779160e3b0246d7de606eafb855df9b283a5c2a Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Thu, 2 Dec 2010 10:24:13 -0500 +Subject: [PATCH 045/139] ttm: Expand (*populate) to support an array of DMA addresses. + +We pass in the array of ttm pages to be populated in the GART/MM +of the card (or AGP). Patch titled: "ttm: Utilize the dma_addr_t array +for pages that are to in DMA32 pool." uses the DMA API to make those +pages have a proper DMA addresses (in the situation where +page_to_phys or virt_to_phys do not give use the DMA (bus) address). + +Since we are using the DMA API on those pages, we should pass in the +DMA address to this function so it can save it in its proper fields +(later patches use it). 
+ +Signed-off-by: Konrad Rzeszutek Wilk +Tested-by: Ian Campbell +--- + drivers/gpu/drm/nouveau/nouveau_sgdma.c | 3 ++- + drivers/gpu/drm/radeon/radeon_ttm.c | 3 ++- + drivers/gpu/drm/ttm/ttm_agp_backend.c | 3 ++- + drivers/gpu/drm/ttm/ttm_tt.c | 2 +- + drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c | 3 ++- + include/drm/ttm/ttm_bo_driver.h | 4 +++- + 6 files changed, 12 insertions(+), 6 deletions(-) + +diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c +index 288baca..edc140a 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c ++++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c +@@ -20,7 +20,8 @@ struct nouveau_sgdma_be { + + static int + nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages, +- struct page **pages, struct page *dummy_read_page) ++ struct page **pages, struct page *dummy_read_page, ++ dma_addr_t *dma_addrs) + { + struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be; + struct drm_device *dev = nvbe->dev; +diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c +index 01c2c73..6f156e9 100644 +--- a/drivers/gpu/drm/radeon/radeon_ttm.c ++++ b/drivers/gpu/drm/radeon/radeon_ttm.c +@@ -655,7 +655,8 @@ struct radeon_ttm_backend { + static int radeon_ttm_backend_populate(struct ttm_backend *backend, + unsigned long num_pages, + struct page **pages, +- struct page *dummy_read_page) ++ struct page *dummy_read_page, ++ dma_addr_t *dma_addrs) + { + struct radeon_ttm_backend *gtt; + +diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c +index f999e36..1c4a72f 100644 +--- a/drivers/gpu/drm/ttm/ttm_agp_backend.c ++++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c +@@ -47,7 +47,8 @@ struct ttm_agp_backend { + + static int ttm_agp_populate(struct ttm_backend *backend, + unsigned long num_pages, struct page **pages, +- struct page *dummy_read_page) ++ struct page *dummy_read_page, ++ dma_addr_t *dma_addrs) + { + struct ttm_agp_backend *agp_be = + container_of(backend, struct ttm_agp_backend, backend); +diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c +index 0d39001..86d5b17 100644 +--- a/drivers/gpu/drm/ttm/ttm_tt.c ++++ b/drivers/gpu/drm/ttm/ttm_tt.c +@@ -169,7 +169,7 @@ int ttm_tt_populate(struct ttm_tt *ttm) + } + + be->func->populate(be, ttm->num_pages, ttm->pages, +- ttm->dummy_read_page); ++ ttm->dummy_read_page, ttm->dma_address); + ttm->state = tt_unbound; + return 0; + } +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c +index 80bc37b..87e43e0 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c +@@ -102,7 +102,8 @@ struct vmw_ttm_backend { + + static int vmw_ttm_populate(struct ttm_backend *backend, + unsigned long num_pages, struct page **pages, +- struct page *dummy_read_page) ++ struct page *dummy_read_page, ++ dma_addr_t *dma_addrs) + { + struct vmw_ttm_backend *vmw_be = + container_of(backend, struct vmw_ttm_backend, backend); +diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h +index 6dc4fcc..ebcd3dd 100644 +--- a/include/drm/ttm/ttm_bo_driver.h ++++ b/include/drm/ttm/ttm_bo_driver.h +@@ -50,13 +50,15 @@ struct ttm_backend_func { + * @pages: Array of pointers to ttm pages. + * @dummy_read_page: Page to be used instead of NULL pages in the + * array @pages. ++ * @dma_addrs: Array of DMA (bus) address of the ttm pages. + * + * Populate the backend with ttm pages. 
Depending on the backend, + * it may or may not copy the @pages array. + */ + int (*populate) (struct ttm_backend *backend, + unsigned long num_pages, struct page **pages, +- struct page *dummy_read_page); ++ struct page *dummy_read_page, ++ dma_addr_t *dma_addrs); + /** + * struct ttm_backend_func member clear + * +-- +1.7.3.4 + + +From 340dec734021d8600029e8b3178280cc8d3af251 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Thu, 2 Dec 2010 11:04:29 -0500 +Subject: [PATCH 046/139] radeon/ttm/PCIe: Use dma_addr if TTM has set it. + +If the TTM layer has used the DMA API to setup pages that are +TTM_PAGE_FLAG_DMA32 (look at patch titled: "ttm: Utilize the dma_addr_t +array for pages that are to in DMA32 pool."), lets use it +when programming the GART in the PCIe type cards. + +This patch skips doing the pci_map_page (and pci_unmap_page) if +there is a DMA addresses passed in for that page. If the dma_address +is zero (or DMA_ERROR_CODE), then we continue on with our old +behaviour. + +Signed-off-by: Konrad Rzeszutek Wilk +Tested-by: Ian Campbell +--- + drivers/gpu/drm/radeon/radeon.h | 4 ++- + drivers/gpu/drm/radeon/radeon_gart.c | 36 ++++++++++++++++++++++++--------- + drivers/gpu/drm/radeon/radeon_ttm.c | 5 +++- + 3 files changed, 33 insertions(+), 12 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h +index 73f600d..c9bbab9 100644 +--- a/drivers/gpu/drm/radeon/radeon.h ++++ b/drivers/gpu/drm/radeon/radeon.h +@@ -317,6 +317,7 @@ struct radeon_gart { + union radeon_gart_table table; + struct page **pages; + dma_addr_t *pages_addr; ++ bool *ttm_alloced; + bool ready; + }; + +@@ -329,7 +330,8 @@ void radeon_gart_fini(struct radeon_device *rdev); + void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset, + int pages); + int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, +- int pages, struct page **pagelist); ++ int pages, struct page **pagelist, ++ dma_addr_t *dma_addr); + + + /* +diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c +index e65b903..4a5ac4b 100644 +--- a/drivers/gpu/drm/radeon/radeon_gart.c ++++ b/drivers/gpu/drm/radeon/radeon_gart.c +@@ -149,8 +149,9 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset, + p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); + for (i = 0; i < pages; i++, p++) { + if (rdev->gart.pages[p]) { +- pci_unmap_page(rdev->pdev, rdev->gart.pages_addr[p], +- PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); ++ if (!rdev->gart.ttm_alloced[p]) ++ pci_unmap_page(rdev->pdev, rdev->gart.pages_addr[p], ++ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + rdev->gart.pages[p] = NULL; + rdev->gart.pages_addr[p] = rdev->dummy_page.addr; + page_base = rdev->gart.pages_addr[p]; +@@ -165,7 +166,7 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset, + } + + int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, +- int pages, struct page **pagelist) ++ int pages, struct page **pagelist, dma_addr_t *dma_addr) + { + unsigned t; + unsigned p; +@@ -180,15 +181,22 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, + p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); + + for (i = 0; i < pages; i++, p++) { +- /* we need to support large memory configurations */ +- /* assume that unbind have already been call on the range */ +- rdev->gart.pages_addr[p] = pci_map_page(rdev->pdev, pagelist[i], ++ /* On TTM path, we only use the DMA API if TTM_PAGE_FLAG_DMA32 ++ * is requested. 
*/ ++ if (dma_addr[i] != DMA_ERROR_CODE) { ++ rdev->gart.ttm_alloced[p] = true; ++ rdev->gart.pages_addr[p] = dma_addr[i]; ++ } else { ++ /* we need to support large memory configurations */ ++ /* assume that unbind have already been call on the range */ ++ rdev->gart.pages_addr[p] = pci_map_page(rdev->pdev, pagelist[i], + 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); +- if (pci_dma_mapping_error(rdev->pdev, rdev->gart.pages_addr[p])) { +- /* FIXME: failed to map page (return -ENOMEM?) */ +- radeon_gart_unbind(rdev, offset, pages); +- return -ENOMEM; ++ if (pci_dma_mapping_error(rdev->pdev, rdev->gart.pages_addr[p])) { ++ /* FIXME: failed to map page (return -ENOMEM?) */ ++ radeon_gart_unbind(rdev, offset, pages); ++ return -ENOMEM; ++ } + } + rdev->gart.pages[p] = pagelist[i]; + page_base = rdev->gart.pages_addr[p]; +@@ -251,6 +259,12 @@ int radeon_gart_init(struct radeon_device *rdev) + radeon_gart_fini(rdev); + return -ENOMEM; + } ++ rdev->gart.ttm_alloced = kzalloc(sizeof(bool) * ++ rdev->gart.num_cpu_pages, GFP_KERNEL); ++ if (rdev->gart.ttm_alloced == NULL) { ++ radeon_gart_fini(rdev); ++ return -ENOMEM; ++ } + /* set GART entry to point to the dummy page by default */ + for (i = 0; i < rdev->gart.num_cpu_pages; i++) { + rdev->gart.pages_addr[i] = rdev->dummy_page.addr; +@@ -267,6 +281,8 @@ void radeon_gart_fini(struct radeon_device *rdev) + rdev->gart.ready = false; + kfree(rdev->gart.pages); + kfree(rdev->gart.pages_addr); ++ kfree(rdev->gart.ttm_alloced); + rdev->gart.pages = NULL; + rdev->gart.pages_addr = NULL; ++ rdev->gart.ttm_alloced = NULL; + } +diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c +index 6f156e9..ca04505 100644 +--- a/drivers/gpu/drm/radeon/radeon_ttm.c ++++ b/drivers/gpu/drm/radeon/radeon_ttm.c +@@ -647,6 +647,7 @@ struct radeon_ttm_backend { + unsigned long num_pages; + struct page **pages; + struct page *dummy_read_page; ++ dma_addr_t *dma_addrs; + bool populated; + bool bound; + unsigned offset; +@@ -662,6 +663,7 @@ static int radeon_ttm_backend_populate(struct ttm_backend *backend, + + gtt = container_of(backend, struct radeon_ttm_backend, backend); + gtt->pages = pages; ++ gtt->dma_addrs = dma_addrs; + gtt->num_pages = num_pages; + gtt->dummy_read_page = dummy_read_page; + gtt->populated = true; +@@ -674,6 +676,7 @@ static void radeon_ttm_backend_clear(struct ttm_backend *backend) + + gtt = container_of(backend, struct radeon_ttm_backend, backend); + gtt->pages = NULL; ++ gtt->dma_addrs = NULL; + gtt->num_pages = 0; + gtt->dummy_read_page = NULL; + gtt->populated = false; +@@ -694,7 +697,7 @@ static int radeon_ttm_backend_bind(struct ttm_backend *backend, + gtt->num_pages, bo_mem, backend); + } + r = radeon_gart_bind(gtt->rdev, gtt->offset, +- gtt->num_pages, gtt->pages); ++ gtt->num_pages, gtt->pages, gtt->dma_addrs); + if (r) { + DRM_ERROR("failed to bind %lu pages at 0x%08X\n", + gtt->num_pages, gtt->offset); +-- +1.7.3.4 + + +From b4efe20150e5e9a483faf67ba0d947dbface96cd Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Thu, 2 Dec 2010 11:36:24 -0500 +Subject: [PATCH 047/139] nouveau/ttm/PCIe: Use dma_addr if TTM has set it. + +If the TTM layer has used the DMA API to setup pages that are +TTM_PAGE_FLAG_DMA32 (look at patch titled: "ttm: Utilize the dma_addr_t +array for pages that are to in DMA32 pool."), lets use it +when programming the GART in the PCIe type cards. + +This patch skips doing the pci_map_page (and pci_unmap_page) if +there is a DMA addresses passed in for that page. 
If the dma_address +is zero (or DMA_ERROR_CODE), then we continue on with our old +behaviour. + +Signed-off-by: Konrad Rzeszutek Wilk +Tested-by: Ian Campbell +--- + drivers/gpu/drm/nouveau/nouveau_sgdma.c | 28 +++++++++++++++++++++------- + 1 files changed, 21 insertions(+), 7 deletions(-) + +diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c +index edc140a..bbdd982 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c ++++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c +@@ -12,6 +12,7 @@ struct nouveau_sgdma_be { + struct drm_device *dev; + + dma_addr_t *pages; ++ bool *ttm_alloced; + unsigned nr_pages; + + unsigned pte_start; +@@ -35,15 +36,25 @@ nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages, + if (!nvbe->pages) + return -ENOMEM; + ++ nvbe->ttm_alloced = kmalloc(sizeof(bool) * num_pages, GFP_KERNEL); ++ if (!nvbe->ttm_alloced) ++ return -ENOMEM; ++ + nvbe->nr_pages = 0; + while (num_pages--) { +- nvbe->pages[nvbe->nr_pages] = +- pci_map_page(dev->pdev, pages[nvbe->nr_pages], 0, ++ if (dma_addrs[nvbe->nr_pages] != DMA_ERROR_CODE) { ++ nvbe->pages[nvbe->nr_pages] = ++ dma_addrs[nvbe->nr_pages]; ++ nvbe->ttm_alloced[nvbe->nr_pages] = true; ++ } else { ++ nvbe->pages[nvbe->nr_pages] = ++ pci_map_page(dev->pdev, pages[nvbe->nr_pages], 0, + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); +- if (pci_dma_mapping_error(dev->pdev, +- nvbe->pages[nvbe->nr_pages])) { +- be->func->clear(be); +- return -EFAULT; ++ if (pci_dma_mapping_error(dev->pdev, ++ nvbe->pages[nvbe->nr_pages])) { ++ be->func->clear(be); ++ return -EFAULT; ++ } + } + + nvbe->nr_pages++; +@@ -66,11 +77,14 @@ nouveau_sgdma_clear(struct ttm_backend *be) + be->func->unbind(be); + + while (nvbe->nr_pages--) { +- pci_unmap_page(dev->pdev, nvbe->pages[nvbe->nr_pages], ++ if (!nvbe->ttm_alloced[nvbe->nr_pages]) ++ pci_unmap_page(dev->pdev, nvbe->pages[nvbe->nr_pages], + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + } + kfree(nvbe->pages); ++ kfree(nvbe->ttm_alloced); + nvbe->pages = NULL; ++ nvbe->ttm_alloced = NULL; + nvbe->nr_pages = 0; + } + } +-- +1.7.3.4 + + +From 03c4949992e2b7e84b7cdeb156d803db3f848b6c Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Mon, 9 Feb 2009 12:05:52 -0800 +Subject: [PATCH 051/139] xen: netback: Initial import of linux-2.6.18-xen.hg netback driver. + +This corresponds to 774:107e10e0e07c in that tree. + +Signed-off-by: Ian Campbell +--- + drivers/xen/Kconfig | 7 + + drivers/xen/Makefile | 1 + + drivers/xen/netback/Makefile | 3 + + drivers/xen/netback/common.h | 217 ++++++ + drivers/xen/netback/interface.c | 336 ++++++++ + drivers/xen/netback/netback.c | 1637 +++++++++++++++++++++++++++++++++++++++ + drivers/xen/netback/xenbus.c | 454 +++++++++++ + 7 files changed, 2655 insertions(+), 0 deletions(-) + create mode 100644 drivers/xen/netback/Makefile + create mode 100644 drivers/xen/netback/common.h + create mode 100644 drivers/xen/netback/interface.c + create mode 100644 drivers/xen/netback/netback.c + create mode 100644 drivers/xen/netback/xenbus.c + +diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig +index 5a48ce9..7e83d43 100644 +--- a/drivers/xen/Kconfig ++++ b/drivers/xen/Kconfig +@@ -37,6 +37,13 @@ config XEN_BACKEND + depends on XEN_PCIDEV_BACKEND + + ++config XEN_NETDEV_BACKEND ++ bool "Xen backend network device" ++ depends on XEN_BACKEND && NET ++ help ++ Implement the network backend driver, which passes packets ++ from the guest domain's frontend drivers to the network. 
++ + config XENFS + tristate "Xen filesystem" + default y +diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile +index 533a199..c0e0509 100644 +--- a/drivers/xen/Makefile ++++ b/drivers/xen/Makefile +@@ -9,6 +9,7 @@ obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o + obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o + obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o + obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/ ++obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/ + obj-$(CONFIG_XENFS) += xenfs/ + obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o + obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o +diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile +new file mode 100644 +index 0000000..f4a0c51 +--- /dev/null ++++ b/drivers/xen/netback/Makefile +@@ -0,0 +1,3 @@ ++obj-$(CONFIG_XEN_NETDEV_BACKEND) := netbk.o ++ ++netbk-y := netback.o xenbus.o interface.o +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +new file mode 100644 +index 0000000..9a54d57 +--- /dev/null ++++ b/drivers/xen/netback/common.h +@@ -0,0 +1,217 @@ ++/****************************************************************************** ++ * arch/xen/drivers/netif/backend/common.h ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation; or, when distributed ++ * separately from the Linux kernel or incorporated into other ++ * software packages, subject to the following license: ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this source file (the "Software"), to deal in the Software without ++ * restriction, including without limitation the rights to use, copy, modify, ++ * merge, publish, distribute, sublicense, and/or sell copies of the Software, ++ * and to permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ */ ++ ++#ifndef __NETIF__BACKEND__COMMON_H__ ++#define __NETIF__BACKEND__COMMON_H__ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define DPRINTK(_f, _a...) \ ++ pr_debug("(file=%s, line=%d) " _f, \ ++ __FILE__ , __LINE__ , ## _a ) ++#define IPRINTK(fmt, args...) \ ++ printk(KERN_INFO "xen_net: " fmt, ##args) ++#define WPRINTK(fmt, args...) \ ++ printk(KERN_WARNING "xen_net: " fmt, ##args) ++ ++typedef struct netif_st { ++ /* Unique identifier for this interface. */ ++ domid_t domid; ++ unsigned int handle; ++ ++ u8 fe_dev_addr[6]; ++ ++ /* Physical parameters of the comms window. */ ++ grant_handle_t tx_shmem_handle; ++ grant_ref_t tx_shmem_ref; ++ grant_handle_t rx_shmem_handle; ++ grant_ref_t rx_shmem_ref; ++ unsigned int irq; ++ ++ /* The shared rings and indexes. 
*/ ++ netif_tx_back_ring_t tx; ++ netif_rx_back_ring_t rx; ++ struct vm_struct *tx_comms_area; ++ struct vm_struct *rx_comms_area; ++ ++ /* Set of features that can be turned on in dev->features. */ ++ int features; ++ ++ /* Internal feature information. */ ++ u8 can_queue:1; /* can queue packets for receiver? */ ++ u8 copying_receiver:1; /* copy packets to receiver? */ ++ ++ /* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */ ++ RING_IDX rx_req_cons_peek; ++ ++ /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */ ++ unsigned long credit_bytes; ++ unsigned long credit_usec; ++ unsigned long remaining_credit; ++ struct timer_list credit_timeout; ++ ++ /* Enforce draining of the transmit queue. */ ++ struct timer_list tx_queue_timeout; ++ ++ /* Miscellaneous private stuff. */ ++ struct list_head list; /* scheduling list */ ++ atomic_t refcnt; ++ struct net_device *dev; ++ struct net_device_stats stats; ++ ++ unsigned int carrier; ++ ++ wait_queue_head_t waiting_to_free; ++} netif_t; ++ ++/* ++ * Implement our own carrier flag: the network stack's version causes delays ++ * when the carrier is re-enabled (in particular, dev_activate() may not ++ * immediately be called, which can cause packet loss; also the etherbridge ++ * can be rather lazy in activating its port). ++ */ ++#define netback_carrier_on(netif) ((netif)->carrier = 1) ++#define netback_carrier_off(netif) ((netif)->carrier = 0) ++#define netback_carrier_ok(netif) ((netif)->carrier) ++ ++enum { ++ NETBK_DONT_COPY_SKB, ++ NETBK_DELAYED_COPY_SKB, ++ NETBK_ALWAYS_COPY_SKB, ++}; ++ ++extern int netbk_copy_skb_mode; ++ ++/* Function pointers into netback accelerator plugin modules */ ++struct netback_accel_hooks { ++ struct module *owner; ++ int (*probe)(struct xenbus_device *dev); ++ int (*remove)(struct xenbus_device *dev); ++}; ++ ++/* Structure to track the state of a netback accelerator plugin */ ++struct netback_accelerator { ++ struct list_head link; ++ int id; ++ char *eth_name; ++ atomic_t use_count; ++ struct netback_accel_hooks *hooks; ++}; ++ ++struct backend_info { ++ struct xenbus_device *dev; ++ netif_t *netif; ++ enum xenbus_state frontend_state; ++ ++ /* State relating to the netback accelerator */ ++ void *netback_accel_priv; ++ /* The accelerator that this backend is currently using */ ++ struct netback_accelerator *accelerator; ++}; ++ ++#define NETBACK_ACCEL_VERSION 0x00010001 ++ ++/* ++ * Connect an accelerator plugin module to netback. Returns zero on ++ * success, < 0 on error, > 0 (with highest version number supported) ++ * if version mismatch. 
++ */ ++extern int netback_connect_accelerator(unsigned version, ++ int id, const char *eth_name, ++ struct netback_accel_hooks *hooks); ++/* Disconnect a previously connected accelerator plugin module */ ++extern void netback_disconnect_accelerator(int id, const char *eth_name); ++ ++ ++extern ++void netback_probe_accelerators(struct backend_info *be, ++ struct xenbus_device *dev); ++extern ++void netback_remove_accelerators(struct backend_info *be, ++ struct xenbus_device *dev); ++extern ++void netif_accel_init(void); ++ ++ ++#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) ++#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) ++ ++void netif_disconnect(netif_t *netif); ++ ++netif_t *netif_alloc(domid_t domid, unsigned int handle); ++int netif_map(netif_t *netif, unsigned long tx_ring_ref, ++ unsigned long rx_ring_ref, unsigned int evtchn); ++ ++#define netif_get(_b) (atomic_inc(&(_b)->refcnt)) ++#define netif_put(_b) \ ++ do { \ ++ if ( atomic_dec_and_test(&(_b)->refcnt) ) \ ++ wake_up(&(_b)->waiting_to_free); \ ++ } while (0) ++ ++void netif_xenbus_init(void); ++ ++#define netif_schedulable(netif) \ ++ (netif_running((netif)->dev) && netback_carrier_ok(netif)) ++ ++void netif_schedule_work(netif_t *netif); ++void netif_deschedule_work(netif_t *netif); ++ ++int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev); ++struct net_device_stats *netif_be_get_stats(struct net_device *dev); ++irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs); ++ ++static inline int netbk_can_queue(struct net_device *dev) ++{ ++ netif_t *netif = netdev_priv(dev); ++ return netif->can_queue; ++} ++ ++static inline int netbk_can_sg(struct net_device *dev) ++{ ++ netif_t *netif = netdev_priv(dev); ++ return netif->features & NETIF_F_SG; ++} ++ ++#endif /* __NETIF__BACKEND__COMMON_H__ */ +diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c +new file mode 100644 +index 0000000..7e67941 +--- /dev/null ++++ b/drivers/xen/netback/interface.c +@@ -0,0 +1,336 @@ ++/****************************************************************************** ++ * arch/xen/drivers/netif/backend/interface.c ++ * ++ * Network-device interface management. ++ * ++ * Copyright (c) 2004-2005, Keir Fraser ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation; or, when distributed ++ * separately from the Linux kernel or incorporated into other ++ * software packages, subject to the following license: ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this source file (the "Software"), to deal in the Software without ++ * restriction, including without limitation the rights to use, copy, modify, ++ * merge, publish, distribute, sublicense, and/or sell copies of the Software, ++ * and to permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ */ ++ ++#include "common.h" ++#include ++#include ++ ++/* ++ * Module parameter 'queue_length': ++ * ++ * Enables queuing in the network stack when a client has run out of receive ++ * descriptors. Although this feature can improve receive bandwidth by avoiding ++ * packet loss, it can also result in packets sitting in the 'tx_queue' for ++ * unbounded time. This is bad if those packets hold onto foreign resources. ++ * For example, consider a packet that holds onto resources belonging to the ++ * guest for which it is queued (e.g., packet received on vif1.0, destined for ++ * vif1.1 which is not activated in the guest): in this situation the guest ++ * will never be destroyed, unless vif1.1 is taken down. To avoid this, we ++ * run a timer (tx_queue_timeout) to drain the queue when the interface is ++ * blocked. ++ */ ++static unsigned long netbk_queue_length = 32; ++module_param_named(queue_length, netbk_queue_length, ulong, 0); ++ ++static void __netif_up(netif_t *netif) ++{ ++ enable_irq(netif->irq); ++ netif_schedule_work(netif); ++} ++ ++static void __netif_down(netif_t *netif) ++{ ++ disable_irq(netif->irq); ++ netif_deschedule_work(netif); ++} ++ ++static int net_open(struct net_device *dev) ++{ ++ netif_t *netif = netdev_priv(dev); ++ if (netback_carrier_ok(netif)) { ++ __netif_up(netif); ++ netif_start_queue(dev); ++ } ++ return 0; ++} ++ ++static int net_close(struct net_device *dev) ++{ ++ netif_t *netif = netdev_priv(dev); ++ if (netback_carrier_ok(netif)) ++ __netif_down(netif); ++ netif_stop_queue(dev); ++ return 0; ++} ++ ++static int netbk_change_mtu(struct net_device *dev, int mtu) ++{ ++ int max = netbk_can_sg(dev) ? 
65535 - ETH_HLEN : ETH_DATA_LEN; ++ ++ if (mtu > max) ++ return -EINVAL; ++ dev->mtu = mtu; ++ return 0; ++} ++ ++static int netbk_set_sg(struct net_device *dev, u32 data) ++{ ++ if (data) { ++ netif_t *netif = netdev_priv(dev); ++ ++ if (!(netif->features & NETIF_F_SG)) ++ return -ENOSYS; ++ } ++ ++ return ethtool_op_set_sg(dev, data); ++} ++ ++static int netbk_set_tso(struct net_device *dev, u32 data) ++{ ++ if (data) { ++ netif_t *netif = netdev_priv(dev); ++ ++ if (!(netif->features & NETIF_F_TSO)) ++ return -ENOSYS; ++ } ++ ++ return ethtool_op_set_tso(dev, data); ++} ++ ++static struct ethtool_ops network_ethtool_ops = ++{ ++ .get_tx_csum = ethtool_op_get_tx_csum, ++ .set_tx_csum = ethtool_op_set_tx_csum, ++ .get_sg = ethtool_op_get_sg, ++ .set_sg = netbk_set_sg, ++ .get_tso = ethtool_op_get_tso, ++ .set_tso = netbk_set_tso, ++ .get_link = ethtool_op_get_link, ++}; ++ ++netif_t *netif_alloc(domid_t domid, unsigned int handle) ++{ ++ int err = 0; ++ struct net_device *dev; ++ netif_t *netif; ++ char name[IFNAMSIZ] = {}; ++ ++ snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle); ++ dev = alloc_netdev(sizeof(netif_t), name, ether_setup); ++ if (dev == NULL) { ++ DPRINTK("Could not create netif: out of memory\n"); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ netif = netdev_priv(dev); ++ memset(netif, 0, sizeof(*netif)); ++ netif->domid = domid; ++ netif->handle = handle; ++ atomic_set(&netif->refcnt, 1); ++ init_waitqueue_head(&netif->waiting_to_free); ++ netif->dev = dev; ++ ++ netback_carrier_off(netif); ++ ++ netif->credit_bytes = netif->remaining_credit = ~0UL; ++ netif->credit_usec = 0UL; ++ init_timer(&netif->credit_timeout); ++ /* Initialize 'expires' now: it's used to track the credit window. */ ++ netif->credit_timeout.expires = jiffies; ++ ++ init_timer(&netif->tx_queue_timeout); ++ ++ dev->hard_start_xmit = netif_be_start_xmit; ++ dev->get_stats = netif_be_get_stats; ++ dev->open = net_open; ++ dev->stop = net_close; ++ dev->change_mtu = netbk_change_mtu; ++ dev->features = NETIF_F_IP_CSUM; ++ ++ SET_ETHTOOL_OPS(dev, &network_ethtool_ops); ++ ++ dev->tx_queue_len = netbk_queue_length; ++ ++ /* ++ * Initialise a dummy MAC address. We choose the numerically ++ * largest non-broadcast address to prevent the address getting ++ * stolen by an Ethernet bridge for STP purposes. 
++ * (FE:FF:FF:FF:FF:FF) ++ */ ++ memset(dev->dev_addr, 0xFF, ETH_ALEN); ++ dev->dev_addr[0] &= ~0x01; ++ ++ rtnl_lock(); ++ err = register_netdevice(dev); ++ rtnl_unlock(); ++ if (err) { ++ DPRINTK("Could not register new net device %s: err=%d\n", ++ dev->name, err); ++ free_netdev(dev); ++ return ERR_PTR(err); ++ } ++ ++ DPRINTK("Successfully created netif\n"); ++ return netif; ++} ++ ++static int map_frontend_pages( ++ netif_t *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref) ++{ ++ struct gnttab_map_grant_ref op; ++ ++ gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr, ++ GNTMAP_host_map, tx_ring_ref, netif->domid); ++ ++ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) ++ BUG(); ++ ++ if (op.status) { ++ DPRINTK(" Gnttab failure mapping tx_ring_ref!\n"); ++ return op.status; ++ } ++ ++ netif->tx_shmem_ref = tx_ring_ref; ++ netif->tx_shmem_handle = op.handle; ++ ++ gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr, ++ GNTMAP_host_map, rx_ring_ref, netif->domid); ++ ++ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) ++ BUG(); ++ ++ if (op.status) { ++ DPRINTK(" Gnttab failure mapping rx_ring_ref!\n"); ++ return op.status; ++ } ++ ++ netif->rx_shmem_ref = rx_ring_ref; ++ netif->rx_shmem_handle = op.handle; ++ ++ return 0; ++} ++ ++static void unmap_frontend_pages(netif_t *netif) ++{ ++ struct gnttab_unmap_grant_ref op; ++ ++ gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr, ++ GNTMAP_host_map, netif->tx_shmem_handle); ++ ++ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) ++ BUG(); ++ ++ gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr, ++ GNTMAP_host_map, netif->rx_shmem_handle); ++ ++ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) ++ BUG(); ++} ++ ++int netif_map(netif_t *netif, unsigned long tx_ring_ref, ++ unsigned long rx_ring_ref, unsigned int evtchn) ++{ ++ int err = -ENOMEM; ++ netif_tx_sring_t *txs; ++ netif_rx_sring_t *rxs; ++ ++ /* Already connected through? 
*/ ++ if (netif->irq) ++ return 0; ++ ++ netif->tx_comms_area = alloc_vm_area(PAGE_SIZE); ++ if (netif->tx_comms_area == NULL) ++ return -ENOMEM; ++ netif->rx_comms_area = alloc_vm_area(PAGE_SIZE); ++ if (netif->rx_comms_area == NULL) ++ goto err_rx; ++ ++ err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref); ++ if (err) ++ goto err_map; ++ ++ err = bind_interdomain_evtchn_to_irqhandler( ++ netif->domid, evtchn, netif_be_int, 0, ++ netif->dev->name, netif); ++ if (err < 0) ++ goto err_hypervisor; ++ netif->irq = err; ++ disable_irq(netif->irq); ++ ++ txs = (netif_tx_sring_t *)netif->tx_comms_area->addr; ++ BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE); ++ ++ rxs = (netif_rx_sring_t *) ++ ((char *)netif->rx_comms_area->addr); ++ BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE); ++ ++ netif->rx_req_cons_peek = 0; ++ ++ netif_get(netif); ++ ++ rtnl_lock(); ++ netback_carrier_on(netif); ++ if (netif_running(netif->dev)) ++ __netif_up(netif); ++ rtnl_unlock(); ++ ++ return 0; ++err_hypervisor: ++ unmap_frontend_pages(netif); ++err_map: ++ free_vm_area(netif->rx_comms_area); ++err_rx: ++ free_vm_area(netif->tx_comms_area); ++ return err; ++} ++ ++void netif_disconnect(netif_t *netif) ++{ ++ if (netback_carrier_ok(netif)) { ++ rtnl_lock(); ++ netback_carrier_off(netif); ++ netif_carrier_off(netif->dev); /* discard queued packets */ ++ if (netif_running(netif->dev)) ++ __netif_down(netif); ++ rtnl_unlock(); ++ netif_put(netif); ++ } ++ ++ atomic_dec(&netif->refcnt); ++ wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0); ++ ++ del_timer_sync(&netif->credit_timeout); ++ del_timer_sync(&netif->tx_queue_timeout); ++ ++ if (netif->irq) ++ unbind_from_irqhandler(netif->irq, netif); ++ ++ unregister_netdev(netif->dev); ++ ++ if (netif->tx.sring) { ++ unmap_frontend_pages(netif); ++ free_vm_area(netif->tx_comms_area); ++ free_vm_area(netif->rx_comms_area); ++ } ++ ++ free_netdev(netif->dev); ++} +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +new file mode 100644 +index 0000000..db629d4 +--- /dev/null ++++ b/drivers/xen/netback/netback.c +@@ -0,0 +1,1637 @@ ++/****************************************************************************** ++ * drivers/xen/netback/netback.c ++ * ++ * Back-end of the driver for virtual network devices. This portion of the ++ * driver exports a 'unified' network-device interface that can be accessed ++ * by any operating system that implements a compatible front end. A ++ * reference front-end implementation can be found in: ++ * drivers/xen/netfront/netfront.c ++ * ++ * Copyright (c) 2002-2005, K A Fraser ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation; or, when distributed ++ * separately from the Linux kernel or incorporated into other ++ * software packages, subject to the following license: ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this source file (the "Software"), to deal in the Software without ++ * restriction, including without limitation the rights to use, copy, modify, ++ * merge, publish, distribute, sublicense, and/or sell copies of the Software, ++ * and to permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. 
++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ */ ++ ++#include "common.h" ++#include ++#include ++ ++/*define NETBE_DEBUG_INTERRUPT*/ ++ ++struct netbk_rx_meta { ++ skb_frag_t frag; ++ int id; ++ u8 copy:1; ++}; ++ ++struct netbk_tx_pending_inuse { ++ struct list_head list; ++ unsigned long alloc_time; ++}; ++ ++static void netif_idx_release(u16 pending_idx); ++static void make_tx_response(netif_t *netif, ++ netif_tx_request_t *txp, ++ s8 st); ++static netif_rx_response_t *make_rx_response(netif_t *netif, ++ u16 id, ++ s8 st, ++ u16 offset, ++ u16 size, ++ u16 flags); ++ ++static void net_tx_action(unsigned long unused); ++static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0); ++ ++static void net_rx_action(unsigned long unused); ++static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0); ++ ++static struct timer_list net_timer; ++static struct timer_list netbk_tx_pending_timer; ++ ++#define MAX_PENDING_REQS 256 ++ ++static struct sk_buff_head rx_queue; ++ ++static struct page **mmap_pages; ++static inline unsigned long idx_to_pfn(unsigned int idx) ++{ ++ return page_to_pfn(mmap_pages[idx]); ++} ++ ++static inline unsigned long idx_to_kaddr(unsigned int idx) ++{ ++ return (unsigned long)pfn_to_kaddr(idx_to_pfn(idx)); ++} ++ ++/* extra field used in struct page */ ++static inline void netif_set_page_index(struct page *pg, unsigned int index) ++{ ++ *(unsigned long *)&pg->mapping = index; ++} ++ ++static inline int netif_page_index(struct page *pg) ++{ ++ unsigned long idx = (unsigned long)pg->mapping; ++ ++ if (!PageForeign(pg)) ++ return -1; ++ ++ if ((idx >= MAX_PENDING_REQS) || (mmap_pages[idx] != pg)) ++ return -1; ++ ++ return idx; ++} ++ ++#define PKT_PROT_LEN 64 ++ ++static struct pending_tx_info { ++ netif_tx_request_t req; ++ netif_t *netif; ++} pending_tx_info[MAX_PENDING_REQS]; ++static u16 pending_ring[MAX_PENDING_REQS]; ++typedef unsigned int PEND_RING_IDX; ++#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1)) ++static PEND_RING_IDX pending_prod, pending_cons; ++#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) ++ ++/* Freed TX SKBs get batched on this ring before return to pending_ring. */ ++static u16 dealloc_ring[MAX_PENDING_REQS]; ++static PEND_RING_IDX dealloc_prod, dealloc_cons; ++ ++/* Doubly-linked list of in-use pending entries. */ ++static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS]; ++static LIST_HEAD(pending_inuse_head); ++ ++static struct sk_buff_head tx_queue; ++ ++static grant_handle_t grant_tx_handle[MAX_PENDING_REQS]; ++static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS]; ++static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS]; ++ ++static struct list_head net_schedule_list; ++static spinlock_t net_schedule_list_lock; ++ ++#define MAX_MFN_ALLOC 64 ++static unsigned long mfn_list[MAX_MFN_ALLOC]; ++static unsigned int alloc_index = 0; ++ ++/* Setting this allows the safe use of this driver without netloop. 
*/ ++static int MODPARM_copy_skb = 1; ++module_param_named(copy_skb, MODPARM_copy_skb, bool, 0); ++MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop"); ++ ++int netbk_copy_skb_mode; ++ ++static inline unsigned long alloc_mfn(void) ++{ ++ BUG_ON(alloc_index == 0); ++ return mfn_list[--alloc_index]; ++} ++ ++static int check_mfn(int nr) ++{ ++ struct xen_memory_reservation reservation = { ++ .extent_order = 0, ++ .domid = DOMID_SELF ++ }; ++ int rc; ++ ++ if (likely(alloc_index >= nr)) ++ return 0; ++ ++ set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index); ++ reservation.nr_extents = MAX_MFN_ALLOC - alloc_index; ++ rc = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation); ++ if (likely(rc > 0)) ++ alloc_index += rc; ++ ++ return alloc_index >= nr ? 0 : -ENOMEM; ++} ++ ++static inline void maybe_schedule_tx_action(void) ++{ ++ smp_mb(); ++ if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && ++ !list_empty(&net_schedule_list)) ++ tasklet_schedule(&net_tx_tasklet); ++} ++ ++static struct sk_buff *netbk_copy_skb(struct sk_buff *skb) ++{ ++ struct skb_shared_info *ninfo; ++ struct sk_buff *nskb; ++ unsigned long offset; ++ int ret; ++ int len; ++ int headlen; ++ ++ BUG_ON(skb_shinfo(skb)->frag_list != NULL); ++ ++ nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN); ++ if (unlikely(!nskb)) ++ goto err; ++ ++ skb_reserve(nskb, 16 + NET_IP_ALIGN); ++ headlen = nskb->end - nskb->data; ++ if (headlen > skb_headlen(skb)) ++ headlen = skb_headlen(skb); ++ ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen); ++ BUG_ON(ret); ++ ++ ninfo = skb_shinfo(nskb); ++ ninfo->gso_size = skb_shinfo(skb)->gso_size; ++ ninfo->gso_type = skb_shinfo(skb)->gso_type; ++ ++ offset = headlen; ++ len = skb->len - headlen; ++ ++ nskb->len = skb->len; ++ nskb->data_len = len; ++ nskb->truesize += len; ++ ++ while (len) { ++ struct page *page; ++ int copy; ++ int zero; ++ ++ if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) { ++ dump_stack(); ++ goto err_free; ++ } ++ ++ copy = len >= PAGE_SIZE ? PAGE_SIZE : len; ++ zero = len >= PAGE_SIZE ? 
0 : __GFP_ZERO; ++ ++ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero); ++ if (unlikely(!page)) ++ goto err_free; ++ ++ ret = skb_copy_bits(skb, offset, page_address(page), copy); ++ BUG_ON(ret); ++ ++ ninfo->frags[ninfo->nr_frags].page = page; ++ ninfo->frags[ninfo->nr_frags].page_offset = 0; ++ ninfo->frags[ninfo->nr_frags].size = copy; ++ ninfo->nr_frags++; ++ ++ offset += copy; ++ len -= copy; ++ } ++ ++ offset = nskb->data - skb->data; ++ ++ nskb->h.raw = skb->h.raw + offset; ++ nskb->nh.raw = skb->nh.raw + offset; ++ nskb->mac.raw = skb->mac.raw + offset; ++ ++ return nskb; ++ ++ err_free: ++ kfree_skb(nskb); ++ err: ++ return NULL; ++} ++ ++static inline int netbk_max_required_rx_slots(netif_t *netif) ++{ ++ if (netif->features & (NETIF_F_SG|NETIF_F_TSO)) ++ return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */ ++ return 1; /* all in one */ ++} ++ ++static inline int netbk_queue_full(netif_t *netif) ++{ ++ RING_IDX peek = netif->rx_req_cons_peek; ++ RING_IDX needed = netbk_max_required_rx_slots(netif); ++ ++ return ((netif->rx.sring->req_prod - peek) < needed) || ++ ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed); ++} ++ ++static void tx_queue_callback(unsigned long data) ++{ ++ netif_t *netif = (netif_t *)data; ++ if (netif_schedulable(netif)) ++ netif_wake_queue(netif->dev); ++} ++ ++int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) ++{ ++ netif_t *netif = netdev_priv(dev); ++ ++ BUG_ON(skb->dev != dev); ++ ++ /* Drop the packet if the target domain has no receive buffers. */ ++ if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif))) ++ goto drop; ++ ++ /* ++ * Copy the packet here if it's destined for a flipping interface ++ * but isn't flippable (e.g. extra references to data). ++ * XXX For now we also copy skbuffs whose head crosses a page ++ * boundary, because netbk_gop_skb can't handle them. ++ */ ++ if (!netif->copying_receiver || ++ ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE)) { ++ struct sk_buff *nskb = netbk_copy_skb(skb); ++ if ( unlikely(nskb == NULL) ) ++ goto drop; ++ /* Copy only the header fields we use in this driver. */ ++ nskb->dev = skb->dev; ++ nskb->ip_summed = skb->ip_summed; ++ nskb->proto_data_valid = skb->proto_data_valid; ++ dev_kfree_skb(skb); ++ skb = nskb; ++ } ++ ++ netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 + ++ !!skb_shinfo(skb)->gso_size; ++ netif_get(netif); ++ ++ if (netbk_can_queue(dev) && netbk_queue_full(netif)) { ++ netif->rx.sring->req_event = netif->rx_req_cons_peek + ++ netbk_max_required_rx_slots(netif); ++ mb(); /* request notification /then/ check & stop the queue */ ++ if (netbk_queue_full(netif)) { ++ netif_stop_queue(dev); ++ /* ++ * Schedule 500ms timeout to restart the queue, thus ++ * ensuring that an inactive queue will be drained. ++ * Packets will be immediately be dropped until more ++ * receive buffers become available (see ++ * netbk_queue_full() check above). 
++ */ ++ netif->tx_queue_timeout.data = (unsigned long)netif; ++ netif->tx_queue_timeout.function = tx_queue_callback; ++ __mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2); ++ } ++ } ++ ++ skb_queue_tail(&rx_queue, skb); ++ tasklet_schedule(&net_rx_tasklet); ++ ++ return 0; ++ ++ drop: ++ netif->stats.tx_dropped++; ++ dev_kfree_skb(skb); ++ return 0; ++} ++ ++#if 0 ++static void xen_network_done_notify(void) ++{ ++ static struct net_device *eth0_dev = NULL; ++ if (unlikely(eth0_dev == NULL)) ++ eth0_dev = __dev_get_by_name("eth0"); ++ netif_rx_schedule(eth0_dev); ++} ++/* ++ * Add following to poll() function in NAPI driver (Tigon3 is example): ++ * if ( xen_network_done() ) ++ * tg3_enable_ints(tp); ++ */ ++int xen_network_done(void) ++{ ++ return skb_queue_empty(&rx_queue); ++} ++#endif ++ ++struct netrx_pending_operations { ++ unsigned trans_prod, trans_cons; ++ unsigned mmu_prod, mmu_mcl; ++ unsigned mcl_prod, mcl_cons; ++ unsigned copy_prod, copy_cons; ++ unsigned meta_prod, meta_cons; ++ mmu_update_t *mmu; ++ gnttab_transfer_t *trans; ++ gnttab_copy_t *copy; ++ multicall_entry_t *mcl; ++ struct netbk_rx_meta *meta; ++}; ++ ++/* Set up the grant operations for this fragment. If it's a flipping ++ interface, we also set up the unmap request from here. */ ++static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta, ++ int i, struct netrx_pending_operations *npo, ++ struct page *page, unsigned long size, ++ unsigned long offset) ++{ ++ mmu_update_t *mmu; ++ gnttab_transfer_t *gop; ++ gnttab_copy_t *copy_gop; ++ multicall_entry_t *mcl; ++ netif_rx_request_t *req; ++ unsigned long old_mfn, new_mfn; ++ int idx = netif_page_index(page); ++ ++ old_mfn = virt_to_mfn(page_address(page)); ++ ++ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i); ++ if (netif->copying_receiver) { ++ /* The fragment needs to be copied rather than ++ flipped. */ ++ meta->copy = 1; ++ copy_gop = npo->copy + npo->copy_prod++; ++ copy_gop->flags = GNTCOPY_dest_gref; ++ if (idx > -1) { ++ struct pending_tx_info *src_pend = &pending_tx_info[idx]; ++ copy_gop->source.domid = src_pend->netif->domid; ++ copy_gop->source.u.ref = src_pend->req.gref; ++ copy_gop->flags |= GNTCOPY_source_gref; ++ } else { ++ copy_gop->source.domid = DOMID_SELF; ++ copy_gop->source.u.gmfn = old_mfn; ++ } ++ copy_gop->source.offset = offset; ++ copy_gop->dest.domid = netif->domid; ++ copy_gop->dest.offset = 0; ++ copy_gop->dest.u.ref = req->gref; ++ copy_gop->len = size; ++ } else { ++ meta->copy = 0; ++ if (!xen_feature(XENFEAT_auto_translated_physmap)) { ++ new_mfn = alloc_mfn(); ++ ++ /* ++ * Set the new P2M table entry before ++ * reassigning the old data page. Heed the ++ * comment in pgtable-2level.h:pte_page(). 
:-) ++ */ ++ set_phys_to_machine(page_to_pfn(page), new_mfn); ++ ++ mcl = npo->mcl + npo->mcl_prod++; ++ MULTI_update_va_mapping(mcl, ++ (unsigned long)page_address(page), ++ pfn_pte_ma(new_mfn, PAGE_KERNEL), ++ 0); ++ ++ mmu = npo->mmu + npo->mmu_prod++; ++ mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) | ++ MMU_MACHPHYS_UPDATE; ++ mmu->val = page_to_pfn(page); ++ } ++ ++ gop = npo->trans + npo->trans_prod++; ++ gop->mfn = old_mfn; ++ gop->domid = netif->domid; ++ gop->ref = req->gref; ++ } ++ return req->id; ++} ++ ++static void netbk_gop_skb(struct sk_buff *skb, ++ struct netrx_pending_operations *npo) ++{ ++ netif_t *netif = netdev_priv(skb->dev); ++ int nr_frags = skb_shinfo(skb)->nr_frags; ++ int i; ++ int extra; ++ struct netbk_rx_meta *head_meta, *meta; ++ ++ head_meta = npo->meta + npo->meta_prod++; ++ head_meta->frag.page_offset = skb_shinfo(skb)->gso_type; ++ head_meta->frag.size = skb_shinfo(skb)->gso_size; ++ extra = !!head_meta->frag.size + 1; ++ ++ for (i = 0; i < nr_frags; i++) { ++ meta = npo->meta + npo->meta_prod++; ++ meta->frag = skb_shinfo(skb)->frags[i]; ++ meta->id = netbk_gop_frag(netif, meta, i + extra, npo, ++ meta->frag.page, ++ meta->frag.size, ++ meta->frag.page_offset); ++ } ++ ++ /* ++ * This must occur at the end to ensure that we don't trash skb_shinfo ++ * until we're done. We know that the head doesn't cross a page ++ * boundary because such packets get copied in netif_be_start_xmit. ++ */ ++ head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo, ++ virt_to_page(skb->data), ++ skb_headlen(skb), ++ offset_in_page(skb->data)); ++ ++ netif->rx.req_cons += nr_frags + extra; ++} ++ ++static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta) ++{ ++ int i; ++ ++ for (i = 0; i < nr_frags; i++) ++ put_page(meta[i].frag.page); ++} ++ ++/* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was ++ used to set up the operations on the top of ++ netrx_pending_operations, which have since been done. Check that ++ they didn't give any errors and advance over them. */ ++static int netbk_check_gop(int nr_frags, domid_t domid, ++ struct netrx_pending_operations *npo) ++{ ++ multicall_entry_t *mcl; ++ gnttab_transfer_t *gop; ++ gnttab_copy_t *copy_op; ++ int status = NETIF_RSP_OKAY; ++ int i; ++ ++ for (i = 0; i <= nr_frags; i++) { ++ if (npo->meta[npo->meta_cons + i].copy) { ++ copy_op = npo->copy + npo->copy_cons++; ++ if (copy_op->status != GNTST_okay) { ++ DPRINTK("Bad status %d from copy to DOM%d.\n", ++ copy_op->status, domid); ++ status = NETIF_RSP_ERROR; ++ } ++ } else { ++ if (!xen_feature(XENFEAT_auto_translated_physmap)) { ++ mcl = npo->mcl + npo->mcl_cons++; ++ /* The update_va_mapping() must not fail. */ ++ BUG_ON(mcl->result != 0); ++ } ++ ++ gop = npo->trans + npo->trans_cons++; ++ /* Check the reassignment error code. */ ++ if (gop->status != 0) { ++ DPRINTK("Bad status %d from grant transfer to DOM%u\n", ++ gop->status, domid); ++ /* ++ * Page no longer belongs to us unless ++ * GNTST_bad_page, but that should be ++ * a fatal error anyway. ++ */ ++ BUG_ON(gop->status == GNTST_bad_page); ++ status = NETIF_RSP_ERROR; ++ } ++ } ++ } ++ ++ return status; ++} ++ ++static void netbk_add_frag_responses(netif_t *netif, int status, ++ struct netbk_rx_meta *meta, int nr_frags) ++{ ++ int i; ++ unsigned long offset; ++ ++ for (i = 0; i < nr_frags; i++) { ++ int id = meta[i].id; ++ int flags = (i == nr_frags - 1) ? 
0 : NETRXF_more_data; ++ ++ if (meta[i].copy) ++ offset = 0; ++ else ++ offset = meta[i].frag.page_offset; ++ make_rx_response(netif, id, status, offset, ++ meta[i].frag.size, flags); ++ } ++} ++ ++static void net_rx_action(unsigned long unused) ++{ ++ netif_t *netif = NULL; ++ s8 status; ++ u16 id, irq, flags; ++ netif_rx_response_t *resp; ++ multicall_entry_t *mcl; ++ struct sk_buff_head rxq; ++ struct sk_buff *skb; ++ int notify_nr = 0; ++ int ret; ++ int nr_frags; ++ int count; ++ unsigned long offset; ++ ++ /* ++ * Putting hundreds of bytes on the stack is considered rude. ++ * Static works because a tasklet can only be on one CPU at any time. ++ */ ++ static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3]; ++ static mmu_update_t rx_mmu[NET_RX_RING_SIZE]; ++ static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE]; ++ static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE]; ++ static unsigned char rx_notify[NR_IRQS]; ++ static u16 notify_list[NET_RX_RING_SIZE]; ++ static struct netbk_rx_meta meta[NET_RX_RING_SIZE]; ++ ++ struct netrx_pending_operations npo = { ++ mmu: rx_mmu, ++ trans: grant_trans_op, ++ copy: grant_copy_op, ++ mcl: rx_mcl, ++ meta: meta}; ++ ++ skb_queue_head_init(&rxq); ++ ++ count = 0; ++ ++ while ((skb = skb_dequeue(&rx_queue)) != NULL) { ++ nr_frags = skb_shinfo(skb)->nr_frags; ++ *(int *)skb->cb = nr_frags; ++ ++ if (!xen_feature(XENFEAT_auto_translated_physmap) && ++ !((netif_t *)netdev_priv(skb->dev))->copying_receiver && ++ check_mfn(nr_frags + 1)) { ++ /* Memory squeeze? Back off for an arbitrary while. */ ++ if ( net_ratelimit() ) ++ WPRINTK("Memory squeeze in netback " ++ "driver.\n"); ++ mod_timer(&net_timer, jiffies + HZ); ++ skb_queue_head(&rx_queue, skb); ++ break; ++ } ++ ++ netbk_gop_skb(skb, &npo); ++ ++ count += nr_frags + 1; ++ ++ __skb_queue_tail(&rxq, skb); ++ ++ /* Filled the batch queue? */ ++ if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE) ++ break; ++ } ++ ++ BUG_ON(npo.meta_prod > ARRAY_SIZE(meta)); ++ ++ npo.mmu_mcl = npo.mcl_prod; ++ if (npo.mcl_prod) { ++ BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); ++ BUG_ON(npo.mmu_prod > ARRAY_SIZE(rx_mmu)); ++ mcl = npo.mcl + npo.mcl_prod++; ++ ++ BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping); ++ mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; ++ ++ mcl->op = __HYPERVISOR_mmu_update; ++ mcl->args[0] = (unsigned long)rx_mmu; ++ mcl->args[1] = npo.mmu_prod; ++ mcl->args[2] = 0; ++ mcl->args[3] = DOMID_SELF; ++ } ++ ++ if (npo.trans_prod) { ++ BUG_ON(npo.trans_prod > ARRAY_SIZE(grant_trans_op)); ++ mcl = npo.mcl + npo.mcl_prod++; ++ mcl->op = __HYPERVISOR_grant_table_op; ++ mcl->args[0] = GNTTABOP_transfer; ++ mcl->args[1] = (unsigned long)grant_trans_op; ++ mcl->args[2] = npo.trans_prod; ++ } ++ ++ if (npo.copy_prod) { ++ BUG_ON(npo.copy_prod > ARRAY_SIZE(grant_copy_op)); ++ mcl = npo.mcl + npo.mcl_prod++; ++ mcl->op = __HYPERVISOR_grant_table_op; ++ mcl->args[0] = GNTTABOP_copy; ++ mcl->args[1] = (unsigned long)grant_copy_op; ++ mcl->args[2] = npo.copy_prod; ++ } ++ ++ /* Nothing to do? */ ++ if (!npo.mcl_prod) ++ return; ++ ++ BUG_ON(npo.mcl_prod > ARRAY_SIZE(rx_mcl)); ++ ++ ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod); ++ BUG_ON(ret != 0); ++ /* The mmu_machphys_update() must not fail. 
*/ ++ BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0); ++ ++ while ((skb = __skb_dequeue(&rxq)) != NULL) { ++ nr_frags = *(int *)skb->cb; ++ ++ netif = netdev_priv(skb->dev); ++ /* We can't rely on skb_release_data to release the ++ pages used by fragments for us, since it tries to ++ touch the pages in the fraglist. If we're in ++ flipping mode, that doesn't work. In copying mode, ++ we still have access to all of the pages, and so ++ it's safe to let release_data deal with it. */ ++ /* (Freeing the fragments is safe since we copy ++ non-linear skbs destined for flipping interfaces) */ ++ if (!netif->copying_receiver) { ++ atomic_set(&(skb_shinfo(skb)->dataref), 1); ++ skb_shinfo(skb)->frag_list = NULL; ++ skb_shinfo(skb)->nr_frags = 0; ++ netbk_free_pages(nr_frags, meta + npo.meta_cons + 1); ++ } ++ ++ netif->stats.tx_bytes += skb->len; ++ netif->stats.tx_packets++; ++ ++ status = netbk_check_gop(nr_frags, netif->domid, &npo); ++ ++ id = meta[npo.meta_cons].id; ++ flags = nr_frags ? NETRXF_more_data : 0; ++ ++ if (skb->ip_summed == CHECKSUM_HW) /* local packet? */ ++ flags |= NETRXF_csum_blank | NETRXF_data_validated; ++ else if (skb->proto_data_valid) /* remote but checksummed? */ ++ flags |= NETRXF_data_validated; ++ ++ if (meta[npo.meta_cons].copy) ++ offset = 0; ++ else ++ offset = offset_in_page(skb->data); ++ resp = make_rx_response(netif, id, status, offset, ++ skb_headlen(skb), flags); ++ ++ if (meta[npo.meta_cons].frag.size) { ++ struct netif_extra_info *gso = ++ (struct netif_extra_info *) ++ RING_GET_RESPONSE(&netif->rx, ++ netif->rx.rsp_prod_pvt++); ++ ++ resp->flags |= NETRXF_extra_info; ++ ++ gso->u.gso.size = meta[npo.meta_cons].frag.size; ++ gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; ++ gso->u.gso.pad = 0; ++ gso->u.gso.features = 0; ++ ++ gso->type = XEN_NETIF_EXTRA_TYPE_GSO; ++ gso->flags = 0; ++ } ++ ++ netbk_add_frag_responses(netif, status, ++ meta + npo.meta_cons + 1, ++ nr_frags); ++ ++ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret); ++ irq = netif->irq; ++ if (ret && !rx_notify[irq]) { ++ rx_notify[irq] = 1; ++ notify_list[notify_nr++] = irq; ++ } ++ ++ if (netif_queue_stopped(netif->dev) && ++ netif_schedulable(netif) && ++ !netbk_queue_full(netif)) ++ netif_wake_queue(netif->dev); ++ ++ netif_put(netif); ++ dev_kfree_skb(skb); ++ npo.meta_cons += nr_frags + 1; ++ } ++ ++ while (notify_nr != 0) { ++ irq = notify_list[--notify_nr]; ++ rx_notify[irq] = 0; ++ notify_remote_via_irq(irq); ++ } ++ ++ /* More work to do? 
*/ ++ if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer)) ++ tasklet_schedule(&net_rx_tasklet); ++#if 0 ++ else ++ xen_network_done_notify(); ++#endif ++} ++ ++static void net_alarm(unsigned long unused) ++{ ++ tasklet_schedule(&net_rx_tasklet); ++} ++ ++static void netbk_tx_pending_timeout(unsigned long unused) ++{ ++ tasklet_schedule(&net_tx_tasklet); ++} ++ ++struct net_device_stats *netif_be_get_stats(struct net_device *dev) ++{ ++ netif_t *netif = netdev_priv(dev); ++ return &netif->stats; ++} ++ ++static int __on_net_schedule_list(netif_t *netif) ++{ ++ return netif->list.next != NULL; ++} ++ ++static void remove_from_net_schedule_list(netif_t *netif) ++{ ++ spin_lock_irq(&net_schedule_list_lock); ++ if (likely(__on_net_schedule_list(netif))) { ++ list_del(&netif->list); ++ netif->list.next = NULL; ++ netif_put(netif); ++ } ++ spin_unlock_irq(&net_schedule_list_lock); ++} ++ ++static void add_to_net_schedule_list_tail(netif_t *netif) ++{ ++ if (__on_net_schedule_list(netif)) ++ return; ++ ++ spin_lock_irq(&net_schedule_list_lock); ++ if (!__on_net_schedule_list(netif) && ++ likely(netif_schedulable(netif))) { ++ list_add_tail(&netif->list, &net_schedule_list); ++ netif_get(netif); ++ } ++ spin_unlock_irq(&net_schedule_list_lock); ++} ++ ++/* ++ * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER: ++ * If this driver is pipelining transmit requests then we can be very ++ * aggressive in avoiding new-packet notifications -- frontend only needs to ++ * send a notification if there are no outstanding unreceived responses. ++ * If we may be buffer transmit buffers for any reason then we must be rather ++ * more conservative and treat this as the final check for pending work. ++ */ ++void netif_schedule_work(netif_t *netif) ++{ ++ int more_to_do; ++ ++#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER ++ more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx); ++#else ++ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); ++#endif ++ ++ if (more_to_do) { ++ add_to_net_schedule_list_tail(netif); ++ maybe_schedule_tx_action(); ++ } ++} ++ ++void netif_deschedule_work(netif_t *netif) ++{ ++ remove_from_net_schedule_list(netif); ++} ++ ++ ++static void tx_add_credit(netif_t *netif) ++{ ++ unsigned long max_burst, max_credit; ++ ++ /* ++ * Allow a burst big enough to transmit a jumbo packet of up to 128kB. ++ * Otherwise the interface can seize up due to insufficient credit. ++ */ ++ max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size; ++ max_burst = min(max_burst, 131072UL); ++ max_burst = max(max_burst, netif->credit_bytes); ++ ++ /* Take care that adding a new chunk of credit doesn't wrap to zero. 
*/ ++ max_credit = netif->remaining_credit + netif->credit_bytes; ++ if (max_credit < netif->remaining_credit) ++ max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */ ++ ++ netif->remaining_credit = min(max_credit, max_burst); ++} ++ ++static void tx_credit_callback(unsigned long data) ++{ ++ netif_t *netif = (netif_t *)data; ++ tx_add_credit(netif); ++ netif_schedule_work(netif); ++} ++ ++static inline int copy_pending_req(PEND_RING_IDX pending_idx) ++{ ++ return gnttab_copy_grant_page(grant_tx_handle[pending_idx], ++ &mmap_pages[pending_idx]); ++} ++ ++inline static void net_tx_action_dealloc(void) ++{ ++ struct netbk_tx_pending_inuse *inuse, *n; ++ gnttab_unmap_grant_ref_t *gop; ++ u16 pending_idx; ++ PEND_RING_IDX dc, dp; ++ netif_t *netif; ++ int ret; ++ LIST_HEAD(list); ++ ++ dc = dealloc_cons; ++ gop = tx_unmap_ops; ++ ++ /* ++ * Free up any grants we have finished using ++ */ ++ do { ++ dp = dealloc_prod; ++ ++ /* Ensure we see all indices enqueued by netif_idx_release(). */ ++ smp_rmb(); ++ ++ while (dc != dp) { ++ unsigned long pfn; ++ ++ pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)]; ++ list_move_tail(&pending_inuse[pending_idx].list, &list); ++ ++ pfn = idx_to_pfn(pending_idx); ++ /* Already unmapped? */ ++ if (!phys_to_machine_mapping_valid(pfn)) ++ continue; ++ ++ gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx), ++ GNTMAP_host_map, ++ grant_tx_handle[pending_idx]); ++ gop++; ++ } ++ ++ if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB || ++ list_empty(&pending_inuse_head)) ++ break; ++ ++ /* Copy any entries that have been pending for too long. */ ++ list_for_each_entry_safe(inuse, n, &pending_inuse_head, list) { ++ if (time_after(inuse->alloc_time + HZ / 2, jiffies)) ++ break; ++ ++ switch (copy_pending_req(inuse - pending_inuse)) { ++ case 0: ++ list_move_tail(&inuse->list, &list); ++ continue; ++ case -EBUSY: ++ list_del_init(&inuse->list); ++ continue; ++ case -ENOENT: ++ continue; ++ } ++ ++ break; ++ } ++ } while (dp != dealloc_prod); ++ ++ dealloc_cons = dc; ++ ++ ret = HYPERVISOR_grant_table_op( ++ GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops); ++ BUG_ON(ret); ++ ++ list_for_each_entry_safe(inuse, n, &list, list) { ++ pending_idx = inuse - pending_inuse; ++ ++ netif = pending_tx_info[pending_idx].netif; ++ ++ make_tx_response(netif, &pending_tx_info[pending_idx].req, ++ NETIF_RSP_OKAY); ++ ++ /* Ready for next use. 
*/ ++ gnttab_reset_grant_page(mmap_pages[pending_idx]); ++ ++ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; ++ ++ netif_put(netif); ++ ++ list_del_init(&inuse->list); ++ } ++} ++ ++static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end) ++{ ++ RING_IDX cons = netif->tx.req_cons; ++ ++ do { ++ make_tx_response(netif, txp, NETIF_RSP_ERROR); ++ if (cons >= end) ++ break; ++ txp = RING_GET_REQUEST(&netif->tx, cons++); ++ } while (1); ++ netif->tx.req_cons = cons; ++ netif_schedule_work(netif); ++ netif_put(netif); ++} ++ ++static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first, ++ netif_tx_request_t *txp, int work_to_do) ++{ ++ RING_IDX cons = netif->tx.req_cons; ++ int frags = 0; ++ ++ if (!(first->flags & NETTXF_more_data)) ++ return 0; ++ ++ do { ++ if (frags >= work_to_do) { ++ DPRINTK("Need more frags\n"); ++ return -frags; ++ } ++ ++ if (unlikely(frags >= MAX_SKB_FRAGS)) { ++ DPRINTK("Too many frags\n"); ++ return -frags; ++ } ++ ++ memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags), ++ sizeof(*txp)); ++ if (txp->size > first->size) { ++ DPRINTK("Frags galore\n"); ++ return -frags; ++ } ++ ++ first->size -= txp->size; ++ frags++; ++ ++ if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) { ++ DPRINTK("txp->offset: %x, size: %u\n", ++ txp->offset, txp->size); ++ return -frags; ++ } ++ } while ((txp++)->flags & NETTXF_more_data); ++ ++ return frags; ++} ++ ++static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif, ++ struct sk_buff *skb, ++ netif_tx_request_t *txp, ++ gnttab_map_grant_ref_t *mop) ++{ ++ struct skb_shared_info *shinfo = skb_shinfo(skb); ++ skb_frag_t *frags = shinfo->frags; ++ unsigned long pending_idx = *((u16 *)skb->data); ++ int i, start; ++ ++ /* Skip first skb fragment if it is on same page as header fragment. */ ++ start = ((unsigned long)shinfo->frags[0].page == pending_idx); ++ ++ for (i = start; i < shinfo->nr_frags; i++, txp++) { ++ pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)]; ++ ++ gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx), ++ GNTMAP_host_map | GNTMAP_readonly, ++ txp->gref, netif->domid); ++ ++ memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp)); ++ netif_get(netif); ++ pending_tx_info[pending_idx].netif = netif; ++ frags[i].page = (void *)pending_idx; ++ } ++ ++ return mop; ++} ++ ++static int netbk_tx_check_mop(struct sk_buff *skb, ++ gnttab_map_grant_ref_t **mopp) ++{ ++ gnttab_map_grant_ref_t *mop = *mopp; ++ int pending_idx = *((u16 *)skb->data); ++ netif_t *netif = pending_tx_info[pending_idx].netif; ++ netif_tx_request_t *txp; ++ struct skb_shared_info *shinfo = skb_shinfo(skb); ++ int nr_frags = shinfo->nr_frags; ++ int i, err, start; ++ ++ /* Check status of header. */ ++ err = mop->status; ++ if (unlikely(err)) { ++ txp = &pending_tx_info[pending_idx].req; ++ make_tx_response(netif, txp, NETIF_RSP_ERROR); ++ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; ++ netif_put(netif); ++ } else { ++ set_phys_to_machine( ++ __pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT, ++ FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT)); ++ grant_tx_handle[pending_idx] = mop->handle; ++ } ++ ++ /* Skip first skb fragment if it is on same page as header fragment. */ ++ start = ((unsigned long)shinfo->frags[0].page == pending_idx); ++ ++ for (i = start; i < nr_frags; i++) { ++ int j, newerr; ++ ++ pending_idx = (unsigned long)shinfo->frags[i].page; ++ ++ /* Check error status: if okay then remember grant handle. 
*/ ++ newerr = (++mop)->status; ++ if (likely(!newerr)) { ++ set_phys_to_machine( ++ __pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT, ++ FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT)); ++ grant_tx_handle[pending_idx] = mop->handle; ++ /* Had a previous error? Invalidate this fragment. */ ++ if (unlikely(err)) ++ netif_idx_release(pending_idx); ++ continue; ++ } ++ ++ /* Error on this fragment: respond to client with an error. */ ++ txp = &pending_tx_info[pending_idx].req; ++ make_tx_response(netif, txp, NETIF_RSP_ERROR); ++ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; ++ netif_put(netif); ++ ++ /* Not the first error? Preceding frags already invalidated. */ ++ if (err) ++ continue; ++ ++ /* First error: invalidate header and preceding fragments. */ ++ pending_idx = *((u16 *)skb->data); ++ netif_idx_release(pending_idx); ++ for (j = start; j < i; j++) { ++ pending_idx = (unsigned long)shinfo->frags[i].page; ++ netif_idx_release(pending_idx); ++ } ++ ++ /* Remember the error: invalidate all subsequent fragments. */ ++ err = newerr; ++ } ++ ++ *mopp = mop + 1; ++ return err; ++} ++ ++static void netbk_fill_frags(struct sk_buff *skb) ++{ ++ struct skb_shared_info *shinfo = skb_shinfo(skb); ++ int nr_frags = shinfo->nr_frags; ++ int i; ++ ++ for (i = 0; i < nr_frags; i++) { ++ skb_frag_t *frag = shinfo->frags + i; ++ netif_tx_request_t *txp; ++ unsigned long pending_idx; ++ ++ pending_idx = (unsigned long)frag->page; ++ ++ pending_inuse[pending_idx].alloc_time = jiffies; ++ list_add_tail(&pending_inuse[pending_idx].list, ++ &pending_inuse_head); ++ ++ txp = &pending_tx_info[pending_idx].req; ++ frag->page = virt_to_page(idx_to_kaddr(pending_idx)); ++ frag->size = txp->size; ++ frag->page_offset = txp->offset; ++ ++ skb->len += txp->size; ++ skb->data_len += txp->size; ++ skb->truesize += txp->size; ++ } ++} ++ ++int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras, ++ int work_to_do) ++{ ++ struct netif_extra_info extra; ++ RING_IDX cons = netif->tx.req_cons; ++ ++ do { ++ if (unlikely(work_to_do-- <= 0)) { ++ DPRINTK("Missing extra info\n"); ++ return -EBADR; ++ } ++ ++ memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons), ++ sizeof(extra)); ++ if (unlikely(!extra.type || ++ extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) { ++ netif->tx.req_cons = ++cons; ++ DPRINTK("Invalid extra type: %d\n", extra.type); ++ return -EINVAL; ++ } ++ ++ memcpy(&extras[extra.type - 1], &extra, sizeof(extra)); ++ netif->tx.req_cons = ++cons; ++ } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE); ++ ++ return work_to_do; ++} ++ ++static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso) ++{ ++ if (!gso->u.gso.size) { ++ DPRINTK("GSO size must not be zero.\n"); ++ return -EINVAL; ++ } ++ ++ /* Currently only TCPv4 S.O. is supported. */ ++ if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { ++ DPRINTK("Bad GSO type %d.\n", gso->u.gso.type); ++ return -EINVAL; ++ } ++ ++ skb_shinfo(skb)->gso_size = gso->u.gso.size; ++ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; ++ ++ /* Header must be checked, and gso_segs computed. 
*/ ++ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; ++ skb_shinfo(skb)->gso_segs = 0; ++ ++ return 0; ++} ++ ++/* Called after netfront has transmitted */ ++static void net_tx_action(unsigned long unused) ++{ ++ struct list_head *ent; ++ struct sk_buff *skb; ++ netif_t *netif; ++ netif_tx_request_t txreq; ++ netif_tx_request_t txfrags[MAX_SKB_FRAGS]; ++ struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; ++ u16 pending_idx; ++ RING_IDX i; ++ gnttab_map_grant_ref_t *mop; ++ unsigned int data_len; ++ int ret, work_to_do; ++ ++ if (dealloc_cons != dealloc_prod) ++ net_tx_action_dealloc(); ++ ++ mop = tx_map_ops; ++ while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && ++ !list_empty(&net_schedule_list)) { ++ /* Get a netif from the list with work to do. */ ++ ent = net_schedule_list.next; ++ netif = list_entry(ent, netif_t, list); ++ netif_get(netif); ++ remove_from_net_schedule_list(netif); ++ ++ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do); ++ if (!work_to_do) { ++ netif_put(netif); ++ continue; ++ } ++ ++ i = netif->tx.req_cons; ++ rmb(); /* Ensure that we see the request before we copy it. */ ++ memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq)); ++ ++ /* Credit-based scheduling. */ ++ if (txreq.size > netif->remaining_credit) { ++ unsigned long now = jiffies; ++ unsigned long next_credit = ++ netif->credit_timeout.expires + ++ msecs_to_jiffies(netif->credit_usec / 1000); ++ ++ /* Timer could already be pending in rare cases. */ ++ if (timer_pending(&netif->credit_timeout)) { ++ netif_put(netif); ++ continue; ++ } ++ ++ /* Passed the point where we can replenish credit? */ ++ if (time_after_eq(now, next_credit)) { ++ netif->credit_timeout.expires = now; ++ tx_add_credit(netif); ++ } ++ ++ /* Still too big to send right now? Set a callback. */ ++ if (txreq.size > netif->remaining_credit) { ++ netif->credit_timeout.data = ++ (unsigned long)netif; ++ netif->credit_timeout.function = ++ tx_credit_callback; ++ __mod_timer(&netif->credit_timeout, ++ next_credit); ++ netif_put(netif); ++ continue; ++ } ++ } ++ netif->remaining_credit -= txreq.size; ++ ++ work_to_do--; ++ netif->tx.req_cons = ++i; ++ ++ memset(extras, 0, sizeof(extras)); ++ if (txreq.flags & NETTXF_extra_info) { ++ work_to_do = netbk_get_extras(netif, extras, ++ work_to_do); ++ i = netif->tx.req_cons; ++ if (unlikely(work_to_do < 0)) { ++ netbk_tx_err(netif, &txreq, i); ++ continue; ++ } ++ } ++ ++ ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do); ++ if (unlikely(ret < 0)) { ++ netbk_tx_err(netif, &txreq, i - ret); ++ continue; ++ } ++ i += ret; ++ ++ if (unlikely(txreq.size < ETH_HLEN)) { ++ DPRINTK("Bad packet size: %d\n", txreq.size); ++ netbk_tx_err(netif, &txreq, i); ++ continue; ++ } ++ ++ /* No crossing a page as the payload mustn't fragment. */ ++ if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) { ++ DPRINTK("txreq.offset: %x, size: %u, end: %lu\n", ++ txreq.offset, txreq.size, ++ (txreq.offset &~PAGE_MASK) + txreq.size); ++ netbk_tx_err(netif, &txreq, i); ++ continue; ++ } ++ ++ pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; ++ ++ data_len = (txreq.size > PKT_PROT_LEN && ++ ret < MAX_SKB_FRAGS) ? ++ PKT_PROT_LEN : txreq.size; ++ ++ skb = alloc_skb(data_len + 16 + NET_IP_ALIGN, ++ GFP_ATOMIC | __GFP_NOWARN); ++ if (unlikely(skb == NULL)) { ++ DPRINTK("Can't allocate a skb in start_xmit.\n"); ++ netbk_tx_err(netif, &txreq, i); ++ break; ++ } ++ ++ /* Packets passed to netif_rx() must have some headroom. 
*/ ++ skb_reserve(skb, 16 + NET_IP_ALIGN); ++ ++ if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { ++ struct netif_extra_info *gso; ++ gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; ++ ++ if (netbk_set_skb_gso(skb, gso)) { ++ kfree_skb(skb); ++ netbk_tx_err(netif, &txreq, i); ++ continue; ++ } ++ } ++ ++ gnttab_set_map_op(mop, idx_to_kaddr(pending_idx), ++ GNTMAP_host_map | GNTMAP_readonly, ++ txreq.gref, netif->domid); ++ mop++; ++ ++ memcpy(&pending_tx_info[pending_idx].req, ++ &txreq, sizeof(txreq)); ++ pending_tx_info[pending_idx].netif = netif; ++ *((u16 *)skb->data) = pending_idx; ++ ++ __skb_put(skb, data_len); ++ ++ skb_shinfo(skb)->nr_frags = ret; ++ if (data_len < txreq.size) { ++ skb_shinfo(skb)->nr_frags++; ++ skb_shinfo(skb)->frags[0].page = ++ (void *)(unsigned long)pending_idx; ++ } else { ++ /* Discriminate from any valid pending_idx value. */ ++ skb_shinfo(skb)->frags[0].page = (void *)~0UL; ++ } ++ ++ if (skb->data_len < skb_shinfo(skb)->gso_size) { ++ skb_shinfo(skb)->gso_size = 0; ++ skb_shinfo(skb)->gso_type = 0; ++ } ++ ++ __skb_queue_tail(&tx_queue, skb); ++ ++ pending_cons++; ++ ++ mop = netbk_get_requests(netif, skb, txfrags, mop); ++ ++ netif->tx.req_cons = i; ++ netif_schedule_work(netif); ++ ++ if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops)) ++ break; ++ } ++ ++ if (mop == tx_map_ops) ++ return; ++ ++ ret = HYPERVISOR_grant_table_op( ++ GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops); ++ BUG_ON(ret); ++ ++ mop = tx_map_ops; ++ while ((skb = __skb_dequeue(&tx_queue)) != NULL) { ++ netif_tx_request_t *txp; ++ ++ pending_idx = *((u16 *)skb->data); ++ netif = pending_tx_info[pending_idx].netif; ++ txp = &pending_tx_info[pending_idx].req; ++ ++ /* Check the remap error code. */ ++ if (unlikely(netbk_tx_check_mop(skb, &mop))) { ++ DPRINTK("netback grant failed.\n"); ++ skb_shinfo(skb)->nr_frags = 0; ++ kfree_skb(skb); ++ continue; ++ } ++ ++ data_len = skb->len; ++ memcpy(skb->data, ++ (void *)(idx_to_kaddr(pending_idx)|txp->offset), ++ data_len); ++ if (data_len < txp->size) { ++ /* Append the packet payload as a fragment. */ ++ txp->offset += data_len; ++ txp->size -= data_len; ++ } else { ++ /* Schedule a response immediately. */ ++ netif_idx_release(pending_idx); ++ } ++ ++ /* ++ * Old frontends do not assert data_validated but we ++ * can infer it from csum_blank so test both flags. 
++ */ ++ if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) { ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ skb->proto_data_valid = 1; ++ } else { ++ skb->ip_summed = CHECKSUM_NONE; ++ skb->proto_data_valid = 0; ++ } ++ skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank); ++ ++ netbk_fill_frags(skb); ++ ++ skb->dev = netif->dev; ++ skb->protocol = eth_type_trans(skb, skb->dev); ++ ++ netif->stats.rx_bytes += skb->len; ++ netif->stats.rx_packets++; ++ ++ if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) && ++ unlikely(skb_linearize(skb))) { ++ DPRINTK("Can't linearize skb in net_tx_action.\n"); ++ kfree_skb(skb); ++ continue; ++ } ++ ++ netif_rx(skb); ++ netif->dev->last_rx = jiffies; ++ } ++ ++ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB && ++ !list_empty(&pending_inuse_head)) { ++ struct netbk_tx_pending_inuse *oldest; ++ ++ oldest = list_entry(pending_inuse_head.next, ++ struct netbk_tx_pending_inuse, list); ++ mod_timer(&netbk_tx_pending_timer, oldest->alloc_time + HZ); ++ } ++} ++ ++static void netif_idx_release(u16 pending_idx) ++{ ++ static DEFINE_SPINLOCK(_lock); ++ unsigned long flags; ++ ++ spin_lock_irqsave(&_lock, flags); ++ dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx; ++ /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */ ++ smp_wmb(); ++ dealloc_prod++; ++ spin_unlock_irqrestore(&_lock, flags); ++ ++ tasklet_schedule(&net_tx_tasklet); ++} ++ ++static void netif_page_release(struct page *page, unsigned int order) ++{ ++ int idx = netif_page_index(page); ++ BUG_ON(order); ++ BUG_ON(idx < 0); ++ netif_idx_release(idx); ++} ++ ++irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs) ++{ ++ netif_t *netif = dev_id; ++ ++ add_to_net_schedule_list_tail(netif); ++ maybe_schedule_tx_action(); ++ ++ if (netif_schedulable(netif) && !netbk_queue_full(netif)) ++ netif_wake_queue(netif->dev); ++ ++ return IRQ_HANDLED; ++} ++ ++static void make_tx_response(netif_t *netif, ++ netif_tx_request_t *txp, ++ s8 st) ++{ ++ RING_IDX i = netif->tx.rsp_prod_pvt; ++ netif_tx_response_t *resp; ++ int notify; ++ ++ resp = RING_GET_RESPONSE(&netif->tx, i); ++ resp->id = txp->id; ++ resp->status = st; ++ ++ if (txp->flags & NETTXF_extra_info) ++ RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL; ++ ++ netif->tx.rsp_prod_pvt = ++i; ++ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify); ++ if (notify) ++ notify_remote_via_irq(netif->irq); ++ ++#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER ++ if (i == netif->tx.req_cons) { ++ int more_to_do; ++ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); ++ if (more_to_do) ++ add_to_net_schedule_list_tail(netif); ++ } ++#endif ++} ++ ++static netif_rx_response_t *make_rx_response(netif_t *netif, ++ u16 id, ++ s8 st, ++ u16 offset, ++ u16 size, ++ u16 flags) ++{ ++ RING_IDX i = netif->rx.rsp_prod_pvt; ++ netif_rx_response_t *resp; ++ ++ resp = RING_GET_RESPONSE(&netif->rx, i); ++ resp->offset = offset; ++ resp->flags = flags; ++ resp->id = id; ++ resp->status = (s16)size; ++ if (st < 0) ++ resp->status = (s16)st; ++ ++ netif->rx.rsp_prod_pvt = ++i; ++ ++ return resp; ++} ++ ++#ifdef NETBE_DEBUG_INTERRUPT ++static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs) ++{ ++ struct list_head *ent; ++ netif_t *netif; ++ int i = 0; ++ ++ printk(KERN_ALERT "netif_schedule_list:\n"); ++ spin_lock_irq(&net_schedule_list_lock); ++ ++ list_for_each (ent, &net_schedule_list) { ++ netif = list_entry(ent, netif_t, list); ++ printk(KERN_ALERT " %d: 
private(rx_req_cons=%08x " ++ "rx_resp_prod=%08x\n", ++ i, netif->rx.req_cons, netif->rx.rsp_prod_pvt); ++ printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n", ++ netif->tx.req_cons, netif->tx.rsp_prod_pvt); ++ printk(KERN_ALERT " shared(rx_req_prod=%08x " ++ "rx_resp_prod=%08x\n", ++ netif->rx.sring->req_prod, netif->rx.sring->rsp_prod); ++ printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n", ++ netif->rx.sring->rsp_event, netif->tx.sring->req_prod); ++ printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n", ++ netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event); ++ i++; ++ } ++ ++ spin_unlock_irq(&net_schedule_list_lock); ++ printk(KERN_ALERT " ** End of netif_schedule_list **\n"); ++ ++ return IRQ_HANDLED; ++} ++#endif ++ ++static int __init netback_init(void) ++{ ++ int i; ++ struct page *page; ++ ++ if (!is_running_on_xen()) ++ return -ENODEV; ++ ++ /* We can increase reservation by this much in net_rx_action(). */ ++ balloon_update_driver_allowance(NET_RX_RING_SIZE); ++ ++ skb_queue_head_init(&rx_queue); ++ skb_queue_head_init(&tx_queue); ++ ++ init_timer(&net_timer); ++ net_timer.data = 0; ++ net_timer.function = net_alarm; ++ ++ init_timer(&netbk_tx_pending_timer); ++ netbk_tx_pending_timer.data = 0; ++ netbk_tx_pending_timer.function = netbk_tx_pending_timeout; ++ ++ mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS); ++ if (mmap_pages == NULL) { ++ printk("%s: out of memory\n", __FUNCTION__); ++ return -ENOMEM; ++ } ++ ++ for (i = 0; i < MAX_PENDING_REQS; i++) { ++ page = mmap_pages[i]; ++ SetPageForeign(page, netif_page_release); ++ netif_set_page_index(page, i); ++ INIT_LIST_HEAD(&pending_inuse[i].list); ++ } ++ ++ pending_cons = 0; ++ pending_prod = MAX_PENDING_REQS; ++ for (i = 0; i < MAX_PENDING_REQS; i++) ++ pending_ring[i] = i; ++ ++ spin_lock_init(&net_schedule_list_lock); ++ INIT_LIST_HEAD(&net_schedule_list); ++ ++ netbk_copy_skb_mode = NETBK_DONT_COPY_SKB; ++ if (MODPARM_copy_skb) { ++ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace, ++ NULL, 0)) ++ netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB; ++ else ++ netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB; ++ } ++ ++ netif_accel_init(); ++ ++ netif_xenbus_init(); ++ ++#ifdef NETBE_DEBUG_INTERRUPT ++ (void)bind_virq_to_irqhandler(VIRQ_DEBUG, ++ 0, ++ netif_be_dbg, ++ SA_SHIRQ, ++ "net-be-dbg", ++ &netif_be_dbg); ++#endif ++ ++ return 0; ++} ++ ++module_init(netback_init); ++ ++MODULE_LICENSE("Dual BSD/GPL"); +diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c +new file mode 100644 +index 0000000..d7faeb6 +--- /dev/null ++++ b/drivers/xen/netback/xenbus.c +@@ -0,0 +1,454 @@ ++/* Xenbus code for netif backend ++ Copyright (C) 2005 Rusty Russell ++ Copyright (C) 2005 XenSource Ltd ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 2 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. 
++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++*/ ++ ++#include ++#include ++#include ++#include "common.h" ++ ++#if 0 ++#undef DPRINTK ++#define DPRINTK(fmt, args...) \ ++ printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) ++#endif ++ ++ ++static int connect_rings(struct backend_info *); ++static void connect(struct backend_info *); ++static void backend_create_netif(struct backend_info *be); ++ ++static int netback_remove(struct xenbus_device *dev) ++{ ++ struct backend_info *be = dev->dev.driver_data; ++ ++ netback_remove_accelerators(be, dev); ++ ++ if (be->netif) { ++ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); ++ netif_disconnect(be->netif); ++ be->netif = NULL; ++ } ++ kfree(be); ++ dev->dev.driver_data = NULL; ++ return 0; ++} ++ ++ ++/** ++ * Entry point to this code when a new device is created. Allocate the basic ++ * structures and switch to InitWait. ++ */ ++static int netback_probe(struct xenbus_device *dev, ++ const struct xenbus_device_id *id) ++{ ++ const char *message; ++ struct xenbus_transaction xbt; ++ int err; ++ int sg; ++ struct backend_info *be = kzalloc(sizeof(struct backend_info), ++ GFP_KERNEL); ++ if (!be) { ++ xenbus_dev_fatal(dev, -ENOMEM, ++ "allocating backend structure"); ++ return -ENOMEM; ++ } ++ ++ be->dev = dev; ++ dev->dev.driver_data = be; ++ ++ sg = 1; ++ if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) ++ sg = 0; ++ ++ do { ++ err = xenbus_transaction_start(&xbt); ++ if (err) { ++ xenbus_dev_fatal(dev, err, "starting transaction"); ++ goto fail; ++ } ++ ++ err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg); ++ if (err) { ++ message = "writing feature-sg"; ++ goto abort_transaction; ++ } ++ ++ err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", ++ "%d", sg); ++ if (err) { ++ message = "writing feature-gso-tcpv4"; ++ goto abort_transaction; ++ } ++ ++ /* We support rx-copy path. */ ++ err = xenbus_printf(xbt, dev->nodename, ++ "feature-rx-copy", "%d", 1); ++ if (err) { ++ message = "writing feature-rx-copy"; ++ goto abort_transaction; ++ } ++ ++ /* ++ * We don't support rx-flip path (except old guests who don't ++ * grok this feature flag). ++ */ ++ err = xenbus_printf(xbt, dev->nodename, ++ "feature-rx-flip", "%d", 0); ++ if (err) { ++ message = "writing feature-rx-flip"; ++ goto abort_transaction; ++ } ++ ++ err = xenbus_transaction_end(xbt, 0); ++ } while (err == -EAGAIN); ++ ++ if (err) { ++ xenbus_dev_fatal(dev, err, "completing transaction"); ++ goto fail; ++ } ++ ++ netback_probe_accelerators(be, dev); ++ ++ err = xenbus_switch_state(dev, XenbusStateInitWait); ++ if (err) ++ goto fail; ++ ++ /* This kicks hotplug scripts, so do it immediately. */ ++ backend_create_netif(be); ++ ++ return 0; ++ ++abort_transaction: ++ xenbus_transaction_end(xbt, 1); ++ xenbus_dev_fatal(dev, err, "%s", message); ++fail: ++ DPRINTK("failed"); ++ netback_remove(dev); ++ return err; ++} ++ ++ ++/** ++ * Handle the creation of the hotplug script environment. We add the script ++ * and vif variables to the environment, for the benefit of the vif-* hotplug ++ * scripts. 
++ */ ++static int netback_uevent(struct xenbus_device *xdev, char **envp, ++ int num_envp, char *buffer, int buffer_size) ++{ ++ struct backend_info *be = xdev->dev.driver_data; ++ netif_t *netif = be->netif; ++ int i = 0, length = 0; ++ char *val; ++ ++ DPRINTK("netback_uevent"); ++ ++ val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL); ++ if (IS_ERR(val)) { ++ int err = PTR_ERR(val); ++ xenbus_dev_fatal(xdev, err, "reading script"); ++ return err; ++ } ++ else { ++ add_uevent_var(envp, num_envp, &i, buffer, buffer_size, ++ &length, "script=%s", val); ++ kfree(val); ++ } ++ ++ add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, ++ "vif=%s", netif->dev->name); ++ ++ envp[i] = NULL; ++ ++ return 0; ++} ++ ++ ++static void backend_create_netif(struct backend_info *be) ++{ ++ int err; ++ long handle; ++ struct xenbus_device *dev = be->dev; ++ ++ if (be->netif != NULL) ++ return; ++ ++ err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle); ++ if (err != 1) { ++ xenbus_dev_fatal(dev, err, "reading handle"); ++ return; ++ } ++ ++ be->netif = netif_alloc(dev->otherend_id, handle); ++ if (IS_ERR(be->netif)) { ++ err = PTR_ERR(be->netif); ++ be->netif = NULL; ++ xenbus_dev_fatal(dev, err, "creating interface"); ++ return; ++ } ++ ++ kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE); ++} ++ ++ ++/** ++ * Callback received when the frontend's state changes. ++ */ ++static void frontend_changed(struct xenbus_device *dev, ++ enum xenbus_state frontend_state) ++{ ++ struct backend_info *be = dev->dev.driver_data; ++ ++ DPRINTK("%s", xenbus_strstate(frontend_state)); ++ ++ be->frontend_state = frontend_state; ++ ++ switch (frontend_state) { ++ case XenbusStateInitialising: ++ if (dev->state == XenbusStateClosed) { ++ printk(KERN_INFO "%s: %s: prepare for reconnect\n", ++ __FUNCTION__, dev->nodename); ++ xenbus_switch_state(dev, XenbusStateInitWait); ++ } ++ break; ++ ++ case XenbusStateInitialised: ++ break; ++ ++ case XenbusStateConnected: ++ if (dev->state == XenbusStateConnected) ++ break; ++ backend_create_netif(be); ++ if (be->netif) ++ connect(be); ++ break; ++ ++ case XenbusStateClosing: ++ if (be->netif) { ++ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); ++ netif_disconnect(be->netif); ++ be->netif = NULL; ++ } ++ xenbus_switch_state(dev, XenbusStateClosing); ++ break; ++ ++ case XenbusStateClosed: ++ xenbus_switch_state(dev, XenbusStateClosed); ++ if (xenbus_dev_is_online(dev)) ++ break; ++ /* fall through if not online */ ++ case XenbusStateUnknown: ++ device_unregister(&dev->dev); ++ break; ++ ++ default: ++ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", ++ frontend_state); ++ break; ++ } ++} ++ ++ ++static void xen_net_read_rate(struct xenbus_device *dev, ++ unsigned long *bytes, unsigned long *usec) ++{ ++ char *s, *e; ++ unsigned long b, u; ++ char *ratestr; ++ ++ /* Default to unlimited bandwidth. */ ++ *bytes = ~0UL; ++ *usec = 0; ++ ++ ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL); ++ if (IS_ERR(ratestr)) ++ return; ++ ++ s = ratestr; ++ b = simple_strtoul(s, &e, 10); ++ if ((s == e) || (*e != ',')) ++ goto fail; ++ ++ s = e + 1; ++ u = simple_strtoul(s, &e, 10); ++ if ((s == e) || (*e != '\0')) ++ goto fail; ++ ++ *bytes = b; ++ *usec = u; ++ ++ kfree(ratestr); ++ return; ++ ++ fail: ++ WPRINTK("Failed to parse network rate limit. 
Traffic unlimited.\n"); ++ kfree(ratestr); ++} ++ ++static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) ++{ ++ char *s, *e, *macstr; ++ int i; ++ ++ macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL); ++ if (IS_ERR(macstr)) ++ return PTR_ERR(macstr); ++ ++ for (i = 0; i < ETH_ALEN; i++) { ++ mac[i] = simple_strtoul(s, &e, 16); ++ if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) { ++ kfree(macstr); ++ return -ENOENT; ++ } ++ s = e+1; ++ } ++ ++ kfree(macstr); ++ return 0; ++} ++ ++static void connect(struct backend_info *be) ++{ ++ int err; ++ struct xenbus_device *dev = be->dev; ++ ++ err = connect_rings(be); ++ if (err) ++ return; ++ ++ err = xen_net_read_mac(dev, be->netif->fe_dev_addr); ++ if (err) { ++ xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename); ++ return; ++ } ++ ++ xen_net_read_rate(dev, &be->netif->credit_bytes, ++ &be->netif->credit_usec); ++ be->netif->remaining_credit = be->netif->credit_bytes; ++ ++ xenbus_switch_state(dev, XenbusStateConnected); ++ ++ netif_wake_queue(be->netif->dev); ++} ++ ++ ++static int connect_rings(struct backend_info *be) ++{ ++ struct xenbus_device *dev = be->dev; ++ unsigned long tx_ring_ref, rx_ring_ref; ++ unsigned int evtchn, rx_copy; ++ int err; ++ int val; ++ ++ DPRINTK(""); ++ ++ err = xenbus_gather(XBT_NIL, dev->otherend, ++ "tx-ring-ref", "%lu", &tx_ring_ref, ++ "rx-ring-ref", "%lu", &rx_ring_ref, ++ "event-channel", "%u", &evtchn, NULL); ++ if (err) { ++ xenbus_dev_fatal(dev, err, ++ "reading %s/ring-ref and event-channel", ++ dev->otherend); ++ return err; ++ } ++ ++ err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u", ++ &rx_copy); ++ if (err == -ENOENT) { ++ err = 0; ++ rx_copy = 0; ++ } ++ if (err < 0) { ++ xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy", ++ dev->otherend); ++ return err; ++ } ++ be->netif->copying_receiver = !!rx_copy; ++ ++ if (be->netif->dev->tx_queue_len != 0) { ++ if (xenbus_scanf(XBT_NIL, dev->otherend, ++ "feature-rx-notify", "%d", &val) < 0) ++ val = 0; ++ if (val) ++ be->netif->can_queue = 1; ++ else ++ /* Must be non-zero for pfifo_fast to work. */ ++ be->netif->dev->tx_queue_len = 1; ++ } ++ ++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0) ++ val = 0; ++ if (val) { ++ be->netif->features |= NETIF_F_SG; ++ be->netif->dev->features |= NETIF_F_SG; ++ } ++ ++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d", ++ &val) < 0) ++ val = 0; ++ if (val) { ++ be->netif->features |= NETIF_F_TSO; ++ be->netif->dev->features |= NETIF_F_TSO; ++ } ++ ++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload", ++ "%d", &val) < 0) ++ val = 0; ++ if (val) { ++ be->netif->features &= ~NETIF_F_IP_CSUM; ++ be->netif->dev->features &= ~NETIF_F_IP_CSUM; ++ } ++ ++ /* Map the shared frame, irq etc. 
*/ ++ err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn); ++ if (err) { ++ xenbus_dev_fatal(dev, err, ++ "mapping shared-frames %lu/%lu port %u", ++ tx_ring_ref, rx_ring_ref, evtchn); ++ return err; ++ } ++ return 0; ++} ++ ++ ++/* ** Driver Registration ** */ ++ ++ ++static const struct xenbus_device_id netback_ids[] = { ++ { "vif" }, ++ { "" } ++}; ++ ++ ++static struct xenbus_driver netback = { ++ .name = "vif", ++ .owner = THIS_MODULE, ++ .ids = netback_ids, ++ .probe = netback_probe, ++ .remove = netback_remove, ++ .uevent = netback_uevent, ++ .otherend_changed = frontend_changed, ++}; ++ ++ ++void netif_xenbus_init(void) ++{ ++ xenbus_register_backend(&netback); ++} +-- +1.7.3.4 + + +From 5b30803bf5f58ee980edd8d88a2d73dda995ee93 Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Mon, 9 Feb 2009 12:05:52 -0800 +Subject: [PATCH 052/139] xen: netback: first cut at porting to upstream and cleaning up + +Signed-off-by: Ian Campbell +--- + drivers/xen/Kconfig | 2 +- + drivers/xen/netback/Makefile | 2 +- + drivers/xen/netback/common.h | 33 +++--- + drivers/xen/netback/interface.c | 37 +++--- + drivers/xen/netback/netback.c | 248 ++++++++++++++++++++++++--------------- + drivers/xen/netback/xenbus.c | 25 ++-- + 6 files changed, 201 insertions(+), 146 deletions(-) + +diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig +index 7e83d43..30290a8 100644 +--- a/drivers/xen/Kconfig ++++ b/drivers/xen/Kconfig +@@ -38,7 +38,7 @@ config XEN_BACKEND + to other virtual machines. + + config XEN_NETDEV_BACKEND +- bool "Xen backend network device" ++ tristate "Xen backend network device" + depends on XEN_BACKEND && NET + help + Implement the network backend driver, which passes packets +diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile +index f4a0c51..a01a1a3 100644 +--- a/drivers/xen/netback/Makefile ++++ b/drivers/xen/netback/Makefile +@@ -1,3 +1,3 @@ + obj-$(CONFIG_XEN_NETDEV_BACKEND) := netbk.o + +-netbk-y := netback.o xenbus.o interface.o ++netbk-y := netback.o xenbus.o interface.o +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index 9a54d57..65b88f4 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -43,8 +43,7 @@ + #include + #include + #include +-#include +-#include ++#include + #include + + #define DPRINTK(_f, _a...) \ +@@ -55,7 +54,7 @@ + #define WPRINTK(fmt, args...) \ + printk(KERN_WARNING "xen_net: " fmt, ##args) + +-typedef struct netif_st { ++struct xen_netif { + /* Unique identifier for this interface. */ + domid_t domid; + unsigned int handle; +@@ -70,8 +69,8 @@ typedef struct netif_st { + unsigned int irq; + + /* The shared rings and indexes. 
*/ +- netif_tx_back_ring_t tx; +- netif_rx_back_ring_t rx; ++ struct xen_netif_tx_back_ring tx; ++ struct xen_netif_rx_back_ring rx; + struct vm_struct *tx_comms_area; + struct vm_struct *rx_comms_area; + +@@ -103,7 +102,7 @@ typedef struct netif_st { + unsigned int carrier; + + wait_queue_head_t waiting_to_free; +-} netif_t; ++}; + + /* + * Implement our own carrier flag: the network stack's version causes delays +@@ -141,7 +140,7 @@ struct netback_accelerator { + + struct backend_info { + struct xenbus_device *dev; +- netif_t *netif; ++ struct xen_netif *netif; + enum xenbus_state frontend_state; + + /* State relating to the netback accelerator */ +@@ -174,13 +173,13 @@ extern + void netif_accel_init(void); + + +-#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) +-#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) ++#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE) ++#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE) + +-void netif_disconnect(netif_t *netif); ++void netif_disconnect(struct xen_netif *netif); + +-netif_t *netif_alloc(domid_t domid, unsigned int handle); +-int netif_map(netif_t *netif, unsigned long tx_ring_ref, ++struct xen_netif *netif_alloc(domid_t domid, unsigned int handle); ++int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref, + unsigned long rx_ring_ref, unsigned int evtchn); + + #define netif_get(_b) (atomic_inc(&(_b)->refcnt)) +@@ -195,22 +194,22 @@ void netif_xenbus_init(void); + #define netif_schedulable(netif) \ + (netif_running((netif)->dev) && netback_carrier_ok(netif)) + +-void netif_schedule_work(netif_t *netif); +-void netif_deschedule_work(netif_t *netif); ++void netif_schedule_work(struct xen_netif *netif); ++void netif_deschedule_work(struct xen_netif *netif); + + int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev); + struct net_device_stats *netif_be_get_stats(struct net_device *dev); +-irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs); ++irqreturn_t netif_be_int(int irq, void *dev_id); + + static inline int netbk_can_queue(struct net_device *dev) + { +- netif_t *netif = netdev_priv(dev); ++ struct xen_netif *netif = netdev_priv(dev); + return netif->can_queue; + } + + static inline int netbk_can_sg(struct net_device *dev) + { +- netif_t *netif = netdev_priv(dev); ++ struct xen_netif *netif = netdev_priv(dev); + return netif->features & NETIF_F_SG; + } + +diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c +index 7e67941..d184ad7 100644 +--- a/drivers/xen/netback/interface.c ++++ b/drivers/xen/netback/interface.c +@@ -34,6 +34,9 @@ + #include + #include + ++#include ++#include ++ + /* + * Module parameter 'queue_length': + * +@@ -51,13 +54,13 @@ + static unsigned long netbk_queue_length = 32; + module_param_named(queue_length, netbk_queue_length, ulong, 0); + +-static void __netif_up(netif_t *netif) ++static void __netif_up(struct xen_netif *netif) + { + enable_irq(netif->irq); + netif_schedule_work(netif); + } + +-static void __netif_down(netif_t *netif) ++static void __netif_down(struct xen_netif *netif) + { + disable_irq(netif->irq); + netif_deschedule_work(netif); +@@ -65,7 +68,7 @@ static void __netif_down(netif_t *netif) + + static int net_open(struct net_device *dev) + { +- netif_t *netif = netdev_priv(dev); ++ struct xen_netif *netif = netdev_priv(dev); + if (netback_carrier_ok(netif)) { + __netif_up(netif); + netif_start_queue(dev); +@@ -75,7 +78,7 @@ static int 
net_open(struct net_device *dev) + + static int net_close(struct net_device *dev) + { +- netif_t *netif = netdev_priv(dev); ++ struct xen_netif *netif = netdev_priv(dev); + if (netback_carrier_ok(netif)) + __netif_down(netif); + netif_stop_queue(dev); +@@ -95,7 +98,7 @@ static int netbk_change_mtu(struct net_device *dev, int mtu) + static int netbk_set_sg(struct net_device *dev, u32 data) + { + if (data) { +- netif_t *netif = netdev_priv(dev); ++ struct xen_netif *netif = netdev_priv(dev); + + if (!(netif->features & NETIF_F_SG)) + return -ENOSYS; +@@ -107,7 +110,7 @@ static int netbk_set_sg(struct net_device *dev, u32 data) + static int netbk_set_tso(struct net_device *dev, u32 data) + { + if (data) { +- netif_t *netif = netdev_priv(dev); ++ struct xen_netif *netif = netdev_priv(dev); + + if (!(netif->features & NETIF_F_TSO)) + return -ENOSYS; +@@ -127,15 +130,15 @@ static struct ethtool_ops network_ethtool_ops = + .get_link = ethtool_op_get_link, + }; + +-netif_t *netif_alloc(domid_t domid, unsigned int handle) ++struct xen_netif *netif_alloc(domid_t domid, unsigned int handle) + { + int err = 0; + struct net_device *dev; +- netif_t *netif; ++ struct xen_netif *netif; + char name[IFNAMSIZ] = {}; + + snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle); +- dev = alloc_netdev(sizeof(netif_t), name, ether_setup); ++ dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup); + if (dev == NULL) { + DPRINTK("Could not create netif: out of memory\n"); + return ERR_PTR(-ENOMEM); +@@ -194,7 +197,7 @@ netif_t *netif_alloc(domid_t domid, unsigned int handle) + } + + static int map_frontend_pages( +- netif_t *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref) ++ struct xen_netif *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref) + { + struct gnttab_map_grant_ref op; + +@@ -229,7 +232,7 @@ static int map_frontend_pages( + return 0; + } + +-static void unmap_frontend_pages(netif_t *netif) ++static void unmap_frontend_pages(struct xen_netif *netif) + { + struct gnttab_unmap_grant_ref op; + +@@ -246,12 +249,12 @@ static void unmap_frontend_pages(netif_t *netif) + BUG(); + } + +-int netif_map(netif_t *netif, unsigned long tx_ring_ref, ++int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref, + unsigned long rx_ring_ref, unsigned int evtchn) + { + int err = -ENOMEM; +- netif_tx_sring_t *txs; +- netif_rx_sring_t *rxs; ++ struct xen_netif_tx_sring *txs; ++ struct xen_netif_rx_sring *rxs; + + /* Already connected through? 
*/ + if (netif->irq) +@@ -276,10 +279,10 @@ int netif_map(netif_t *netif, unsigned long tx_ring_ref, + netif->irq = err; + disable_irq(netif->irq); + +- txs = (netif_tx_sring_t *)netif->tx_comms_area->addr; ++ txs = (struct xen_netif_tx_sring *)netif->tx_comms_area->addr; + BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE); + +- rxs = (netif_rx_sring_t *) ++ rxs = (struct xen_netif_rx_sring *) + ((char *)netif->rx_comms_area->addr); + BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE); + +@@ -303,7 +306,7 @@ err_rx: + return err; + } + +-void netif_disconnect(netif_t *netif) ++void netif_disconnect(struct xen_netif *netif) + { + if (netback_carrier_ok(netif)) { + rtnl_lock(); +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index db629d4..c959075 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -35,9 +35,17 @@ + */ + + #include "common.h" ++ ++#include ++#include ++ + #include ++#include + #include + ++#include ++#include ++ + /*define NETBE_DEBUG_INTERRUPT*/ + + struct netbk_rx_meta { +@@ -51,11 +59,12 @@ struct netbk_tx_pending_inuse { + unsigned long alloc_time; + }; + ++ + static void netif_idx_release(u16 pending_idx); +-static void make_tx_response(netif_t *netif, +- netif_tx_request_t *txp, ++static void make_tx_response(struct xen_netif *netif, ++ struct xen_netif_tx_request *txp, + s8 st); +-static netif_rx_response_t *make_rx_response(netif_t *netif, ++static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif, + u16 id, + s8 st, + u16 offset, +@@ -108,8 +117,8 @@ static inline int netif_page_index(struct page *pg) + #define PKT_PROT_LEN 64 + + static struct pending_tx_info { +- netif_tx_request_t req; +- netif_t *netif; ++ struct xen_netif_tx_request req; ++ struct xen_netif *netif; + } pending_tx_info[MAX_PENDING_REQS]; + static u16 pending_ring[MAX_PENDING_REQS]; + typedef unsigned int PEND_RING_IDX; +@@ -128,8 +137,8 @@ static LIST_HEAD(pending_inuse_head); + static struct sk_buff_head tx_queue; + + static grant_handle_t grant_tx_handle[MAX_PENDING_REQS]; +-static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS]; +-static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS]; ++static struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS]; ++static struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS]; + + static struct list_head net_schedule_list; + static spinlock_t net_schedule_list_lock; +@@ -195,7 +204,7 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb) + goto err; + + skb_reserve(nskb, 16 + NET_IP_ALIGN); +- headlen = nskb->end - nskb->data; ++ headlen = skb_end_pointer(nskb) - nskb->data; + if (headlen > skb_headlen(skb)) + headlen = skb_headlen(skb); + ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen); +@@ -243,9 +252,9 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb) + + offset = nskb->data - skb->data; + +- nskb->h.raw = skb->h.raw + offset; +- nskb->nh.raw = skb->nh.raw + offset; +- nskb->mac.raw = skb->mac.raw + offset; ++ nskb->transport_header = skb->transport_header + offset; ++ nskb->network_header = skb->network_header + offset; ++ nskb->mac_header = skb->mac_header + offset; + + return nskb; + +@@ -255,14 +264,14 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb) + return NULL; + } + +-static inline int netbk_max_required_rx_slots(netif_t *netif) ++static inline int netbk_max_required_rx_slots(struct xen_netif *netif) + { + if (netif->features & (NETIF_F_SG|NETIF_F_TSO)) + return MAX_SKB_FRAGS + 2; /* header + extra_info + frags 
*/ + return 1; /* all in one */ + } + +-static inline int netbk_queue_full(netif_t *netif) ++static inline int netbk_queue_full(struct xen_netif *netif) + { + RING_IDX peek = netif->rx_req_cons_peek; + RING_IDX needed = netbk_max_required_rx_slots(netif); +@@ -273,14 +282,14 @@ static inline int netbk_queue_full(netif_t *netif) + + static void tx_queue_callback(unsigned long data) + { +- netif_t *netif = (netif_t *)data; ++ struct xen_netif *netif = (struct xen_netif *)data; + if (netif_schedulable(netif)) + netif_wake_queue(netif->dev); + } + + int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) + { +- netif_t *netif = netdev_priv(dev); ++ struct xen_netif *netif = netdev_priv(dev); + + BUG_ON(skb->dev != dev); + +@@ -302,7 +311,6 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) + /* Copy only the header fields we use in this driver. */ + nskb->dev = skb->dev; + nskb->ip_summed = skb->ip_summed; +- nskb->proto_data_valid = skb->proto_data_valid; + dev_kfree_skb(skb); + skb = nskb; + } +@@ -366,25 +374,25 @@ struct netrx_pending_operations { + unsigned mcl_prod, mcl_cons; + unsigned copy_prod, copy_cons; + unsigned meta_prod, meta_cons; +- mmu_update_t *mmu; +- gnttab_transfer_t *trans; +- gnttab_copy_t *copy; +- multicall_entry_t *mcl; ++ struct mmu_update *mmu; ++ struct gnttab_transfer *trans; ++ struct gnttab_copy *copy; ++ struct multicall_entry *mcl; + struct netbk_rx_meta *meta; + }; + + /* Set up the grant operations for this fragment. If it's a flipping + interface, we also set up the unmap request from here. */ +-static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta, ++static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta, + int i, struct netrx_pending_operations *npo, + struct page *page, unsigned long size, + unsigned long offset) + { +- mmu_update_t *mmu; +- gnttab_transfer_t *gop; +- gnttab_copy_t *copy_gop; +- multicall_entry_t *mcl; +- netif_rx_request_t *req; ++ struct mmu_update *mmu; ++ struct gnttab_transfer *gop; ++ struct gnttab_copy *copy_gop; ++ struct multicall_entry *mcl; ++ struct xen_netif_rx_request *req; + unsigned long old_mfn, new_mfn; + int idx = netif_page_index(page); + +@@ -426,12 +434,12 @@ static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta, + mcl = npo->mcl + npo->mcl_prod++; + MULTI_update_va_mapping(mcl, + (unsigned long)page_address(page), +- pfn_pte_ma(new_mfn, PAGE_KERNEL), ++ mfn_pte(new_mfn, PAGE_KERNEL), + 0); + + mmu = npo->mmu + npo->mmu_prod++; +- mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) | +- MMU_MACHPHYS_UPDATE; ++ mmu->ptr = ((phys_addr_t)new_mfn << PAGE_SHIFT) | ++ MMU_MACHPHYS_UPDATE; + mmu->val = page_to_pfn(page); + } + +@@ -446,7 +454,7 @@ static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta, + static void netbk_gop_skb(struct sk_buff *skb, + struct netrx_pending_operations *npo) + { +- netif_t *netif = netdev_priv(skb->dev); ++ struct xen_netif *netif = netdev_priv(skb->dev); + int nr_frags = skb_shinfo(skb)->nr_frags; + int i; + int extra; +@@ -494,9 +502,9 @@ static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta) + static int netbk_check_gop(int nr_frags, domid_t domid, + struct netrx_pending_operations *npo) + { +- multicall_entry_t *mcl; +- gnttab_transfer_t *gop; +- gnttab_copy_t *copy_op; ++ struct multicall_entry *mcl; ++ struct gnttab_transfer *gop; ++ struct gnttab_copy *copy_op; + int status = NETIF_RSP_OKAY; + int i; + +@@ -534,7 +542,7 @@ static int netbk_check_gop(int nr_frags, 
domid_t domid, + return status; + } + +-static void netbk_add_frag_responses(netif_t *netif, int status, ++static void netbk_add_frag_responses(struct xen_netif *netif, int status, + struct netbk_rx_meta *meta, int nr_frags) + { + int i; +@@ -555,11 +563,11 @@ static void netbk_add_frag_responses(netif_t *netif, int status, + + static void net_rx_action(unsigned long unused) + { +- netif_t *netif = NULL; ++ struct xen_netif *netif = NULL; + s8 status; + u16 id, irq, flags; +- netif_rx_response_t *resp; +- multicall_entry_t *mcl; ++ struct xen_netif_rx_response *resp; ++ struct multicall_entry *mcl; + struct sk_buff_head rxq; + struct sk_buff *skb; + int notify_nr = 0; +@@ -572,10 +580,10 @@ static void net_rx_action(unsigned long unused) + * Putting hundreds of bytes on the stack is considered rude. + * Static works because a tasklet can only be on one CPU at any time. + */ +- static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3]; +- static mmu_update_t rx_mmu[NET_RX_RING_SIZE]; +- static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE]; +- static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE]; ++ static struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3]; ++ static struct mmu_update rx_mmu[NET_RX_RING_SIZE]; ++ static struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE]; ++ static struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE]; + static unsigned char rx_notify[NR_IRQS]; + static u16 notify_list[NET_RX_RING_SIZE]; + static struct netbk_rx_meta meta[NET_RX_RING_SIZE]; +@@ -596,7 +604,7 @@ static void net_rx_action(unsigned long unused) + *(int *)skb->cb = nr_frags; + + if (!xen_feature(XENFEAT_auto_translated_physmap) && +- !((netif_t *)netdev_priv(skb->dev))->copying_receiver && ++ !((struct xen_netif *)netdev_priv(skb->dev))->copying_receiver && + check_mfn(nr_frags + 1)) { + /* Memory squeeze? Back off for an arbitrary while. */ + if ( net_ratelimit() ) +@@ -692,9 +700,10 @@ static void net_rx_action(unsigned long unused) + id = meta[npo.meta_cons].id; + flags = nr_frags ? NETRXF_more_data : 0; + +- if (skb->ip_summed == CHECKSUM_HW) /* local packet? */ ++ if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */ + flags |= NETRXF_csum_blank | NETRXF_data_validated; +- else if (skb->proto_data_valid) /* remote but checksummed? */ ++ else if (skb->ip_summed == CHECKSUM_UNNECESSARY) ++ /* remote but checksummed. 
*/ + flags |= NETRXF_data_validated; + + if (meta[npo.meta_cons].copy) +@@ -705,8 +714,8 @@ static void net_rx_action(unsigned long unused) + skb_headlen(skb), flags); + + if (meta[npo.meta_cons].frag.size) { +- struct netif_extra_info *gso = +- (struct netif_extra_info *) ++ struct xen_netif_extra_info *gso = ++ (struct xen_netif_extra_info *) + RING_GET_RESPONSE(&netif->rx, + netif->rx.rsp_prod_pvt++); + +@@ -769,16 +778,16 @@ static void netbk_tx_pending_timeout(unsigned long unused) + + struct net_device_stats *netif_be_get_stats(struct net_device *dev) + { +- netif_t *netif = netdev_priv(dev); ++ struct xen_netif *netif = netdev_priv(dev); + return &netif->stats; + } + +-static int __on_net_schedule_list(netif_t *netif) ++static int __on_net_schedule_list(struct xen_netif *netif) + { + return netif->list.next != NULL; + } + +-static void remove_from_net_schedule_list(netif_t *netif) ++static void remove_from_net_schedule_list(struct xen_netif *netif) + { + spin_lock_irq(&net_schedule_list_lock); + if (likely(__on_net_schedule_list(netif))) { +@@ -789,7 +798,7 @@ static void remove_from_net_schedule_list(netif_t *netif) + spin_unlock_irq(&net_schedule_list_lock); + } + +-static void add_to_net_schedule_list_tail(netif_t *netif) ++static void add_to_net_schedule_list_tail(struct xen_netif *netif) + { + if (__on_net_schedule_list(netif)) + return; +@@ -811,7 +820,7 @@ static void add_to_net_schedule_list_tail(netif_t *netif) + * If we may be buffer transmit buffers for any reason then we must be rather + * more conservative and treat this as the final check for pending work. + */ +-void netif_schedule_work(netif_t *netif) ++void netif_schedule_work(struct xen_netif *netif) + { + int more_to_do; + +@@ -827,13 +836,13 @@ void netif_schedule_work(netif_t *netif) + } + } + +-void netif_deschedule_work(netif_t *netif) ++void netif_deschedule_work(struct xen_netif *netif) + { + remove_from_net_schedule_list(netif); + } + + +-static void tx_add_credit(netif_t *netif) ++static void tx_add_credit(struct xen_netif *netif) + { + unsigned long max_burst, max_credit; + +@@ -855,7 +864,7 @@ static void tx_add_credit(netif_t *netif) + + static void tx_credit_callback(unsigned long data) + { +- netif_t *netif = (netif_t *)data; ++ struct xen_netif *netif = (struct xen_netif *)data; + tx_add_credit(netif); + netif_schedule_work(netif); + } +@@ -869,10 +878,10 @@ static inline int copy_pending_req(PEND_RING_IDX pending_idx) + inline static void net_tx_action_dealloc(void) + { + struct netbk_tx_pending_inuse *inuse, *n; +- gnttab_unmap_grant_ref_t *gop; ++ struct gnttab_unmap_grant_ref *gop; + u16 pending_idx; + PEND_RING_IDX dc, dp; +- netif_t *netif; ++ struct xen_netif *netif; + int ret; + LIST_HEAD(list); + +@@ -954,7 +963,7 @@ inline static void net_tx_action_dealloc(void) + } + } + +-static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end) ++static void netbk_tx_err(struct xen_netif *netif, struct xen_netif_tx_request *txp, RING_IDX end) + { + RING_IDX cons = netif->tx.req_cons; + +@@ -969,8 +978,8 @@ static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end) + netif_put(netif); + } + +-static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first, +- netif_tx_request_t *txp, int work_to_do) ++static int netbk_count_requests(struct xen_netif *netif, struct xen_netif_tx_request *first, ++ struct xen_netif_tx_request *txp, int work_to_do) + { + RING_IDX cons = netif->tx.req_cons; + int frags = 0; +@@ -1009,10 +1018,10 @@ static int 
netbk_count_requests(netif_t *netif, netif_tx_request_t *first, + return frags; + } + +-static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif, ++static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif, + struct sk_buff *skb, +- netif_tx_request_t *txp, +- gnttab_map_grant_ref_t *mop) ++ struct xen_netif_tx_request *txp, ++ struct gnttab_map_grant_ref *mop) + { + struct skb_shared_info *shinfo = skb_shinfo(skb); + skb_frag_t *frags = shinfo->frags; +@@ -1039,12 +1048,12 @@ static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif, + } + + static int netbk_tx_check_mop(struct sk_buff *skb, +- gnttab_map_grant_ref_t **mopp) ++ struct gnttab_map_grant_ref **mopp) + { +- gnttab_map_grant_ref_t *mop = *mopp; ++ struct gnttab_map_grant_ref *mop = *mopp; + int pending_idx = *((u16 *)skb->data); +- netif_t *netif = pending_tx_info[pending_idx].netif; +- netif_tx_request_t *txp; ++ struct xen_netif *netif = pending_tx_info[pending_idx].netif; ++ struct xen_netif_tx_request *txp; + struct skb_shared_info *shinfo = skb_shinfo(skb); + int nr_frags = shinfo->nr_frags; + int i, err, start; +@@ -1118,7 +1127,7 @@ static void netbk_fill_frags(struct sk_buff *skb) + + for (i = 0; i < nr_frags; i++) { + skb_frag_t *frag = shinfo->frags + i; +- netif_tx_request_t *txp; ++ struct xen_netif_tx_request *txp; + unsigned long pending_idx; + + pending_idx = (unsigned long)frag->page; +@@ -1138,10 +1147,10 @@ static void netbk_fill_frags(struct sk_buff *skb) + } + } + +-int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras, ++int netbk_get_extras(struct xen_netif *netif, struct xen_netif_extra_info *extras, + int work_to_do) + { +- struct netif_extra_info extra; ++ struct xen_netif_extra_info extra; + RING_IDX cons = netif->tx.req_cons; + + do { +@@ -1166,7 +1175,7 @@ int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras, + return work_to_do; + } + +-static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso) ++static int netbk_set_skb_gso(struct sk_buff *skb, struct xen_netif_extra_info *gso) + { + if (!gso->u.gso.size) { + DPRINTK("GSO size must not be zero.\n"); +@@ -1189,18 +1198,57 @@ static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso) + return 0; + } + ++static int skb_checksum_setup(struct sk_buff *skb) ++{ ++ struct iphdr *iph; ++ unsigned char *th; ++ int err = -EPROTO; ++ ++ if (skb->protocol != htons(ETH_P_IP)) ++ goto out; ++ ++ iph = (void *)skb->data; ++ th = skb->data + 4 * iph->ihl; ++ if (th >= skb_tail_pointer(skb)) ++ goto out; ++ ++ skb->csum_start = th - skb->head; ++ switch (iph->protocol) { ++ case IPPROTO_TCP: ++ skb->csum_offset = offsetof(struct tcphdr, check); ++ break; ++ case IPPROTO_UDP: ++ skb->csum_offset = offsetof(struct udphdr, check); ++ break; ++ default: ++ if (net_ratelimit()) ++ printk(KERN_ERR "Attempting to checksum a non-" ++ "TCP/UDP packet, dropping a protocol" ++ " %d packet", iph->protocol); ++ goto out; ++ } ++ ++ if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb)) ++ goto out; ++ ++ err = 0; ++ ++out: ++ return err; ++} ++ + /* Called after netfront has transmitted */ + static void net_tx_action(unsigned long unused) + { + struct list_head *ent; + struct sk_buff *skb; +- netif_t *netif; +- netif_tx_request_t txreq; +- netif_tx_request_t txfrags[MAX_SKB_FRAGS]; +- struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; ++ struct xen_netif *netif; ++ struct xen_netif_tx_request txreq; ++ struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS]; ++ 
struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; + u16 pending_idx; + RING_IDX i; +- gnttab_map_grant_ref_t *mop; ++ struct gnttab_map_grant_ref *mop; + unsigned int data_len; + int ret, work_to_do; + +@@ -1212,7 +1260,7 @@ static void net_tx_action(unsigned long unused) + !list_empty(&net_schedule_list)) { + /* Get a netif from the list with work to do. */ + ent = net_schedule_list.next; +- netif = list_entry(ent, netif_t, list); ++ netif = list_entry(ent, struct xen_netif, list); + netif_get(netif); + remove_from_net_schedule_list(netif); + +@@ -1313,7 +1361,7 @@ static void net_tx_action(unsigned long unused) + skb_reserve(skb, 16 + NET_IP_ALIGN); + + if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { +- struct netif_extra_info *gso; ++ struct xen_netif_extra_info *gso; + gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; + + if (netbk_set_skb_gso(skb, gso)) { +@@ -1372,7 +1420,7 @@ static void net_tx_action(unsigned long unused) + + mop = tx_map_ops; + while ((skb = __skb_dequeue(&tx_queue)) != NULL) { +- netif_tx_request_t *txp; ++ struct xen_netif_tx_request *txp; + + pending_idx = *((u16 *)skb->data); + netif = pending_tx_info[pending_idx].netif; +@@ -1403,14 +1451,10 @@ static void net_tx_action(unsigned long unused) + * Old frontends do not assert data_validated but we + * can infer it from csum_blank so test both flags. + */ +- if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) { +- skb->ip_summed = CHECKSUM_UNNECESSARY; +- skb->proto_data_valid = 1; +- } else { ++ if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) ++ skb->ip_summed = CHECKSUM_PARTIAL; ++ else + skb->ip_summed = CHECKSUM_NONE; +- skb->proto_data_valid = 0; +- } +- skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank); + + netbk_fill_frags(skb); + +@@ -1420,6 +1464,14 @@ static void net_tx_action(unsigned long unused) + netif->stats.rx_bytes += skb->len; + netif->stats.rx_packets++; + ++ if (skb->ip_summed == CHECKSUM_PARTIAL) { ++ if (skb_checksum_setup(skb)) { ++ DPRINTK("Can't setup checksum in net_tx_action\n"); ++ kfree_skb(skb); ++ continue; ++ } ++ } ++ + if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) && + unlikely(skb_linearize(skb))) { + DPRINTK("Can't linearize skb in net_tx_action.\n"); +@@ -1464,9 +1516,9 @@ static void netif_page_release(struct page *page, unsigned int order) + netif_idx_release(idx); + } + +-irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs) ++irqreturn_t netif_be_int(int irq, void *dev_id) + { +- netif_t *netif = dev_id; ++ struct xen_netif *netif = dev_id; + + add_to_net_schedule_list_tail(netif); + maybe_schedule_tx_action(); +@@ -1477,12 +1529,12 @@ irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs) + return IRQ_HANDLED; + } + +-static void make_tx_response(netif_t *netif, +- netif_tx_request_t *txp, ++static void make_tx_response(struct xen_netif *netif, ++ struct xen_netif_tx_request *txp, + s8 st) + { + RING_IDX i = netif->tx.rsp_prod_pvt; +- netif_tx_response_t *resp; ++ struct xen_netif_tx_response *resp; + int notify; + + resp = RING_GET_RESPONSE(&netif->tx, i); +@@ -1507,7 +1559,7 @@ static void make_tx_response(netif_t *netif, + #endif + } + +-static netif_rx_response_t *make_rx_response(netif_t *netif, ++static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif, + u16 id, + s8 st, + u16 offset, +@@ -1515,7 +1567,7 @@ static netif_rx_response_t *make_rx_response(netif_t *netif, + u16 flags) + { + RING_IDX i = netif->rx.rsp_prod_pvt; +- netif_rx_response_t *resp; ++ struct 
xen_netif_rx_response *resp; + + resp = RING_GET_RESPONSE(&netif->rx, i); + resp->offset = offset; +@@ -1534,14 +1586,14 @@ static netif_rx_response_t *make_rx_response(netif_t *netif, + static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs) + { + struct list_head *ent; +- netif_t *netif; ++ struct xen_netif *netif; + int i = 0; + + printk(KERN_ALERT "netif_schedule_list:\n"); + spin_lock_irq(&net_schedule_list_lock); + + list_for_each (ent, &net_schedule_list) { +- netif = list_entry(ent, netif_t, list); ++ netif = list_entry(ent, struct xen_netif, list); + printk(KERN_ALERT " %d: private(rx_req_cons=%08x " + "rx_resp_prod=%08x\n", + i, netif->rx.req_cons, netif->rx.rsp_prod_pvt); +@@ -1569,11 +1621,13 @@ static int __init netback_init(void) + int i; + struct page *page; + +- if (!is_running_on_xen()) ++ printk(KERN_CRIT "*** netif_init\n"); ++ ++ if (!xen_domain()) + return -ENODEV; + + /* We can increase reservation by this much in net_rx_action(). */ +- balloon_update_driver_allowance(NET_RX_RING_SIZE); ++// balloon_update_driver_allowance(NET_RX_RING_SIZE); + + skb_queue_head_init(&rx_queue); + skb_queue_head_init(&tx_queue); +@@ -1616,7 +1670,7 @@ static int __init netback_init(void) + netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB; + } + +- netif_accel_init(); ++ //netif_accel_init(); + + netif_xenbus_init(); + +diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c +index d7faeb6..ed7c006 100644 +--- a/drivers/xen/netback/xenbus.c ++++ b/drivers/xen/netback/xenbus.c +@@ -37,7 +37,7 @@ static int netback_remove(struct xenbus_device *dev) + { + struct backend_info *be = dev->dev.driver_data; + +- netback_remove_accelerators(be, dev); ++ //netback_remove_accelerators(be, dev); + + if (be->netif) { + kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); +@@ -123,7 +123,7 @@ static int netback_probe(struct xenbus_device *dev, + goto fail; + } + +- netback_probe_accelerators(be, dev); ++ //netback_probe_accelerators(be, dev); + + err = xenbus_switch_state(dev, XenbusStateInitWait); + if (err) +@@ -149,12 +149,10 @@ fail: + * and vif variables to the environment, for the benefit of the vif-* hotplug + * scripts. 
+ */ +-static int netback_uevent(struct xenbus_device *xdev, char **envp, +- int num_envp, char *buffer, int buffer_size) ++static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env) + { + struct backend_info *be = xdev->dev.driver_data; +- netif_t *netif = be->netif; +- int i = 0, length = 0; ++ struct xen_netif *netif = be->netif; + char *val; + + DPRINTK("netback_uevent"); +@@ -166,15 +164,15 @@ static int netback_uevent(struct xenbus_device *xdev, char **envp, + return err; + } + else { +- add_uevent_var(envp, num_envp, &i, buffer, buffer_size, +- &length, "script=%s", val); ++ if (add_uevent_var(env, "script=%s", val)) { ++ kfree(val); ++ return -ENOMEM; ++ } + kfree(val); + } + +- add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, +- "vif=%s", netif->dev->name); +- +- envp[i] = NULL; ++ if (add_uevent_var(env, "vif=%s", netif->dev->name)) ++ return -ENOMEM; + + return 0; + } +@@ -450,5 +448,6 @@ static struct xenbus_driver netback = { + + void netif_xenbus_init(void) + { +- xenbus_register_backend(&netback); ++ printk(KERN_CRIT "registering netback\n"); ++ (void)xenbus_register_backend(&netback); + } +-- +1.7.3.4 + + +From a41a2ab9e1ac4ef8320f69f2719e973e25faff5c Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Mon, 9 Feb 2009 16:39:01 -0800 +Subject: [PATCH 053/139] xen: netback: don't include xen/evtchn.h + +Its a usermode header for users of /dev/evtchn + +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/common.h | 1 - + 1 files changed, 0 insertions(+), 1 deletions(-) + +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index 65b88f4..5665ed1 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -38,7 +38,6 @@ + #include + #include + #include +-#include + #include + #include + #include +-- +1.7.3.4 + + +From f28a7c6148bb979acf99c0cbe3b441d0fb0853d9 Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Wed, 18 Feb 2009 15:55:18 -0800 +Subject: [PATCH 054/139] xen: netback: use mod_timer + +__mod_timer is no longer a public API. 
+ +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/netback.c | 4 ++-- + 1 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index c959075..e920703 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -334,7 +334,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) + */ + netif->tx_queue_timeout.data = (unsigned long)netif; + netif->tx_queue_timeout.function = tx_queue_callback; +- __mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2); ++ mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2); + } + } + +@@ -1299,7 +1299,7 @@ static void net_tx_action(unsigned long unused) + (unsigned long)netif; + netif->credit_timeout.function = + tx_credit_callback; +- __mod_timer(&netif->credit_timeout, ++ mod_timer(&netif->credit_timeout, + next_credit); + netif_put(netif); + continue; +-- +1.7.3.4 + + +From 52f97ad360f28762c785343ba5c9f8abb83536f3 Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Fri, 6 Mar 2009 08:29:31 +0000 +Subject: [PATCH 055/139] xen: netback: unmap tx ring gref when mapping of rx ring gref failed + +[ijc-ported from linux-2.6.18-xen.hg 782:51decc39e5e7] +Signed-off-by: Jan Beulich +Signed-off-by: Ian Campbell +--- + drivers/xen/netback/interface.c | 6 ++++++ + 1 files changed, 6 insertions(+), 0 deletions(-) + +diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c +index d184ad7..f3d9ea1 100644 +--- a/drivers/xen/netback/interface.c ++++ b/drivers/xen/netback/interface.c +@@ -222,6 +222,12 @@ static int map_frontend_pages( + BUG(); + + if (op.status) { ++ struct gnttab_unmap_grant_ref unop; ++ ++ gnttab_set_unmap_op(&unop, ++ (unsigned long)netif->tx_comms_area->addr, ++ GNTMAP_host_map, netif->tx_shmem_handle); ++ HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1); + DPRINTK(" Gnttab failure mapping rx_ring_ref!\n"); + return op.status; + } +-- +1.7.3.4 + + +From f9b63790f1404eb03ac824147b2294a46e485643 Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Fri, 6 Mar 2009 08:29:32 +0000 +Subject: [PATCH 056/139] xen: netback: add ethtool stat to track copied skbs. + +Copied skbs should be rare but we have no way of verifying that. + +[ijc-ported from linux-2.6.18-xen.hg 792:db9857bb0320] +Signed-off-by: Ian Campbell +--- + drivers/xen/netback/common.h | 3 ++ + drivers/xen/netback/interface.c | 47 +++++++++++++++++++++++++++++++++++++++ + drivers/xen/netback/netback.c | 6 ++++- + 3 files changed, 55 insertions(+), 1 deletions(-) + +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index 5665ed1..6ba804d 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -92,6 +92,9 @@ struct xen_netif { + /* Enforce draining of the transmit queue. */ + struct timer_list tx_queue_timeout; + ++ /* Statistics */ ++ int nr_copied_skbs; ++ + /* Miscellaneous private stuff. 
*/ + struct list_head list; /* scheduling list */ + atomic_t refcnt; +diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c +index f3d9ea1..1a99c87 100644 +--- a/drivers/xen/netback/interface.c ++++ b/drivers/xen/netback/interface.c +@@ -119,8 +119,51 @@ static int netbk_set_tso(struct net_device *dev, u32 data) + return ethtool_op_set_tso(dev, data); + } + ++static void netbk_get_drvinfo(struct net_device *dev, ++ struct ethtool_drvinfo *info) ++{ ++ strcpy(info->driver, "netbk"); ++} ++ ++static const struct netif_stat { ++ char name[ETH_GSTRING_LEN]; ++ u16 offset; ++} netbk_stats[] = { ++ { "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) }, ++}; ++ ++static int netbk_get_stats_count(struct net_device *dev) ++{ ++ return ARRAY_SIZE(netbk_stats); ++} ++ ++static void netbk_get_ethtool_stats(struct net_device *dev, ++ struct ethtool_stats *stats, u64 * data) ++{ ++ void *netif = netdev_priv(dev); ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(netbk_stats); i++) ++ data[i] = *(int *)(netif + netbk_stats[i].offset); ++} ++ ++static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data) ++{ ++ int i; ++ ++ switch (stringset) { ++ case ETH_SS_STATS: ++ for (i = 0; i < ARRAY_SIZE(netbk_stats); i++) ++ memcpy(data + i * ETH_GSTRING_LEN, ++ netbk_stats[i].name, ETH_GSTRING_LEN); ++ break; ++ } ++} ++ + static struct ethtool_ops network_ethtool_ops = + { ++ .get_drvinfo = netbk_get_drvinfo, ++ + .get_tx_csum = ethtool_op_get_tx_csum, + .set_tx_csum = ethtool_op_set_tx_csum, + .get_sg = ethtool_op_get_sg, +@@ -128,6 +171,10 @@ static struct ethtool_ops network_ethtool_ops = + .get_tso = ethtool_op_get_tso, + .set_tso = netbk_set_tso, + .get_link = ethtool_op_get_link, ++ ++ .get_stats_count = netbk_get_stats_count, ++ .get_ethtool_stats = netbk_get_ethtool_stats, ++ .get_strings = netbk_get_strings, + }; + + struct xen_netif *netif_alloc(domid_t domid, unsigned int handle) +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index e920703..f59fadb 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -923,7 +923,11 @@ inline static void net_tx_action_dealloc(void) + if (time_after(inuse->alloc_time + HZ / 2, jiffies)) + break; + +- switch (copy_pending_req(inuse - pending_inuse)) { ++ pending_idx = inuse - pending_inuse; ++ ++ pending_tx_info[pending_idx].netif->nr_copied_skbs++; ++ ++ switch (copy_pending_req(pending_idx)) { + case 0: + list_move_tail(&inuse->list, &list); + continue; +-- +1.7.3.4 + + +From c41d8da3d853d4e89ba38693b90c1fe512095704 Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Fri, 6 Mar 2009 08:29:33 +0000 +Subject: [PATCH 057/139] xen: netback: make queue length parameter writeable in sysfs + +Any changes will only take effect for newly created VIFs. + +Also hook up the vif devices to their parent and publish bus info via +ethtool. + +[ijc-ported from linux-2.6.18-xen.hg 793:3aa9b8a7876b] +Signed-off-by: Ian Campbell +--- + drivers/xen/netback/interface.c | 3 ++- + drivers/xen/netback/xenbus.c | 1 + + 2 files changed, 3 insertions(+), 1 deletions(-) + +diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c +index 1a99c87..7706170 100644 +--- a/drivers/xen/netback/interface.c ++++ b/drivers/xen/netback/interface.c +@@ -52,7 +52,7 @@ + * blocked. 
+ */ + static unsigned long netbk_queue_length = 32; +-module_param_named(queue_length, netbk_queue_length, ulong, 0); ++module_param_named(queue_length, netbk_queue_length, ulong, 0644); + + static void __netif_up(struct xen_netif *netif) + { +@@ -123,6 +123,7 @@ static void netbk_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) + { + strcpy(info->driver, "netbk"); ++ strcpy(info->bus_info, dev->dev.parent->bus_id); + } + + static const struct netif_stat { +diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c +index ed7c006..dc7b367 100644 +--- a/drivers/xen/netback/xenbus.c ++++ b/drivers/xen/netback/xenbus.c +@@ -200,6 +200,7 @@ static void backend_create_netif(struct backend_info *be) + xenbus_dev_fatal(dev, err, "creating interface"); + return; + } ++ SET_NETDEV_DEV(be->netif->dev, &dev->dev); + + kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE); + } +-- +1.7.3.4 + + +From f204d7567ab11ddb1ff3208ab5ed8921b575af5d Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Mon, 16 Mar 2009 22:05:16 +0000 +Subject: [PATCH 058/139] xen: netback: parent sysfs device should be set before registering. + +Signed-off-by: Ian Campbell +--- + drivers/xen/netback/common.h | 2 +- + drivers/xen/netback/interface.c | 4 +++- + drivers/xen/netback/xenbus.c | 3 +-- + 3 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index 6ba804d..123a169 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -180,7 +180,7 @@ void netif_accel_init(void); + + void netif_disconnect(struct xen_netif *netif); + +-struct xen_netif *netif_alloc(domid_t domid, unsigned int handle); ++struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle); + int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref, + unsigned long rx_ring_ref, unsigned int evtchn); + +diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c +index 7706170..5e0d26d 100644 +--- a/drivers/xen/netback/interface.c ++++ b/drivers/xen/netback/interface.c +@@ -178,7 +178,7 @@ static struct ethtool_ops network_ethtool_ops = + .get_strings = netbk_get_strings, + }; + +-struct xen_netif *netif_alloc(domid_t domid, unsigned int handle) ++struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle) + { + int err = 0; + struct net_device *dev; +@@ -192,6 +192,8 @@ struct xen_netif *netif_alloc(domid_t domid, unsigned int handle) + return ERR_PTR(-ENOMEM); + } + ++ SET_NETDEV_DEV(dev, parent); ++ + netif = netdev_priv(dev); + memset(netif, 0, sizeof(*netif)); + netif->domid = domid; +diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c +index dc7b367..749931e 100644 +--- a/drivers/xen/netback/xenbus.c ++++ b/drivers/xen/netback/xenbus.c +@@ -193,14 +193,13 @@ static void backend_create_netif(struct backend_info *be) + return; + } + +- be->netif = netif_alloc(dev->otherend_id, handle); ++ be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle); + if (IS_ERR(be->netif)) { + err = PTR_ERR(be->netif); + be->netif = NULL; + xenbus_dev_fatal(dev, err, "creating interface"); + return; + } +- SET_NETDEV_DEV(be->netif->dev, &dev->dev); + + kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE); + } +-- +1.7.3.4 + + +From bb606178665ea78b505cb54864899478b6020584 Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Thu, 19 Mar 2009 12:42:36 -0700 +Subject: [PATCH 059/139] xen: netback: use NET_SKB_PAD rather than "16" + +There's a constant for the 
default skb headroom. + +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/netback.c | 6 +++--- + 1 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index f59fadb..400f398 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -203,7 +203,7 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb) + if (unlikely(!nskb)) + goto err; + +- skb_reserve(nskb, 16 + NET_IP_ALIGN); ++ skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN); + headlen = skb_end_pointer(nskb) - nskb->data; + if (headlen > skb_headlen(skb)) + headlen = skb_headlen(skb); +@@ -1353,7 +1353,7 @@ static void net_tx_action(unsigned long unused) + ret < MAX_SKB_FRAGS) ? + PKT_PROT_LEN : txreq.size; + +- skb = alloc_skb(data_len + 16 + NET_IP_ALIGN, ++ skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN, + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(skb == NULL)) { + DPRINTK("Can't allocate a skb in start_xmit.\n"); +@@ -1362,7 +1362,7 @@ static void net_tx_action(unsigned long unused) + } + + /* Packets passed to netif_rx() must have some headroom. */ +- skb_reserve(skb, 16 + NET_IP_ALIGN); ++ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); + + if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { + struct xen_netif_extra_info *gso; +-- +1.7.3.4 + + +From fe41ab031dfa0c6f9821c2667ce821e7f4f635ed Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Thu, 19 Mar 2009 13:31:26 -0700 +Subject: [PATCH 060/139] xen: netback: completely drop flip support + +Nobody uses it? + +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/common.h | 1 - + drivers/xen/netback/netback.c | 245 ++++------------------------------------- + drivers/xen/netback/xenbus.c | 3 +- + 3 files changed, 22 insertions(+), 227 deletions(-) + +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index 123a169..06f04c1 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -78,7 +78,6 @@ struct xen_netif { + + /* Internal feature information. */ + u8 can_queue:1; /* can queue packets for receiver? */ +- u8 copying_receiver:1; /* copy packets to receiver? */ + + /* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */ + RING_IDX rx_req_cons_peek; +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 400f398..36bea2b 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -51,7 +51,6 @@ + struct netbk_rx_meta { + skb_frag_t frag; + int id; +- u8 copy:1; + }; + + struct netbk_tx_pending_inuse { +@@ -160,26 +159,6 @@ static inline unsigned long alloc_mfn(void) + return mfn_list[--alloc_index]; + } + +-static int check_mfn(int nr) +-{ +- struct xen_memory_reservation reservation = { +- .extent_order = 0, +- .domid = DOMID_SELF +- }; +- int rc; +- +- if (likely(alloc_index >= nr)) +- return 0; +- +- set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index); +- reservation.nr_extents = MAX_MFN_ALLOC - alloc_index; +- rc = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation); +- if (likely(rc > 0)) +- alloc_index += rc; +- +- return alloc_index >= nr ? 
0 : -ENOMEM; +-} +- + static inline void maybe_schedule_tx_action(void) + { + smp_mb(); +@@ -188,82 +167,6 @@ static inline void maybe_schedule_tx_action(void) + tasklet_schedule(&net_tx_tasklet); + } + +-static struct sk_buff *netbk_copy_skb(struct sk_buff *skb) +-{ +- struct skb_shared_info *ninfo; +- struct sk_buff *nskb; +- unsigned long offset; +- int ret; +- int len; +- int headlen; +- +- BUG_ON(skb_shinfo(skb)->frag_list != NULL); +- +- nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN); +- if (unlikely(!nskb)) +- goto err; +- +- skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN); +- headlen = skb_end_pointer(nskb) - nskb->data; +- if (headlen > skb_headlen(skb)) +- headlen = skb_headlen(skb); +- ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen); +- BUG_ON(ret); +- +- ninfo = skb_shinfo(nskb); +- ninfo->gso_size = skb_shinfo(skb)->gso_size; +- ninfo->gso_type = skb_shinfo(skb)->gso_type; +- +- offset = headlen; +- len = skb->len - headlen; +- +- nskb->len = skb->len; +- nskb->data_len = len; +- nskb->truesize += len; +- +- while (len) { +- struct page *page; +- int copy; +- int zero; +- +- if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) { +- dump_stack(); +- goto err_free; +- } +- +- copy = len >= PAGE_SIZE ? PAGE_SIZE : len; +- zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO; +- +- page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero); +- if (unlikely(!page)) +- goto err_free; +- +- ret = skb_copy_bits(skb, offset, page_address(page), copy); +- BUG_ON(ret); +- +- ninfo->frags[ninfo->nr_frags].page = page; +- ninfo->frags[ninfo->nr_frags].page_offset = 0; +- ninfo->frags[ninfo->nr_frags].size = copy; +- ninfo->nr_frags++; +- +- offset += copy; +- len -= copy; +- } +- +- offset = nskb->data - skb->data; +- +- nskb->transport_header = skb->transport_header + offset; +- nskb->network_header = skb->network_header + offset; +- nskb->mac_header = skb->mac_header + offset; +- +- return nskb; +- +- err_free: +- kfree_skb(nskb); +- err: +- return NULL; +-} +- + static inline int netbk_max_required_rx_slots(struct xen_netif *netif) + { + if (netif->features & (NETIF_F_SG|NETIF_F_TSO)) +@@ -297,24 +200,6 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) + if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif))) + goto drop; + +- /* +- * Copy the packet here if it's destined for a flipping interface +- * but isn't flippable (e.g. extra references to data). +- * XXX For now we also copy skbuffs whose head crosses a page +- * boundary, because netbk_gop_skb can't handle them. +- */ +- if (!netif->copying_receiver || +- ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE)) { +- struct sk_buff *nskb = netbk_copy_skb(skb); +- if ( unlikely(nskb == NULL) ) +- goto drop; +- /* Copy only the header fields we use in this driver. 
*/ +- nskb->dev = skb->dev; +- nskb->ip_summed = skb->ip_summed; +- dev_kfree_skb(skb); +- skb = nskb; +- } +- + netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 + + !!skb_shinfo(skb)->gso_size; + netif_get(netif); +@@ -388,66 +273,32 @@ static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta, + struct page *page, unsigned long size, + unsigned long offset) + { +- struct mmu_update *mmu; +- struct gnttab_transfer *gop; + struct gnttab_copy *copy_gop; +- struct multicall_entry *mcl; + struct xen_netif_rx_request *req; +- unsigned long old_mfn, new_mfn; ++ unsigned long old_mfn; + int idx = netif_page_index(page); + + old_mfn = virt_to_mfn(page_address(page)); + + req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i); +- if (netif->copying_receiver) { +- /* The fragment needs to be copied rather than +- flipped. */ +- meta->copy = 1; +- copy_gop = npo->copy + npo->copy_prod++; +- copy_gop->flags = GNTCOPY_dest_gref; +- if (idx > -1) { +- struct pending_tx_info *src_pend = &pending_tx_info[idx]; +- copy_gop->source.domid = src_pend->netif->domid; +- copy_gop->source.u.ref = src_pend->req.gref; +- copy_gop->flags |= GNTCOPY_source_gref; +- } else { +- copy_gop->source.domid = DOMID_SELF; +- copy_gop->source.u.gmfn = old_mfn; +- } +- copy_gop->source.offset = offset; +- copy_gop->dest.domid = netif->domid; +- copy_gop->dest.offset = 0; +- copy_gop->dest.u.ref = req->gref; +- copy_gop->len = size; +- } else { +- meta->copy = 0; +- if (!xen_feature(XENFEAT_auto_translated_physmap)) { +- new_mfn = alloc_mfn(); +- +- /* +- * Set the new P2M table entry before +- * reassigning the old data page. Heed the +- * comment in pgtable-2level.h:pte_page(). :-) +- */ +- set_phys_to_machine(page_to_pfn(page), new_mfn); +- +- mcl = npo->mcl + npo->mcl_prod++; +- MULTI_update_va_mapping(mcl, +- (unsigned long)page_address(page), +- mfn_pte(new_mfn, PAGE_KERNEL), +- 0); +- +- mmu = npo->mmu + npo->mmu_prod++; +- mmu->ptr = ((phys_addr_t)new_mfn << PAGE_SHIFT) | +- MMU_MACHPHYS_UPDATE; +- mmu->val = page_to_pfn(page); +- } + +- gop = npo->trans + npo->trans_prod++; +- gop->mfn = old_mfn; +- gop->domid = netif->domid; +- gop->ref = req->gref; ++ copy_gop = npo->copy + npo->copy_prod++; ++ copy_gop->flags = GNTCOPY_dest_gref; ++ if (idx > -1) { ++ struct pending_tx_info *src_pend = &pending_tx_info[idx]; ++ copy_gop->source.domid = src_pend->netif->domid; ++ copy_gop->source.u.ref = src_pend->req.gref; ++ copy_gop->flags |= GNTCOPY_source_gref; ++ } else { ++ copy_gop->source.domid = DOMID_SELF; ++ copy_gop->source.u.gmfn = old_mfn; + } ++ copy_gop->source.offset = offset; ++ copy_gop->dest.domid = netif->domid; ++ copy_gop->dest.offset = 0; ++ copy_gop->dest.u.ref = req->gref; ++ copy_gop->len = size; ++ + return req->id; + } + +@@ -502,41 +353,17 @@ static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta) + static int netbk_check_gop(int nr_frags, domid_t domid, + struct netrx_pending_operations *npo) + { +- struct multicall_entry *mcl; +- struct gnttab_transfer *gop; + struct gnttab_copy *copy_op; + int status = NETIF_RSP_OKAY; + int i; + + for (i = 0; i <= nr_frags; i++) { +- if (npo->meta[npo->meta_cons + i].copy) { + copy_op = npo->copy + npo->copy_cons++; + if (copy_op->status != GNTST_okay) { + DPRINTK("Bad status %d from copy to DOM%d.\n", + copy_op->status, domid); + status = NETIF_RSP_ERROR; + } +- } else { +- if (!xen_feature(XENFEAT_auto_translated_physmap)) { +- mcl = npo->mcl + npo->mcl_cons++; +- /* The update_va_mapping() must not fail. 
*/ +- BUG_ON(mcl->result != 0); +- } +- +- gop = npo->trans + npo->trans_cons++; +- /* Check the reassignment error code. */ +- if (gop->status != 0) { +- DPRINTK("Bad status %d from grant transfer to DOM%u\n", +- gop->status, domid); +- /* +- * Page no longer belongs to us unless +- * GNTST_bad_page, but that should be +- * a fatal error anyway. +- */ +- BUG_ON(gop->status == GNTST_bad_page); +- status = NETIF_RSP_ERROR; +- } +- } + } + + return status; +@@ -551,11 +378,8 @@ static void netbk_add_frag_responses(struct xen_netif *netif, int status, + for (i = 0; i < nr_frags; i++) { + int id = meta[i].id; + int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data; +- +- if (meta[i].copy) +- offset = 0; +- else +- offset = meta[i].frag.page_offset; ++ ++ offset = 0; + make_rx_response(netif, id, status, offset, + meta[i].frag.size, flags); + } +@@ -603,18 +427,6 @@ static void net_rx_action(unsigned long unused) + nr_frags = skb_shinfo(skb)->nr_frags; + *(int *)skb->cb = nr_frags; + +- if (!xen_feature(XENFEAT_auto_translated_physmap) && +- !((struct xen_netif *)netdev_priv(skb->dev))->copying_receiver && +- check_mfn(nr_frags + 1)) { +- /* Memory squeeze? Back off for an arbitrary while. */ +- if ( net_ratelimit() ) +- WPRINTK("Memory squeeze in netback " +- "driver.\n"); +- mod_timer(&net_timer, jiffies + HZ); +- skb_queue_head(&rx_queue, skb); +- break; +- } +- + netbk_gop_skb(skb, &npo); + + count += nr_frags + 1; +@@ -677,20 +489,6 @@ static void net_rx_action(unsigned long unused) + nr_frags = *(int *)skb->cb; + + netif = netdev_priv(skb->dev); +- /* We can't rely on skb_release_data to release the +- pages used by fragments for us, since it tries to +- touch the pages in the fraglist. If we're in +- flipping mode, that doesn't work. In copying mode, +- we still have access to all of the pages, and so +- it's safe to let release_data deal with it. */ +- /* (Freeing the fragments is safe since we copy +- non-linear skbs destined for flipping interfaces) */ +- if (!netif->copying_receiver) { +- atomic_set(&(skb_shinfo(skb)->dataref), 1); +- skb_shinfo(skb)->frag_list = NULL; +- skb_shinfo(skb)->nr_frags = 0; +- netbk_free_pages(nr_frags, meta + npo.meta_cons + 1); +- } + + netif->stats.tx_bytes += skb->len; + netif->stats.tx_packets++; +@@ -706,10 +504,7 @@ static void net_rx_action(unsigned long unused) + /* remote but checksummed. */ + flags |= NETRXF_data_validated; + +- if (meta[npo.meta_cons].copy) +- offset = 0; +- else +- offset = offset_in_page(skb->data); ++ offset = 0; + resp = make_rx_response(netif, id, status, offset, + skb_headlen(skb), flags); + +diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c +index 749931e..a492288 100644 +--- a/drivers/xen/netback/xenbus.c ++++ b/drivers/xen/netback/xenbus.c +@@ -378,7 +378,8 @@ static int connect_rings(struct backend_info *be) + dev->otherend); + return err; + } +- be->netif->copying_receiver = !!rx_copy; ++ if (!rx_copy) ++ return -EOPNOTSUPP; + + if (be->netif->dev->tx_queue_len != 0) { + if (xenbus_scanf(XBT_NIL, dev->otherend, +-- +1.7.3.4 + + +From 17d465234118873ab4f5a7992feb4ce7b5537cf7 Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Thu, 19 Mar 2009 15:19:39 -0700 +Subject: [PATCH 061/139] xen: netback: demacro MASK_PEND_IDX + +Replace it with a more meaningful inline: pending_index(). 
+ +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/netback.c | 21 +++++++++++++-------- + 1 files changed, 13 insertions(+), 8 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 36bea2b..4095622 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -121,7 +121,12 @@ static struct pending_tx_info { + } pending_tx_info[MAX_PENDING_REQS]; + static u16 pending_ring[MAX_PENDING_REQS]; + typedef unsigned int PEND_RING_IDX; +-#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1)) ++ ++static inline PEND_RING_IDX pending_index(unsigned i) ++{ ++ return i & (MAX_PENDING_REQS-1); ++} ++ + static PEND_RING_IDX pending_prod, pending_cons; + #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) + +@@ -695,7 +700,7 @@ inline static void net_tx_action_dealloc(void) + while (dc != dp) { + unsigned long pfn; + +- pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)]; ++ pending_idx = dealloc_ring[pending_index(dc++)]; + list_move_tail(&pending_inuse[pending_idx].list, &list); + + pfn = idx_to_pfn(pending_idx); +@@ -754,7 +759,7 @@ inline static void net_tx_action_dealloc(void) + /* Ready for next use. */ + gnttab_reset_grant_page(mmap_pages[pending_idx]); + +- pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; ++ pending_ring[pending_index(pending_prod++)] = pending_idx; + + netif_put(netif); + +@@ -831,7 +836,7 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif, + start = ((unsigned long)shinfo->frags[0].page == pending_idx); + + for (i = start; i < shinfo->nr_frags; i++, txp++) { +- pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)]; ++ pending_idx = pending_ring[pending_index(pending_cons++)]; + + gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx), + GNTMAP_host_map | GNTMAP_readonly, +@@ -862,7 +867,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb, + if (unlikely(err)) { + txp = &pending_tx_info[pending_idx].req; + make_tx_response(netif, txp, NETIF_RSP_ERROR); +- pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; ++ pending_ring[pending_index(pending_prod++)] = pending_idx; + netif_put(netif); + } else { + set_phys_to_machine( +@@ -895,7 +900,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb, + /* Error on this fragment: respond to client with an error. */ + txp = &pending_tx_info[pending_idx].req; + make_tx_response(netif, txp, NETIF_RSP_ERROR); +- pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; ++ pending_ring[pending_index(pending_prod++)] = pending_idx; + netif_put(netif); + + /* Not the first error? Preceding frags already invalidated. */ +@@ -1142,7 +1147,7 @@ static void net_tx_action(unsigned long unused) + continue; + } + +- pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; ++ pending_idx = pending_ring[pending_index(pending_cons)]; + + data_len = (txreq.size > PKT_PROT_LEN && + ret < MAX_SKB_FRAGS) ? +@@ -1298,7 +1303,7 @@ static void netif_idx_release(u16 pending_idx) + unsigned long flags; + + spin_lock_irqsave(&_lock, flags); +- dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx; ++ dealloc_ring[pending_index(dealloc_prod)] = pending_idx; + /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. 
*/ + smp_wmb(); + dealloc_prod++; +-- +1.7.3.4 + + +From d47af34f87b2d365c75aa3579ad512619ef3d579 Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Thu, 19 Mar 2009 15:29:30 -0700 +Subject: [PATCH 062/139] xen: netback: convert PEND_RING_IDX into a proper typedef name + +Rename PEND_RING_IDX to pending_ring_idx_t. Its not used that much, +the extra typing won't kill anyone. + +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/netback.c | 12 ++++++------ + 1 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 4095622..8292e96 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -120,19 +120,19 @@ static struct pending_tx_info { + struct xen_netif *netif; + } pending_tx_info[MAX_PENDING_REQS]; + static u16 pending_ring[MAX_PENDING_REQS]; +-typedef unsigned int PEND_RING_IDX; ++typedef unsigned int pending_ring_idx_t; + +-static inline PEND_RING_IDX pending_index(unsigned i) ++static inline pending_ring_idx_t pending_index(unsigned i) + { + return i & (MAX_PENDING_REQS-1); + } + +-static PEND_RING_IDX pending_prod, pending_cons; ++static pending_ring_idx_t pending_prod, pending_cons; + #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) + + /* Freed TX SKBs get batched on this ring before return to pending_ring. */ + static u16 dealloc_ring[MAX_PENDING_REQS]; +-static PEND_RING_IDX dealloc_prod, dealloc_cons; ++static pending_ring_idx_t dealloc_prod, dealloc_cons; + + /* Doubly-linked list of in-use pending entries. */ + static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS]; +@@ -669,7 +669,7 @@ static void tx_credit_callback(unsigned long data) + netif_schedule_work(netif); + } + +-static inline int copy_pending_req(PEND_RING_IDX pending_idx) ++static inline int copy_pending_req(pending_ring_idx_t pending_idx) + { + return gnttab_copy_grant_page(grant_tx_handle[pending_idx], + &mmap_pages[pending_idx]); +@@ -680,7 +680,7 @@ inline static void net_tx_action_dealloc(void) + struct netbk_tx_pending_inuse *inuse, *n; + struct gnttab_unmap_grant_ref *gop; + u16 pending_idx; +- PEND_RING_IDX dc, dp; ++ pending_ring_idx_t dc, dp; + struct xen_netif *netif; + int ret; + LIST_HEAD(list); +-- +1.7.3.4 + + +From 56727a43f329d50c2a00fed0316ffd87d6c23ebd Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Thu, 19 Mar 2009 15:31:32 -0700 +Subject: [PATCH 063/139] xen: netback: rename NR_PENDING_REQS to nr_pending_reqs() + +Use function syntax to show its actually computing a value, rather than +a constant. + +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/netback.c | 10 +++++++--- + 1 files changed, 7 insertions(+), 3 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 8292e96..5410a68 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -128,7 +128,11 @@ static inline pending_ring_idx_t pending_index(unsigned i) + } + + static pending_ring_idx_t pending_prod, pending_cons; +-#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) ++ ++static inline pending_ring_idx_t nr_pending_reqs(void) ++{ ++ return MAX_PENDING_REQS - pending_prod + pending_cons; ++} + + /* Freed TX SKBs get batched on this ring before return to pending_ring. 
*/ + static u16 dealloc_ring[MAX_PENDING_REQS]; +@@ -167,7 +171,7 @@ static inline unsigned long alloc_mfn(void) + static inline void maybe_schedule_tx_action(void) + { + smp_mb(); +- if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && ++ if ((nr_pending_reqs() < (MAX_PENDING_REQS/2)) && + !list_empty(&net_schedule_list)) + tasklet_schedule(&net_tx_tasklet); + } +@@ -1060,7 +1064,7 @@ static void net_tx_action(unsigned long unused) + net_tx_action_dealloc(); + + mop = tx_map_ops; +- while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && ++ while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && + !list_empty(&net_schedule_list)) { + /* Get a netif from the list with work to do. */ + ent = net_schedule_list.next; +-- +1.7.3.4 + + +From 55b360614f1bd44d0b1395b4aabf41d8f1f13f17 Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Thu, 19 Mar 2009 15:45:45 -0700 +Subject: [PATCH 064/139] xen: netback: pre-initialize list and spinlocks; use empty list to indicate not on list + +Statically pre-initialize net_schedule_list head and lock. + +Use an empty list to mark when a xen_netif is not on the schedule list, +rather than NULL (which may upset list debugging). + +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/interface.c | 1 + + drivers/xen/netback/netback.c | 12 ++++-------- + 2 files changed, 5 insertions(+), 8 deletions(-) + +diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c +index 5e0d26d..dc4fb53 100644 +--- a/drivers/xen/netback/interface.c ++++ b/drivers/xen/netback/interface.c +@@ -201,6 +201,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int + atomic_set(&netif->refcnt, 1); + init_waitqueue_head(&netif->waiting_to_free); + netif->dev = dev; ++ INIT_LIST_HEAD(&netif->list); + + netback_carrier_off(netif); + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 5410a68..cbd4b03 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -148,8 +148,8 @@ static grant_handle_t grant_tx_handle[MAX_PENDING_REQS]; + static struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS]; + static struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS]; + +-static struct list_head net_schedule_list; +-static spinlock_t net_schedule_list_lock; ++static LIST_HEAD(net_schedule_list); ++static DEFINE_SPINLOCK(net_schedule_list_lock); + + #define MAX_MFN_ALLOC 64 + static unsigned long mfn_list[MAX_MFN_ALLOC]; +@@ -588,15 +588,14 @@ struct net_device_stats *netif_be_get_stats(struct net_device *dev) + + static int __on_net_schedule_list(struct xen_netif *netif) + { +- return netif->list.next != NULL; ++ return !list_empty(&netif->list); + } + + static void remove_from_net_schedule_list(struct xen_netif *netif) + { + spin_lock_irq(&net_schedule_list_lock); + if (likely(__on_net_schedule_list(netif))) { +- list_del(&netif->list); +- netif->list.next = NULL; ++ list_del_init(&netif->list); + netif_put(netif); + } + spin_unlock_irq(&net_schedule_list_lock); +@@ -1466,9 +1465,6 @@ static int __init netback_init(void) + for (i = 0; i < MAX_PENDING_REQS; i++) + pending_ring[i] = i; + +- spin_lock_init(&net_schedule_list_lock); +- INIT_LIST_HEAD(&net_schedule_list); +- + netbk_copy_skb_mode = NETBK_DONT_COPY_SKB; + if (MODPARM_copy_skb) { + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace, +-- +1.7.3.4 + + +From e12cf57de7a6c20e4c8900ce7bf4e6924a12f49e Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Thu, 19 Mar 2009 15:48:10 -0700 
+Subject: [PATCH 065/139] xen: netback: remove CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER + +Keir says: +> > Does CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER need to be a config +> > option? Could/should we always/never set it? +> It doesn't work well with local delivery into dom0, nor even with IP +> fragment reassembly. I don't think we would ever turn it on these days. + +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/netback.c | 21 --------------------- + 1 files changed, 0 insertions(+), 21 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index cbd4b03..f00e405 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -615,23 +615,11 @@ static void add_to_net_schedule_list_tail(struct xen_netif *netif) + spin_unlock_irq(&net_schedule_list_lock); + } + +-/* +- * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER: +- * If this driver is pipelining transmit requests then we can be very +- * aggressive in avoiding new-packet notifications -- frontend only needs to +- * send a notification if there are no outstanding unreceived responses. +- * If we may be buffer transmit buffers for any reason then we must be rather +- * more conservative and treat this as the final check for pending work. +- */ + void netif_schedule_work(struct xen_netif *netif) + { + int more_to_do; + +-#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER +- more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx); +-#else + RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); +-#endif + + if (more_to_do) { + add_to_net_schedule_list_tail(netif); +@@ -1355,15 +1343,6 @@ static void make_tx_response(struct xen_netif *netif, + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify); + if (notify) + notify_remote_via_irq(netif->irq); +- +-#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER +- if (i == netif->tx.req_cons) { +- int more_to_do; +- RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); +- if (more_to_do) +- add_to_net_schedule_list_tail(netif); +- } +-#endif + } + + static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif, +-- +1.7.3.4 + + +From adf542f9c714e3b7c76fcf9e44e0a89cae21a341 Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Thu, 19 Mar 2009 22:28:52 -0700 +Subject: [PATCH 066/139] xen: netback: make netif_get/put inlines + +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/common.h | 16 ++++++++++------ + 1 files changed, 10 insertions(+), 6 deletions(-) + +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index 06f04c1..9056be0 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -183,12 +183,16 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int + int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref, + unsigned long rx_ring_ref, unsigned int evtchn); + +-#define netif_get(_b) (atomic_inc(&(_b)->refcnt)) +-#define netif_put(_b) \ +- do { \ +- if ( atomic_dec_and_test(&(_b)->refcnt) ) \ +- wake_up(&(_b)->waiting_to_free); \ +- } while (0) ++static inline void netif_get(struct xen_netif *netif) ++{ ++ atomic_inc(&netif->refcnt); ++} ++ ++static inline void netif_put(struct xen_netif *netif) ++{ ++ if (atomic_dec_and_test(&netif->refcnt)) ++ wake_up(&netif->waiting_to_free); ++} + + void netif_xenbus_init(void); + +-- +1.7.3.4 + + +From f06459a199f953a68f001f06e54dde54e1e16c87 Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Thu, 19 Mar 2009 22:30:24 -0700 +Subject: [PATCH 067/139] xen: 
netback: move code around + +net_tx_action() into several functions; move variables into +their innermost scopes; rename "i" to "idx". + +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/netback.c | 158 ++++++++++++++++++++++++----------------- + 1 files changed, 94 insertions(+), 64 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index f00e405..4d63ff3 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -773,7 +773,8 @@ static void netbk_tx_err(struct xen_netif *netif, struct xen_netif_tx_request *t + netif_put(netif); + } + +-static int netbk_count_requests(struct xen_netif *netif, struct xen_netif_tx_request *first, ++static int netbk_count_requests(struct xen_netif *netif, ++ struct xen_netif_tx_request *first, + struct xen_netif_tx_request *txp, int work_to_do) + { + RING_IDX cons = netif->tx.req_cons; +@@ -1032,30 +1033,58 @@ out: + return err; + } + +-/* Called after netfront has transmitted */ +-static void net_tx_action(unsigned long unused) ++static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size) + { +- struct list_head *ent; +- struct sk_buff *skb; +- struct xen_netif *netif; +- struct xen_netif_tx_request txreq; +- struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS]; +- struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; +- u16 pending_idx; +- RING_IDX i; +- struct gnttab_map_grant_ref *mop; +- unsigned int data_len; +- int ret, work_to_do; ++ unsigned long now = jiffies; ++ unsigned long next_credit = ++ netif->credit_timeout.expires + ++ msecs_to_jiffies(netif->credit_usec / 1000); ++ ++ /* Timer could already be pending in rare cases. */ ++ if (timer_pending(&netif->credit_timeout)) ++ return true; ++ ++ /* Passed the point where we can replenish credit? */ ++ if (time_after_eq(now, next_credit)) { ++ netif->credit_timeout.expires = now; ++ tx_add_credit(netif); ++ } + +- if (dealloc_cons != dealloc_prod) +- net_tx_action_dealloc(); ++ /* Still too big to send right now? Set a callback. */ ++ if (size > netif->remaining_credit) { ++ netif->credit_timeout.data = ++ (unsigned long)netif; ++ netif->credit_timeout.function = ++ tx_credit_callback; ++ mod_timer(&netif->credit_timeout, ++ next_credit); ++ ++ return true; ++ } ++ ++ return false; ++} ++ ++static unsigned net_tx_build_mops(void) ++{ ++ struct gnttab_map_grant_ref *mop; ++ struct sk_buff *skb; ++ int ret; + + mop = tx_map_ops; + while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && + !list_empty(&net_schedule_list)) { ++ struct xen_netif *netif; ++ struct xen_netif_tx_request txreq; ++ struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS]; ++ struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; ++ u16 pending_idx; ++ RING_IDX idx; ++ int work_to_do; ++ unsigned int data_len; ++ + /* Get a netif from the list with work to do. */ +- ent = net_schedule_list.next; +- netif = list_entry(ent, struct xen_netif, list); ++ netif = list_first_entry(&net_schedule_list, struct xen_netif, list); + netif_get(netif); + remove_from_net_schedule_list(netif); + +@@ -1065,67 +1094,43 @@ static void net_tx_action(unsigned long unused) + continue; + } + +- i = netif->tx.req_cons; ++ idx = netif->tx.req_cons; + rmb(); /* Ensure that we see the request before we copy it. */ +- memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq)); ++ memcpy(&txreq, RING_GET_REQUEST(&netif->tx, idx), sizeof(txreq)); + + /* Credit-based scheduling. 
*/ +- if (txreq.size > netif->remaining_credit) { +- unsigned long now = jiffies; +- unsigned long next_credit = +- netif->credit_timeout.expires + +- msecs_to_jiffies(netif->credit_usec / 1000); +- +- /* Timer could already be pending in rare cases. */ +- if (timer_pending(&netif->credit_timeout)) { +- netif_put(netif); +- continue; +- } +- +- /* Passed the point where we can replenish credit? */ +- if (time_after_eq(now, next_credit)) { +- netif->credit_timeout.expires = now; +- tx_add_credit(netif); +- } +- +- /* Still too big to send right now? Set a callback. */ +- if (txreq.size > netif->remaining_credit) { +- netif->credit_timeout.data = +- (unsigned long)netif; +- netif->credit_timeout.function = +- tx_credit_callback; +- mod_timer(&netif->credit_timeout, +- next_credit); +- netif_put(netif); +- continue; +- } ++ if (txreq.size > netif->remaining_credit && ++ tx_credit_exceeded(netif, txreq.size)) { ++ netif_put(netif); ++ continue; + } ++ + netif->remaining_credit -= txreq.size; + + work_to_do--; +- netif->tx.req_cons = ++i; ++ netif->tx.req_cons = ++idx; + + memset(extras, 0, sizeof(extras)); + if (txreq.flags & NETTXF_extra_info) { + work_to_do = netbk_get_extras(netif, extras, + work_to_do); +- i = netif->tx.req_cons; ++ idx = netif->tx.req_cons; + if (unlikely(work_to_do < 0)) { +- netbk_tx_err(netif, &txreq, i); ++ netbk_tx_err(netif, &txreq, idx); + continue; + } + } + + ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do); + if (unlikely(ret < 0)) { +- netbk_tx_err(netif, &txreq, i - ret); ++ netbk_tx_err(netif, &txreq, idx - ret); + continue; + } +- i += ret; ++ idx += ret; + + if (unlikely(txreq.size < ETH_HLEN)) { + DPRINTK("Bad packet size: %d\n", txreq.size); +- netbk_tx_err(netif, &txreq, i); ++ netbk_tx_err(netif, &txreq, idx); + continue; + } + +@@ -1134,7 +1139,7 @@ static void net_tx_action(unsigned long unused) + DPRINTK("txreq.offset: %x, size: %u, end: %lu\n", + txreq.offset, txreq.size, + (txreq.offset &~PAGE_MASK) + txreq.size); +- netbk_tx_err(netif, &txreq, i); ++ netbk_tx_err(netif, &txreq, idx); + continue; + } + +@@ -1148,7 +1153,7 @@ static void net_tx_action(unsigned long unused) + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(skb == NULL)) { + DPRINTK("Can't allocate a skb in start_xmit.\n"); +- netbk_tx_err(netif, &txreq, i); ++ netbk_tx_err(netif, &txreq, idx); + break; + } + +@@ -1161,7 +1166,7 @@ static void net_tx_action(unsigned long unused) + + if (netbk_set_skb_gso(skb, gso)) { + kfree_skb(skb); +- netbk_tx_err(netif, &txreq, i); ++ netbk_tx_err(netif, &txreq, idx); + continue; + } + } +@@ -1199,23 +1204,27 @@ static void net_tx_action(unsigned long unused) + + mop = netbk_get_requests(netif, skb, txfrags, mop); + +- netif->tx.req_cons = i; ++ netif->tx.req_cons = idx; + netif_schedule_work(netif); + + if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops)) + break; + } + +- if (mop == tx_map_ops) +- return; ++ return mop - tx_map_ops; ++} + +- ret = HYPERVISOR_grant_table_op( +- GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops); +- BUG_ON(ret); ++static void net_tx_submit(void) ++{ ++ struct gnttab_map_grant_ref *mop; ++ struct sk_buff *skb; + + mop = tx_map_ops; + while ((skb = __skb_dequeue(&tx_queue)) != NULL) { + struct xen_netif_tx_request *txp; ++ struct xen_netif *netif; ++ u16 pending_idx; ++ unsigned data_len; + + pending_idx = *((u16 *)skb->data); + netif = pending_tx_info[pending_idx].netif; +@@ -1288,6 +1297,27 @@ static void net_tx_action(unsigned long unused) + } + } + ++/* Called after netfront has transmitted */ ++static 
void net_tx_action(unsigned long unused) ++{ ++ unsigned nr_mops; ++ int ret; ++ ++ if (dealloc_cons != dealloc_prod) ++ net_tx_action_dealloc(); ++ ++ nr_mops = net_tx_build_mops(); ++ ++ if (nr_mops == 0) ++ return; ++ ++ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, ++ tx_map_ops, nr_mops); ++ BUG_ON(ret); ++ ++ net_tx_submit(); ++} ++ + static void netif_idx_release(u16 pending_idx) + { + static DEFINE_SPINLOCK(_lock); +-- +1.7.3.4 + + +From cec84ff81d9f6ca882908572b984215529b4117b Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Fri, 20 Mar 2009 23:18:12 -0700 +Subject: [PATCH 068/139] xen: netback: document PKT_PROT_LEN + +Document the rationale for the existence and value of PKT_PROT_LEN. + +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/netback.c | 9 +++++++++ + 1 files changed, 9 insertions(+), 0 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 4d63ff3..80b424f 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -113,6 +113,15 @@ static inline int netif_page_index(struct page *pg) + return idx; + } + ++/* ++ * This is the amount of packet we copy rather than map, so that the ++ * guest can't fiddle with the contents of the headers while we do ++ * packet processing on them (netfilter, routing, etc). This could ++ * probably do with being larger, since 1) 64-bytes isn't necessarily ++ * long enough to cover a full christmas-tree ip+tcp header, let alone ++ * packet contents, and 2) the data is probably in cache anyway ++ * (though perhaps some other cpu's cache). ++ */ + #define PKT_PROT_LEN 64 + + static struct pending_tx_info { +-- +1.7.3.4 + + +From a9402ee935757e8facebc6e886f9912c2c523da7 Mon Sep 17 00:00:00 2001 +From: Christophe Saout +Date: Sun, 12 Apr 2009 13:40:27 +0200 +Subject: [PATCH 069/139] xen: netback: use dev_name() instead of removed ->bus_id. 
+ +Signed-off-by: Christophe Saout +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/interface.c | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + +diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c +index dc4fb53..3bb5c20 100644 +--- a/drivers/xen/netback/interface.c ++++ b/drivers/xen/netback/interface.c +@@ -123,7 +123,7 @@ static void netbk_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) + { + strcpy(info->driver, "netbk"); +- strcpy(info->bus_info, dev->dev.parent->bus_id); ++ strcpy(info->bus_info, dev_name(dev->dev.parent)); + } + + static const struct netif_stat { +-- +1.7.3.4 + + +From 35de1701fca19d693e9722bffbe7609caf1d5ac6 Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Mon, 29 Jun 2009 14:04:23 -0700 +Subject: [PATCH 070/139] xen: netback: convert to net_device_ops + +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/interface.c | 17 +++++++++++------ + 1 files changed, 11 insertions(+), 6 deletions(-) + +diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c +index 3bb5c20..21c1f95 100644 +--- a/drivers/xen/netback/interface.c ++++ b/drivers/xen/netback/interface.c +@@ -178,6 +178,15 @@ static struct ethtool_ops network_ethtool_ops = + .get_strings = netbk_get_strings, + }; + ++static struct net_device_ops netback_ops = ++{ ++ .ndo_start_xmit = netif_be_start_xmit, ++ .ndo_get_stats = netif_be_get_stats, ++ .ndo_open = net_open, ++ .ndo_stop = net_close, ++ .ndo_change_mtu = netbk_change_mtu, ++}; ++ + struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle) + { + int err = 0; +@@ -213,12 +222,8 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int + + init_timer(&netif->tx_queue_timeout); + +- dev->hard_start_xmit = netif_be_start_xmit; +- dev->get_stats = netif_be_get_stats; +- dev->open = net_open; +- dev->stop = net_close; +- dev->change_mtu = netbk_change_mtu; +- dev->features = NETIF_F_IP_CSUM; ++ dev->netdev_ops = &netback_ops; ++ dev->features = NETIF_F_IP_CSUM; + + SET_ETHTOOL_OPS(dev, &network_ethtool_ops); + +-- +1.7.3.4 + + +From c6f3885ef05e96489025e1c1c7299aac7cf43d87 Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Fri, 4 Sep 2009 14:55:43 -0700 +Subject: [PATCH 071/139] xen: netback: reinstate missing code + +Change c3219dc868fe3e84070d6da2d0759a834b6f7251, "Completely drop flip +support" was a bit too aggressive in removing code, and removed a chunk +which was used for not only flip but if a buffer crossed a page boundary. +Reinstate that code. 
+ +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/netback.c | 91 +++++++++++++++++++++++++++++++++++++++++ + 1 files changed, 91 insertions(+), 0 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 80b424f..7c0f05b 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -185,6 +185,82 @@ static inline void maybe_schedule_tx_action(void) + tasklet_schedule(&net_tx_tasklet); + } + ++static struct sk_buff *netbk_copy_skb(struct sk_buff *skb) ++{ ++ struct skb_shared_info *ninfo; ++ struct sk_buff *nskb; ++ unsigned long offset; ++ int ret; ++ int len; ++ int headlen; ++ ++ BUG_ON(skb_shinfo(skb)->frag_list != NULL); ++ ++ nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN); ++ if (unlikely(!nskb)) ++ goto err; ++ ++ skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN); ++ headlen = skb_end_pointer(nskb) - nskb->data; ++ if (headlen > skb_headlen(skb)) ++ headlen = skb_headlen(skb); ++ ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen); ++ BUG_ON(ret); ++ ++ ninfo = skb_shinfo(nskb); ++ ninfo->gso_size = skb_shinfo(skb)->gso_size; ++ ninfo->gso_type = skb_shinfo(skb)->gso_type; ++ ++ offset = headlen; ++ len = skb->len - headlen; ++ ++ nskb->len = skb->len; ++ nskb->data_len = len; ++ nskb->truesize += len; ++ ++ while (len) { ++ struct page *page; ++ int copy; ++ int zero; ++ ++ if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) { ++ dump_stack(); ++ goto err_free; ++ } ++ ++ copy = len >= PAGE_SIZE ? PAGE_SIZE : len; ++ zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO; ++ ++ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero); ++ if (unlikely(!page)) ++ goto err_free; ++ ++ ret = skb_copy_bits(skb, offset, page_address(page), copy); ++ BUG_ON(ret); ++ ++ ninfo->frags[ninfo->nr_frags].page = page; ++ ninfo->frags[ninfo->nr_frags].page_offset = 0; ++ ninfo->frags[ninfo->nr_frags].size = copy; ++ ninfo->nr_frags++; ++ ++ offset += copy; ++ len -= copy; ++ } ++ ++ offset = nskb->data - skb->data; ++ ++ nskb->transport_header = skb->transport_header + offset; ++ nskb->network_header = skb->network_header + offset; ++ nskb->mac_header = skb->mac_header + offset; ++ ++ return nskb; ++ ++ err_free: ++ kfree_skb(nskb); ++ err: ++ return NULL; ++} ++ + static inline int netbk_max_required_rx_slots(struct xen_netif *netif) + { + if (netif->features & (NETIF_F_SG|NETIF_F_TSO)) +@@ -218,6 +294,21 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) + if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif))) + goto drop; + ++ /* ++ * XXX For now we also copy skbuffs whose head crosses a page ++ * boundary, because netbk_gop_skb can't handle them. ++ */ ++ if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) { ++ struct sk_buff *nskb = netbk_copy_skb(skb); ++ if ( unlikely(nskb == NULL) ) ++ goto drop; ++ /* Copy only the header fields we use in this driver. 
*/ ++ nskb->dev = skb->dev; ++ nskb->ip_summed = skb->ip_summed; ++ dev_kfree_skb(skb); ++ skb = nskb; ++ } ++ + netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 + + !!skb_shinfo(skb)->gso_size; + netif_get(netif); +-- +1.7.3.4 + + +From 2e290d790877df4368691180f76206ad27a42505 Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Wed, 9 Sep 2009 15:19:15 -0700 +Subject: [PATCH 072/139] xen: netback: remove debug noise + +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/netback.c | 2 -- + 1 files changed, 0 insertions(+), 2 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 7c0f05b..d7d738e 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -1537,8 +1537,6 @@ static int __init netback_init(void) + int i; + struct page *page; + +- printk(KERN_CRIT "*** netif_init\n"); +- + if (!xen_domain()) + return -ENODEV; + +-- +1.7.3.4 + + +From 3ba3bb7d563704c3050de6116aa0a761a5791428 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Thu, 8 Oct 2009 13:23:09 -0400 +Subject: [PATCH 073/139] Fix compile warnings: ignoring return value of 'xenbus_register_backend' .. + +We neglect to check the return value of xenbus_register_backend +and take actions when that fails. This patch fixes that and adds +code to deal with those type of failures. + +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/common.h | 2 +- + drivers/xen/netback/netback.c | 12 +++++++++++- + drivers/xen/netback/xenbus.c | 4 ++-- + 3 files changed, 14 insertions(+), 4 deletions(-) + +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index 9056be0..0675946 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -194,7 +194,7 @@ static inline void netif_put(struct xen_netif *netif) + wake_up(&netif->waiting_to_free); + } + +-void netif_xenbus_init(void); ++int netif_xenbus_init(void); + + #define netif_schedulable(netif) \ + (netif_running((netif)->dev) && netback_carrier_ok(netif)) +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index d7d738e..860c61e 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -1536,6 +1536,7 @@ static int __init netback_init(void) + { + int i; + struct page *page; ++ int rc = 0; + + if (!xen_domain()) + return -ENODEV; +@@ -1583,7 +1584,9 @@ static int __init netback_init(void) + + //netif_accel_init(); + +- netif_xenbus_init(); ++ rc = netif_xenbus_init(); ++ if (rc) ++ goto failed_init; + + #ifdef NETBE_DEBUG_INTERRUPT + (void)bind_virq_to_irqhandler(VIRQ_DEBUG, +@@ -1595,6 +1598,13 @@ static int __init netback_init(void) + #endif + + return 0; ++ ++failed_init: ++ free_empty_pages_and_pagevec(mmap_pages, MAX_PENDING_REQS); ++ del_timer(&netbk_tx_pending_timer); ++ del_timer(&net_timer); ++ return rc; ++ + } + + module_init(netback_init); +diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c +index a492288..c46b235 100644 +--- a/drivers/xen/netback/xenbus.c ++++ b/drivers/xen/netback/xenbus.c +@@ -447,8 +447,8 @@ static struct xenbus_driver netback = { + }; + + +-void netif_xenbus_init(void) ++int netif_xenbus_init(void) + { + printk(KERN_CRIT "registering netback\n"); +- (void)xenbus_register_backend(&netback); ++ return xenbus_register_backend(&netback); + } +-- +1.7.3.4 + + +From 4bc919e07d5dc48cb95b22cc6e90c6110c229343 Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Tue, 27 Oct 2009 12:37:50 -0700 
+Subject: [PATCH 074/139] xen: netback: don't screw around with packet gso state + +These lines were reverted from 2.6.18 netback as the network stack +was modified to deal with packets shorter than the gso size, so there's +no need to fiddle with the gso state in netback. + +Taken from linux-2.6.18-xen.hg change 8081d19dce89 + +Signed-off-by: Ian Campbell +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/netback.c | 5 ----- + 1 files changed, 0 insertions(+), 5 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 860c61e..9a14976 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -1293,11 +1293,6 @@ static unsigned net_tx_build_mops(void) + skb_shinfo(skb)->frags[0].page = (void *)~0UL; + } + +- if (skb->data_len < skb_shinfo(skb)->gso_size) { +- skb_shinfo(skb)->gso_size = 0; +- skb_shinfo(skb)->gso_type = 0; +- } +- + __skb_queue_tail(&tx_queue, skb); + + pending_cons++; +-- +1.7.3.4 + + +From f2b947783c47a721497e5d325c736234f71501e7 Mon Sep 17 00:00:00 2001 +From: Steven Smith +Date: Fri, 30 Oct 2009 13:55:23 -0700 +Subject: [PATCH 075/139] xen: netback: make sure that pg->mapping is never NULL for a page mapped from a foreign domain. + +Otherwise, the foreign maps tracking infrastructure gets confused, and +thinks that the foreign page is local. This means that you can't +forward that packet to another foreign domain. This leads to very +high packet drop, and hence very poor performance. + +Signed-off-by: Steven Smith +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/netback.c | 4 ++-- + 1 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 9a14976..111fec7 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -97,12 +97,12 @@ static inline unsigned long idx_to_kaddr(unsigned int idx) + /* extra field used in struct page */ + static inline void netif_set_page_index(struct page *pg, unsigned int index) + { +- *(unsigned long *)&pg->mapping = index; ++ *(unsigned long *)&pg->mapping = index + 1; + } + + static inline int netif_page_index(struct page *pg) + { +- unsigned long idx = (unsigned long)pg->mapping; ++ unsigned long idx = (unsigned long)pg->mapping - 1; + + if (!PageForeign(pg)) + return -1; +-- +1.7.3.4 + + +From df8b27ea0fb2695842104e06caaecb55780577a7 Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Thu, 3 Dec 2009 21:56:19 +0000 +Subject: [PATCH 076/139] xen: rename netbk module xen-netback. + +netbk is rather generic for a modular distro style kernel. 
+ +Signed-off-by: Ian Campbell +Cc: Jeremy Fitzhardinge +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/Makefile | 4 ++-- + 1 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile +index a01a1a3..e346e81 100644 +--- a/drivers/xen/netback/Makefile ++++ b/drivers/xen/netback/Makefile +@@ -1,3 +1,3 @@ +-obj-$(CONFIG_XEN_NETDEV_BACKEND) := netbk.o ++obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o + +-netbk-y := netback.o xenbus.o interface.o ++xen-netback-y := netback.o xenbus.o interface.o +-- +1.7.3.4 + + +From 279097395ad64ae4df15e206a487cd5fd3be39a8 Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Tue, 16 Feb 2010 14:40:37 -0800 +Subject: [PATCH 077/139] xen: netback: use dev_get/set_drvdata() inteface + +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/xenbus.c | 10 +++++----- + 1 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c +index c46b235..79e6fb0 100644 +--- a/drivers/xen/netback/xenbus.c ++++ b/drivers/xen/netback/xenbus.c +@@ -35,7 +35,7 @@ static void backend_create_netif(struct backend_info *be); + + static int netback_remove(struct xenbus_device *dev) + { +- struct backend_info *be = dev->dev.driver_data; ++ struct backend_info *be = dev_get_drvdata(&dev->dev); + + //netback_remove_accelerators(be, dev); + +@@ -45,7 +45,7 @@ static int netback_remove(struct xenbus_device *dev) + be->netif = NULL; + } + kfree(be); +- dev->dev.driver_data = NULL; ++ dev_set_drvdata(&dev->dev, NULL); + return 0; + } + +@@ -70,7 +70,7 @@ static int netback_probe(struct xenbus_device *dev, + } + + be->dev = dev; +- dev->dev.driver_data = be; ++ dev_set_drvdata(&dev->dev, be); + + sg = 1; + if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) +@@ -151,7 +151,7 @@ fail: + */ + static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env) + { +- struct backend_info *be = xdev->dev.driver_data; ++ struct backend_info *be = dev_get_drvdata(&xdev->dev); + struct xen_netif *netif = be->netif; + char *val; + +@@ -211,7 +211,7 @@ static void backend_create_netif(struct backend_info *be) + static void frontend_changed(struct xenbus_device *dev, + enum xenbus_state frontend_state) + { +- struct backend_info *be = dev->dev.driver_data; ++ struct backend_info *be = dev_get_drvdata(&dev->dev); + + DPRINTK("%s", xenbus_strstate(frontend_state)); + +-- +1.7.3.4 + + +From 31d0b5f5763faf607e32f3b5a0f6b37a34bbbf09 Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Tue, 16 Feb 2010 14:41:12 -0800 +Subject: [PATCH 078/139] xen: netback: include linux/sched.h for TASK_* definitions + +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/common.h | 2 ++ + 1 files changed, 2 insertions(+), 0 deletions(-) + +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index 0675946..d8653d3 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -38,6 +38,8 @@ + #include + #include + #include ++#include ++ + #include + #include + #include +-- +1.7.3.4 + + +From cdefc88924b3cdfcac64be737a00a4ec5593cfd5 Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Tue, 23 Feb 2010 11:52:27 +0000 +Subject: [PATCH 079/139] xen: netback: remove unused xen_network_done code + +It has been disabled effectively forever. 
+ +Signed-off-by: Ian Campbell +--- + drivers/xen/netback/netback.c | 23 ----------------------- + 1 files changed, 0 insertions(+), 23 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 111fec7..4b24893 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -343,25 +343,6 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) + return 0; + } + +-#if 0 +-static void xen_network_done_notify(void) +-{ +- static struct net_device *eth0_dev = NULL; +- if (unlikely(eth0_dev == NULL)) +- eth0_dev = __dev_get_by_name("eth0"); +- netif_rx_schedule(eth0_dev); +-} +-/* +- * Add following to poll() function in NAPI driver (Tigon3 is example): +- * if ( xen_network_done() ) +- * tg3_enable_ints(tp); +- */ +-int xen_network_done(void) +-{ +- return skb_queue_empty(&rx_queue); +-} +-#endif +- + struct netrx_pending_operations { + unsigned trans_prod, trans_cons; + unsigned mmu_prod, mmu_mcl; +@@ -664,10 +645,6 @@ static void net_rx_action(unsigned long unused) + /* More work to do? */ + if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer)) + tasklet_schedule(&net_rx_tasklet); +-#if 0 +- else +- xen_network_done_notify(); +-#endif + } + + static void net_alarm(unsigned long unused) +-- +1.7.3.4 + + +From 994be068dd9947cedcee69a7185e54738cda33d4 Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Tue, 23 Feb 2010 11:58:26 +0000 +Subject: [PATCH 080/139] xen: netback: factor disconnect from backend into new function. + +Makes subsequent patches cleaner. + +Signed-off-by: Ian Campbell +--- + drivers/xen/netback/xenbus.c | 16 ++++++++++++---- + 1 files changed, 12 insertions(+), 4 deletions(-) + +diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c +index 79e6fb0..1f36b4d4 100644 +--- a/drivers/xen/netback/xenbus.c ++++ b/drivers/xen/netback/xenbus.c +@@ -205,6 +205,16 @@ static void backend_create_netif(struct backend_info *be) + } + + ++static void disconnect_backend(struct xenbus_device *dev) ++{ ++ struct backend_info *be = dev_get_drvdata(&dev->dev); ++ ++ if (be->netif) { ++ netif_disconnect(be->netif); ++ be->netif = NULL; ++ } ++} ++ + /** + * Callback received when the frontend's state changes. + */ +@@ -238,11 +248,9 @@ static void frontend_changed(struct xenbus_device *dev, + break; + + case XenbusStateClosing: +- if (be->netif) { ++ if (be->netif) + kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); +- netif_disconnect(be->netif); +- be->netif = NULL; +- } ++ disconnect_backend(dev); + xenbus_switch_state(dev, XenbusStateClosing); + break; + +-- +1.7.3.4 + + +From 9dcb4c18e5b29d8862cd7783d5b0040913010563 Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Tue, 23 Feb 2010 12:10:24 +0000 +Subject: [PATCH 081/139] xen: netback: wait for hotplug scripts to complete before signalling connected to frontend + +Avoid the situation where the frontend is sending packets but the +domain 0 bridging (or whatever) is not yet configured (because the +hotplug scripts are too slow) and so packets get dropped. 
+ +Signed-off-by: Ian Campbell +Cc: Steven.Smith@citrix.com +--- + drivers/xen/netback/common.h | 2 + + drivers/xen/netback/xenbus.c | 45 +++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 46 insertions(+), 1 deletions(-) + +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index d8653d3..1983768 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -145,6 +145,8 @@ struct backend_info { + struct xenbus_device *dev; + struct xen_netif *netif; + enum xenbus_state frontend_state; ++ struct xenbus_watch hotplug_status_watch; ++ int have_hotplug_status_watch:1; + + /* State relating to the netback accelerator */ + void *netback_accel_priv; +diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c +index 1f36b4d4..d2407cc 100644 +--- a/drivers/xen/netback/xenbus.c ++++ b/drivers/xen/netback/xenbus.c +@@ -32,6 +32,7 @@ + static int connect_rings(struct backend_info *); + static void connect(struct backend_info *); + static void backend_create_netif(struct backend_info *be); ++static void unregister_hotplug_status_watch(struct backend_info *be); + + static int netback_remove(struct xenbus_device *dev) + { +@@ -39,8 +40,10 @@ static int netback_remove(struct xenbus_device *dev) + + //netback_remove_accelerators(be, dev); + ++ unregister_hotplug_status_watch(be); + if (be->netif) { + kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); ++ xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status"); + netif_disconnect(be->netif); + be->netif = NULL; + } +@@ -210,6 +213,7 @@ static void disconnect_backend(struct xenbus_device *dev) + struct backend_info *be = dev_get_drvdata(&dev->dev); + + if (be->netif) { ++ xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status"); + netif_disconnect(be->netif); + be->netif = NULL; + } +@@ -329,6 +333,36 @@ static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) + return 0; + } + ++static void unregister_hotplug_status_watch(struct backend_info *be) ++{ ++ if (be->have_hotplug_status_watch) { ++ unregister_xenbus_watch(&be->hotplug_status_watch); ++ kfree(be->hotplug_status_watch.node); ++ } ++ be->have_hotplug_status_watch = 0; ++} ++ ++static void hotplug_status_changed(struct xenbus_watch *watch, ++ const char **vec, ++ unsigned int vec_size) ++{ ++ struct backend_info *be = container_of(watch, ++ struct backend_info, ++ hotplug_status_watch); ++ char *str; ++ unsigned int len; ++ ++ str = xenbus_read(XBT_NIL, be->dev->nodename, "hotplug-status", &len); ++ if (IS_ERR(str)) ++ return; ++ if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) { ++ xenbus_switch_state(be->dev, XenbusStateConnected); ++ /* Not interested in this watch anymore. */ ++ unregister_hotplug_status_watch(be); ++ } ++ kfree(str); ++} ++ + static void connect(struct backend_info *be) + { + int err; +@@ -348,7 +382,16 @@ static void connect(struct backend_info *be) + &be->netif->credit_usec); + be->netif->remaining_credit = be->netif->credit_bytes; + +- xenbus_switch_state(dev, XenbusStateConnected); ++ unregister_hotplug_status_watch(be); ++ err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, ++ hotplug_status_changed, ++ "%s/%s", dev->nodename, "hotplug-status"); ++ if (err) { ++ /* Switch now, since we can't do a watch. 
*/ ++ xenbus_switch_state(dev, XenbusStateConnected); ++ } else { ++ be->have_hotplug_status_watch = 1; ++ } + + netif_wake_queue(be->netif->dev); + } +-- +1.7.3.4 + + +From 509cc7f20f866277a8f5d5895bb266b5b68aac6d Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Tue, 23 Feb 2010 12:11:51 +0000 +Subject: [PATCH 082/139] xen: netback: Always pull through PKT_PROT_LEN bytes into the linear part of an skb. + +Previously PKT_PROT_LEN would only have an effect on the first fragment. + +Signed-off-by: Ian Campbell +--- + drivers/xen/netback/netback.c | 10 ++++++++++ + 1 files changed, 10 insertions(+), 0 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 4b24893..d4a7a56 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -1334,6 +1334,16 @@ static void net_tx_submit(void) + + netbk_fill_frags(skb); + ++ /* ++ * If the initial fragment was < PKT_PROT_LEN then ++ * pull through some bytes from the other fragments to ++ * increase the linear region to PKT_PROT_LEN bytes. ++ */ ++ if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) { ++ int target = min_t(int, skb->len, PKT_PROT_LEN); ++ __pskb_pull_tail(skb, target - skb_headlen(skb)); ++ } ++ + skb->dev = netif->dev; + skb->protocol = eth_type_trans(skb, skb->dev); + +-- +1.7.3.4 + + +From 673c82b5110cfffafe1e1978bc07d6d10d111d50 Mon Sep 17 00:00:00 2001 +From: Steven Smith +Date: Tue, 23 Feb 2010 11:49:26 +0000 +Subject: [PATCH 083/139] xen: netback: try to pull a minimum of 72 bytes into the skb data area + when receiving a packet into netback. + +The previous number, 64, tended to place a fragment boundary in the middle of +the TCP header options and led to unnecessary fragmentation in Windows <-> +Windows networking. + +Signed-off-by: Steven Smith +Signed-off-by: Ian Campbell +--- + drivers/xen/netback/netback.c | 9 +++------ + 1 files changed, 3 insertions(+), 6 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index d4a7a56..44357d7 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -116,13 +116,10 @@ static inline int netif_page_index(struct page *pg) + /* + * This is the amount of packet we copy rather than map, so that the + * guest can't fiddle with the contents of the headers while we do +- * packet processing on them (netfilter, routing, etc). This could +- * probably do with being larger, since 1) 64-bytes isn't necessarily +- * long enough to cover a full christmas-tree ip+tcp header, let alone +- * packet contents, and 2) the data is probably in cache anyway +- * (though perhaps some other cpu's cache). ++ * packet processing on them (netfilter, routing, etc). 72 is enough ++ * to cover TCP+IP headers including options. + */ +-#define PKT_PROT_LEN 64 ++#define PKT_PROT_LEN 72 + + static struct pending_tx_info { + struct xen_netif_tx_request req; +-- +1.7.3.4 + + +From c83bd213efd3ebf700189249c30d987b1cb14d7e Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Tue, 23 Feb 2010 11:54:30 +0000 +Subject: [PATCH 084/139] xen: netback: Allow setting of large MTU before rings have connected. + +This allows large MTU to be configured by the VIF hotplug +script. Previously this would fail because at the point the hotplug +script runs the VIF features have most likely not been negotiated with +the frontend and so SG has not yet been enabled. Invert this behaviour +so that SG is assumed present until negotiations prove otherwise and +reduce MTU at that point. 
+ +Signed-off-by: Ian Campbell +--- + drivers/xen/netback/interface.c | 6 +++++- + drivers/xen/netback/xenbus.c | 8 +++++--- + 2 files changed, 10 insertions(+), 4 deletions(-) + +diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c +index 21c1f95..b23b14d 100644 +--- a/drivers/xen/netback/interface.c ++++ b/drivers/xen/netback/interface.c +@@ -104,6 +104,9 @@ static int netbk_set_sg(struct net_device *dev, u32 data) + return -ENOSYS; + } + ++ if (dev->mtu > ETH_DATA_LEN) ++ dev->mtu = ETH_DATA_LEN; ++ + return ethtool_op_set_sg(dev, data); + } + +@@ -207,6 +210,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int + memset(netif, 0, sizeof(*netif)); + netif->domid = domid; + netif->handle = handle; ++ netif->features = NETIF_F_SG; + atomic_set(&netif->refcnt, 1); + init_waitqueue_head(&netif->waiting_to_free); + netif->dev = dev; +@@ -223,7 +227,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int + init_timer(&netif->tx_queue_timeout); + + dev->netdev_ops = &netback_ops; +- dev->features = NETIF_F_IP_CSUM; ++ dev->features = NETIF_F_IP_CSUM|NETIF_F_SG; + + SET_ETHTOOL_OPS(dev, &network_ethtool_ops); + +diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c +index d2407cc..fcd3c34 100644 +--- a/drivers/xen/netback/xenbus.c ++++ b/drivers/xen/netback/xenbus.c +@@ -445,9 +445,11 @@ static int connect_rings(struct backend_info *be) + + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0) + val = 0; +- if (val) { +- be->netif->features |= NETIF_F_SG; +- be->netif->dev->features |= NETIF_F_SG; ++ if (!val) { ++ be->netif->features &= ~NETIF_F_SG; ++ be->netif->dev->features &= ~NETIF_F_SG; ++ if (be->netif->dev->mtu > ETH_DATA_LEN) ++ be->netif->dev->mtu = ETH_DATA_LEN; + } + + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d", +-- +1.7.3.4 + + +From e5cd35b00cb63f3a3fa1651260a58d59bbc134b7 Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Fri, 19 Mar 2010 13:09:16 -0700 +Subject: [PATCH 085/139] xen: netback: use get_sset_count rather than obsolete get_stats_count + +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/interface.c | 11 ++++++++--- + 1 files changed, 8 insertions(+), 3 deletions(-) + +diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c +index b23b14d..086d939 100644 +--- a/drivers/xen/netback/interface.c ++++ b/drivers/xen/netback/interface.c +@@ -136,9 +136,14 @@ static const struct netif_stat { + { "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) }, + }; + +-static int netbk_get_stats_count(struct net_device *dev) ++static int netbk_get_sset_count(struct net_device *dev, int string_set) + { +- return ARRAY_SIZE(netbk_stats); ++ switch (string_set) { ++ case ETH_SS_STATS: ++ return ARRAY_SIZE(netbk_stats); ++ default: ++ return -EINVAL; ++ } + } + + static void netbk_get_ethtool_stats(struct net_device *dev, +@@ -176,7 +181,7 @@ static struct ethtool_ops network_ethtool_ops = + .set_tso = netbk_set_tso, + .get_link = ethtool_op_get_link, + +- .get_stats_count = netbk_get_stats_count, ++ .get_sset_count = netbk_get_sset_count, + .get_ethtool_stats = netbk_get_ethtool_stats, + .get_strings = netbk_get_strings, + }; +-- +1.7.3.4 + + +From 0c34835ee66ad641f01a8077a973b7ec1bfdcd86 Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Tue, 11 May 2010 09:33:42 +0100 +Subject: [PATCH 086/139] xen: netback: correctly setup skb->ip_summed on receive + +In 2.6.18 CHECKSUM_PARTIAL and 
CHECKSUM_UNNECESSARY were both synonyms for +CHECKSUM_HW. This is no longer the case and we need to select the correct one. + + data_validated csum_blank -> ip_summed + 0 0 CHECKSUM_NONE + 0 1 CHECKSUM_PARTIAL + 1 0 CHECKSUM_UNNECESSARY + 1 1 CHECKSUM_PARTIAL + +Signed-off-by: Ian Campbell +Tested-by: Matej Zary +Tested-by: Michael D Labriola +--- + drivers/xen/netback/netback.c | 10 +++------- + 1 files changed, 3 insertions(+), 7 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 44357d7..725da0f 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -1320,14 +1320,10 @@ static void net_tx_submit(void) + netif_idx_release(pending_idx); + } + +- /* +- * Old frontends do not assert data_validated but we +- * can infer it from csum_blank so test both flags. +- */ +- if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) ++ if (txp->flags & NETTXF_csum_blank) + skb->ip_summed = CHECKSUM_PARTIAL; +- else +- skb->ip_summed = CHECKSUM_NONE; ++ else if (txp->flags & NETTXF_data_validated) ++ skb->ip_summed = CHECKSUM_UNNECESSARY; + + netbk_fill_frags(skb); + +-- +1.7.3.4 + + +From 094944631cc5a9d6e623302c987f78117c0bf7ac Mon Sep 17 00:00:00 2001 +From: Dongxiao Xu +Date: Wed, 19 May 2010 16:58:56 -0700 +Subject: [PATCH 087/139] xen: netback: Move global/static variables into struct xen_netbk. + +Bundle a lot of discrete variables into a single structure. + +Signed-off-by: Dongxiao Xu +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/common.h | 59 +++++++ + drivers/xen/netback/netback.c | 360 ++++++++++++++++++++--------------------- + 2 files changed, 232 insertions(+), 187 deletions(-) + +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index 1983768..00208f4 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -222,4 +222,63 @@ static inline int netbk_can_sg(struct net_device *dev) + return netif->features & NETIF_F_SG; + } + ++struct pending_tx_info { ++ struct xen_netif_tx_request req; ++ struct xen_netif *netif; ++}; ++typedef unsigned int pending_ring_idx_t; ++ ++struct netbk_rx_meta { ++ skb_frag_t frag; ++ int id; ++}; ++ ++struct netbk_tx_pending_inuse { ++ struct list_head list; ++ unsigned long alloc_time; ++}; ++ ++#define MAX_PENDING_REQS 256 ++ ++struct xen_netbk { ++ struct tasklet_struct net_tx_tasklet; ++ struct tasklet_struct net_rx_tasklet; ++ ++ struct sk_buff_head rx_queue; ++ struct sk_buff_head tx_queue; ++ ++ struct timer_list net_timer; ++ struct timer_list netbk_tx_pending_timer; ++ ++ struct page **mmap_pages; ++ ++ pending_ring_idx_t pending_prod; ++ pending_ring_idx_t pending_cons; ++ pending_ring_idx_t dealloc_prod; ++ pending_ring_idx_t dealloc_cons; ++ ++ struct list_head pending_inuse_head; ++ struct list_head net_schedule_list; ++ ++ /* Protect the net_schedule_list in netif. 
*/ ++ spinlock_t net_schedule_list_lock; ++ ++ struct pending_tx_info pending_tx_info[MAX_PENDING_REQS]; ++ struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS]; ++ struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS]; ++ struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS]; ++ ++ grant_handle_t grant_tx_handle[MAX_PENDING_REQS]; ++ u16 pending_ring[MAX_PENDING_REQS]; ++ u16 dealloc_ring[MAX_PENDING_REQS]; ++ ++ struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3]; ++ struct mmu_update rx_mmu[NET_RX_RING_SIZE]; ++ struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE]; ++ struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE]; ++ unsigned char rx_notify[NR_IRQS]; ++ u16 notify_list[NET_RX_RING_SIZE]; ++ struct netbk_rx_meta meta[NET_RX_RING_SIZE]; ++}; ++ + #endif /* __NETIF__BACKEND__COMMON_H__ */ +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 725da0f..417f497 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -48,16 +48,7 @@ + + /*define NETBE_DEBUG_INTERRUPT*/ + +-struct netbk_rx_meta { +- skb_frag_t frag; +- int id; +-}; +- +-struct netbk_tx_pending_inuse { +- struct list_head list; +- unsigned long alloc_time; +-}; +- ++static struct xen_netbk *netbk; + + static void netif_idx_release(u16 pending_idx); + static void make_tx_response(struct xen_netif *netif, +@@ -71,22 +62,12 @@ static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif, + u16 flags); + + static void net_tx_action(unsigned long unused); +-static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0); + + static void net_rx_action(unsigned long unused); +-static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0); +- +-static struct timer_list net_timer; +-static struct timer_list netbk_tx_pending_timer; + +-#define MAX_PENDING_REQS 256 +- +-static struct sk_buff_head rx_queue; +- +-static struct page **mmap_pages; + static inline unsigned long idx_to_pfn(unsigned int idx) + { +- return page_to_pfn(mmap_pages[idx]); ++ return page_to_pfn(netbk->mmap_pages[idx]); + } + + static inline unsigned long idx_to_kaddr(unsigned int idx) +@@ -107,7 +88,7 @@ static inline int netif_page_index(struct page *pg) + if (!PageForeign(pg)) + return -1; + +- if ((idx >= MAX_PENDING_REQS) || (mmap_pages[idx] != pg)) ++ if ((idx >= MAX_PENDING_REQS) || (netbk->mmap_pages[idx] != pg)) + return -1; + + return idx; +@@ -121,46 +102,17 @@ static inline int netif_page_index(struct page *pg) + */ + #define PKT_PROT_LEN 72 + +-static struct pending_tx_info { +- struct xen_netif_tx_request req; +- struct xen_netif *netif; +-} pending_tx_info[MAX_PENDING_REQS]; +-static u16 pending_ring[MAX_PENDING_REQS]; +-typedef unsigned int pending_ring_idx_t; +- + static inline pending_ring_idx_t pending_index(unsigned i) + { + return i & (MAX_PENDING_REQS-1); + } + +-static pending_ring_idx_t pending_prod, pending_cons; +- + static inline pending_ring_idx_t nr_pending_reqs(void) + { +- return MAX_PENDING_REQS - pending_prod + pending_cons; ++ return MAX_PENDING_REQS - ++ netbk->pending_prod + netbk->pending_cons; + } + +-/* Freed TX SKBs get batched on this ring before return to pending_ring. */ +-static u16 dealloc_ring[MAX_PENDING_REQS]; +-static pending_ring_idx_t dealloc_prod, dealloc_cons; +- +-/* Doubly-linked list of in-use pending entries. 
*/ +-static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS]; +-static LIST_HEAD(pending_inuse_head); +- +-static struct sk_buff_head tx_queue; +- +-static grant_handle_t grant_tx_handle[MAX_PENDING_REQS]; +-static struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS]; +-static struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS]; +- +-static LIST_HEAD(net_schedule_list); +-static DEFINE_SPINLOCK(net_schedule_list_lock); +- +-#define MAX_MFN_ALLOC 64 +-static unsigned long mfn_list[MAX_MFN_ALLOC]; +-static unsigned int alloc_index = 0; +- + /* Setting this allows the safe use of this driver without netloop. */ + static int MODPARM_copy_skb = 1; + module_param_named(copy_skb, MODPARM_copy_skb, bool, 0); +@@ -168,18 +120,12 @@ MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop"); + + int netbk_copy_skb_mode; + +-static inline unsigned long alloc_mfn(void) +-{ +- BUG_ON(alloc_index == 0); +- return mfn_list[--alloc_index]; +-} +- + static inline void maybe_schedule_tx_action(void) + { + smp_mb(); + if ((nr_pending_reqs() < (MAX_PENDING_REQS/2)) && +- !list_empty(&net_schedule_list)) +- tasklet_schedule(&net_tx_tasklet); ++ !list_empty(&netbk->net_schedule_list)) ++ tasklet_schedule(&netbk->net_tx_tasklet); + } + + static struct sk_buff *netbk_copy_skb(struct sk_buff *skb) +@@ -328,9 +274,8 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) + mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2); + } + } +- +- skb_queue_tail(&rx_queue, skb); +- tasklet_schedule(&net_rx_tasklet); ++ skb_queue_tail(&netbk->rx_queue, skb); ++ tasklet_schedule(&netbk->net_rx_tasklet); + + return 0; + +@@ -372,7 +317,7 @@ static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta, + copy_gop = npo->copy + npo->copy_prod++; + copy_gop->flags = GNTCOPY_dest_gref; + if (idx > -1) { +- struct pending_tx_info *src_pend = &pending_tx_info[idx]; ++ struct pending_tx_info *src_pend = &netbk->pending_tx_info[idx]; + copy_gop->source.domid = src_pend->netif->domid; + copy_gop->source.u.ref = src_pend->req.gref; + copy_gop->flags |= GNTCOPY_source_gref; +@@ -487,30 +432,19 @@ static void net_rx_action(unsigned long unused) + int count; + unsigned long offset; + +- /* +- * Putting hundreds of bytes on the stack is considered rude. +- * Static works because a tasklet can only be on one CPU at any time. 
+- */ +- static struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3]; +- static struct mmu_update rx_mmu[NET_RX_RING_SIZE]; +- static struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE]; +- static struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE]; +- static unsigned char rx_notify[NR_IRQS]; +- static u16 notify_list[NET_RX_RING_SIZE]; +- static struct netbk_rx_meta meta[NET_RX_RING_SIZE]; +- + struct netrx_pending_operations npo = { +- mmu: rx_mmu, +- trans: grant_trans_op, +- copy: grant_copy_op, +- mcl: rx_mcl, +- meta: meta}; ++ .mmu = netbk->rx_mmu, ++ .trans = netbk->grant_trans_op, ++ .copy = netbk->grant_copy_op, ++ .mcl = netbk->rx_mcl, ++ .meta = netbk->meta, ++ }; + + skb_queue_head_init(&rxq); + + count = 0; + +- while ((skb = skb_dequeue(&rx_queue)) != NULL) { ++ while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) { + nr_frags = skb_shinfo(skb)->nr_frags; + *(int *)skb->cb = nr_frags; + +@@ -525,39 +459,39 @@ static void net_rx_action(unsigned long unused) + break; + } + +- BUG_ON(npo.meta_prod > ARRAY_SIZE(meta)); ++ BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta)); + + npo.mmu_mcl = npo.mcl_prod; + if (npo.mcl_prod) { + BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); +- BUG_ON(npo.mmu_prod > ARRAY_SIZE(rx_mmu)); ++ BUG_ON(npo.mmu_prod > ARRAY_SIZE(netbk->rx_mmu)); + mcl = npo.mcl + npo.mcl_prod++; + + BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping); + mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; + + mcl->op = __HYPERVISOR_mmu_update; +- mcl->args[0] = (unsigned long)rx_mmu; ++ mcl->args[0] = (unsigned long)netbk->rx_mmu; + mcl->args[1] = npo.mmu_prod; + mcl->args[2] = 0; + mcl->args[3] = DOMID_SELF; + } + + if (npo.trans_prod) { +- BUG_ON(npo.trans_prod > ARRAY_SIZE(grant_trans_op)); ++ BUG_ON(npo.trans_prod > ARRAY_SIZE(netbk->grant_trans_op)); + mcl = npo.mcl + npo.mcl_prod++; + mcl->op = __HYPERVISOR_grant_table_op; + mcl->args[0] = GNTTABOP_transfer; +- mcl->args[1] = (unsigned long)grant_trans_op; ++ mcl->args[1] = (unsigned long)netbk->grant_trans_op; + mcl->args[2] = npo.trans_prod; + } + + if (npo.copy_prod) { +- BUG_ON(npo.copy_prod > ARRAY_SIZE(grant_copy_op)); ++ BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op)); + mcl = npo.mcl + npo.mcl_prod++; + mcl->op = __HYPERVISOR_grant_table_op; + mcl->args[0] = GNTTABOP_copy; +- mcl->args[1] = (unsigned long)grant_copy_op; ++ mcl->args[1] = (unsigned long)netbk->grant_copy_op; + mcl->args[2] = npo.copy_prod; + } + +@@ -565,7 +499,7 @@ static void net_rx_action(unsigned long unused) + if (!npo.mcl_prod) + return; + +- BUG_ON(npo.mcl_prod > ARRAY_SIZE(rx_mcl)); ++ BUG_ON(npo.mcl_prod > ARRAY_SIZE(netbk->rx_mcl)); + + ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod); + BUG_ON(ret != 0); +@@ -582,7 +516,7 @@ static void net_rx_action(unsigned long unused) + + status = netbk_check_gop(nr_frags, netif->domid, &npo); + +- id = meta[npo.meta_cons].id; ++ id = netbk->meta[npo.meta_cons].id; + flags = nr_frags ? NETRXF_more_data : 0; + + if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? 
*/ +@@ -595,7 +529,7 @@ static void net_rx_action(unsigned long unused) + resp = make_rx_response(netif, id, status, offset, + skb_headlen(skb), flags); + +- if (meta[npo.meta_cons].frag.size) { ++ if (netbk->meta[npo.meta_cons].frag.size) { + struct xen_netif_extra_info *gso = + (struct xen_netif_extra_info *) + RING_GET_RESPONSE(&netif->rx, +@@ -603,7 +537,7 @@ static void net_rx_action(unsigned long unused) + + resp->flags |= NETRXF_extra_info; + +- gso->u.gso.size = meta[npo.meta_cons].frag.size; ++ gso->u.gso.size = netbk->meta[npo.meta_cons].frag.size; + gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; + gso->u.gso.pad = 0; + gso->u.gso.features = 0; +@@ -613,14 +547,14 @@ static void net_rx_action(unsigned long unused) + } + + netbk_add_frag_responses(netif, status, +- meta + npo.meta_cons + 1, +- nr_frags); ++ netbk->meta + npo.meta_cons + 1, ++ nr_frags); + + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret); + irq = netif->irq; +- if (ret && !rx_notify[irq]) { +- rx_notify[irq] = 1; +- notify_list[notify_nr++] = irq; ++ if (ret && !netbk->rx_notify[irq]) { ++ netbk->rx_notify[irq] = 1; ++ netbk->notify_list[notify_nr++] = irq; + } + + if (netif_queue_stopped(netif->dev) && +@@ -634,24 +568,25 @@ static void net_rx_action(unsigned long unused) + } + + while (notify_nr != 0) { +- irq = notify_list[--notify_nr]; +- rx_notify[irq] = 0; ++ irq = netbk->notify_list[--notify_nr]; ++ netbk->rx_notify[irq] = 0; + notify_remote_via_irq(irq); + } + + /* More work to do? */ +- if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer)) +- tasklet_schedule(&net_rx_tasklet); ++ if (!skb_queue_empty(&netbk->rx_queue) && ++ !timer_pending(&netbk->net_timer)) ++ tasklet_schedule(&netbk->net_rx_tasklet); + } + + static void net_alarm(unsigned long unused) + { +- tasklet_schedule(&net_rx_tasklet); ++ tasklet_schedule(&netbk->net_rx_tasklet); + } + + static void netbk_tx_pending_timeout(unsigned long unused) + { +- tasklet_schedule(&net_tx_tasklet); ++ tasklet_schedule(&netbk->net_tx_tasklet); + } + + struct net_device_stats *netif_be_get_stats(struct net_device *dev) +@@ -667,12 +602,12 @@ static int __on_net_schedule_list(struct xen_netif *netif) + + static void remove_from_net_schedule_list(struct xen_netif *netif) + { +- spin_lock_irq(&net_schedule_list_lock); ++ spin_lock_irq(&netbk->net_schedule_list_lock); + if (likely(__on_net_schedule_list(netif))) { + list_del_init(&netif->list); + netif_put(netif); + } +- spin_unlock_irq(&net_schedule_list_lock); ++ spin_unlock_irq(&netbk->net_schedule_list_lock); + } + + static void add_to_net_schedule_list_tail(struct xen_netif *netif) +@@ -680,13 +615,13 @@ static void add_to_net_schedule_list_tail(struct xen_netif *netif) + if (__on_net_schedule_list(netif)) + return; + +- spin_lock_irq(&net_schedule_list_lock); ++ spin_lock_irq(&netbk->net_schedule_list_lock); + if (!__on_net_schedule_list(netif) && + likely(netif_schedulable(netif))) { +- list_add_tail(&netif->list, &net_schedule_list); ++ list_add_tail(&netif->list, &netbk->net_schedule_list); + netif_get(netif); + } +- spin_unlock_irq(&net_schedule_list_lock); ++ spin_unlock_irq(&netbk->net_schedule_list_lock); + } + + void netif_schedule_work(struct xen_netif *netif) +@@ -736,8 +671,9 @@ static void tx_credit_callback(unsigned long data) + + static inline int copy_pending_req(pending_ring_idx_t pending_idx) + { +- return gnttab_copy_grant_page(grant_tx_handle[pending_idx], +- &mmap_pages[pending_idx]); ++ return gnttab_copy_grant_page( ++ netbk->grant_tx_handle[pending_idx], ++ 
&netbk->mmap_pages[pending_idx]); + } + + inline static void net_tx_action_dealloc(void) +@@ -750,22 +686,24 @@ inline static void net_tx_action_dealloc(void) + int ret; + LIST_HEAD(list); + +- dc = dealloc_cons; +- gop = tx_unmap_ops; ++ dc = netbk->dealloc_cons; ++ gop = netbk->tx_unmap_ops; + + /* + * Free up any grants we have finished using + */ + do { +- dp = dealloc_prod; ++ dp = netbk->dealloc_prod; + + /* Ensure we see all indices enqueued by netif_idx_release(). */ + smp_rmb(); + + while (dc != dp) { + unsigned long pfn; ++ struct netbk_tx_pending_inuse *pending_inuse = ++ netbk->pending_inuse; + +- pending_idx = dealloc_ring[pending_index(dc++)]; ++ pending_idx = netbk->dealloc_ring[pending_index(dc++)]; + list_move_tail(&pending_inuse[pending_idx].list, &list); + + pfn = idx_to_pfn(pending_idx); +@@ -773,22 +711,27 @@ inline static void net_tx_action_dealloc(void) + if (!phys_to_machine_mapping_valid(pfn)) + continue; + +- gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx), +- GNTMAP_host_map, +- grant_tx_handle[pending_idx]); ++ gnttab_set_unmap_op(gop, ++ idx_to_kaddr(pending_idx), ++ GNTMAP_host_map, ++ netbk->grant_tx_handle[pending_idx]); + gop++; + } + + if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB || +- list_empty(&pending_inuse_head)) ++ list_empty(&netbk->pending_inuse_head)) + break; + + /* Copy any entries that have been pending for too long. */ +- list_for_each_entry_safe(inuse, n, &pending_inuse_head, list) { ++ list_for_each_entry_safe(inuse, n, ++ &netbk->pending_inuse_head, list) { ++ struct pending_tx_info *pending_tx_info; ++ pending_tx_info = netbk->pending_tx_info; ++ + if (time_after(inuse->alloc_time + HZ / 2, jiffies)) + break; + +- pending_idx = inuse - pending_inuse; ++ pending_idx = inuse - netbk->pending_inuse; + + pending_tx_info[pending_idx].netif->nr_copied_skbs++; + +@@ -805,16 +748,21 @@ inline static void net_tx_action_dealloc(void) + + break; + } +- } while (dp != dealloc_prod); ++ } while (dp != netbk->dealloc_prod); + +- dealloc_cons = dc; ++ netbk->dealloc_cons = dc; + + ret = HYPERVISOR_grant_table_op( +- GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops); ++ GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops, ++ gop - netbk->tx_unmap_ops); + BUG_ON(ret); + + list_for_each_entry_safe(inuse, n, &list, list) { +- pending_idx = inuse - pending_inuse; ++ struct pending_tx_info *pending_tx_info; ++ pending_ring_idx_t index; ++ ++ pending_tx_info = netbk->pending_tx_info; ++ pending_idx = inuse - netbk->pending_inuse; + + netif = pending_tx_info[pending_idx].netif; + +@@ -822,9 +770,10 @@ inline static void net_tx_action_dealloc(void) + NETIF_RSP_OKAY); + + /* Ready for next use. 
*/ +- gnttab_reset_grant_page(mmap_pages[pending_idx]); ++ gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]); + +- pending_ring[pending_index(pending_prod++)] = pending_idx; ++ index = pending_index(netbk->pending_prod++); ++ netbk->pending_ring[index] = pending_idx; + + netif_put(netif); + +@@ -832,7 +781,8 @@ inline static void net_tx_action_dealloc(void) + } + } + +-static void netbk_tx_err(struct xen_netif *netif, struct xen_netif_tx_request *txp, RING_IDX end) ++static void netbk_tx_err(struct xen_netif *netif, ++ struct xen_netif_tx_request *txp, RING_IDX end) + { + RING_IDX cons = netif->tx.req_cons; + +@@ -902,7 +852,12 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif, + start = ((unsigned long)shinfo->frags[0].page == pending_idx); + + for (i = start; i < shinfo->nr_frags; i++, txp++) { +- pending_idx = pending_ring[pending_index(pending_cons++)]; ++ pending_ring_idx_t index; ++ struct pending_tx_info *pending_tx_info = ++ netbk->pending_tx_info; ++ ++ index = pending_index(netbk->pending_cons++); ++ pending_idx = netbk->pending_ring[index]; + + gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx), + GNTMAP_host_map | GNTMAP_readonly, +@@ -922,6 +877,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb, + { + struct gnttab_map_grant_ref *mop = *mopp; + int pending_idx = *((u16 *)skb->data); ++ struct pending_tx_info *pending_tx_info = netbk->pending_tx_info; + struct xen_netif *netif = pending_tx_info[pending_idx].netif; + struct xen_netif_tx_request *txp; + struct skb_shared_info *shinfo = skb_shinfo(skb); +@@ -931,15 +887,17 @@ static int netbk_tx_check_mop(struct sk_buff *skb, + /* Check status of header. */ + err = mop->status; + if (unlikely(err)) { ++ pending_ring_idx_t index; ++ index = pending_index(netbk->pending_prod++); + txp = &pending_tx_info[pending_idx].req; + make_tx_response(netif, txp, NETIF_RSP_ERROR); +- pending_ring[pending_index(pending_prod++)] = pending_idx; ++ netbk->pending_ring[index] = pending_idx; + netif_put(netif); + } else { + set_phys_to_machine( + __pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT, + FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT)); +- grant_tx_handle[pending_idx] = mop->handle; ++ netbk->grant_tx_handle[pending_idx] = mop->handle; + } + + /* Skip first skb fragment if it is on same page as header fragment. */ +@@ -947,16 +905,19 @@ static int netbk_tx_check_mop(struct sk_buff *skb, + + for (i = start; i < nr_frags; i++) { + int j, newerr; ++ pending_ring_idx_t index; + + pending_idx = (unsigned long)shinfo->frags[i].page; + + /* Check error status: if okay then remember grant handle. */ + newerr = (++mop)->status; + if (likely(!newerr)) { ++ unsigned long addr; ++ addr = idx_to_kaddr(pending_idx); + set_phys_to_machine( +- __pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT, ++ __pa(addr)>>PAGE_SHIFT, + FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT)); +- grant_tx_handle[pending_idx] = mop->handle; ++ netbk->grant_tx_handle[pending_idx] = mop->handle; + /* Had a previous error? Invalidate this fragment. */ + if (unlikely(err)) + netif_idx_release(pending_idx); +@@ -964,9 +925,10 @@ static int netbk_tx_check_mop(struct sk_buff *skb, + } + + /* Error on this fragment: respond to client with an error. 
*/ +- txp = &pending_tx_info[pending_idx].req; ++ txp = &netbk->pending_tx_info[pending_idx].req; + make_tx_response(netif, txp, NETIF_RSP_ERROR); +- pending_ring[pending_index(pending_prod++)] = pending_idx; ++ index = pending_index(netbk->pending_prod++); ++ netbk->pending_ring[index] = pending_idx; + netif_put(netif); + + /* Not the first error? Preceding frags already invalidated. */ +@@ -1002,11 +964,11 @@ static void netbk_fill_frags(struct sk_buff *skb) + + pending_idx = (unsigned long)frag->page; + +- pending_inuse[pending_idx].alloc_time = jiffies; +- list_add_tail(&pending_inuse[pending_idx].list, +- &pending_inuse_head); ++ netbk->pending_inuse[pending_idx].alloc_time = jiffies; ++ list_add_tail(&netbk->pending_inuse[pending_idx].list, ++ &netbk->pending_inuse_head); + +- txp = &pending_tx_info[pending_idx].req; ++ txp = &netbk->pending_tx_info[pending_idx].req; + frag->page = virt_to_page(idx_to_kaddr(pending_idx)); + frag->size = txp->size; + frag->page_offset = txp->offset; +@@ -1145,9 +1107,9 @@ static unsigned net_tx_build_mops(void) + struct sk_buff *skb; + int ret; + +- mop = tx_map_ops; ++ mop = netbk->tx_map_ops; + while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && +- !list_empty(&net_schedule_list)) { ++ !list_empty(&netbk->net_schedule_list)) { + struct xen_netif *netif; + struct xen_netif_tx_request txreq; + struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS]; +@@ -1156,9 +1118,11 @@ static unsigned net_tx_build_mops(void) + RING_IDX idx; + int work_to_do; + unsigned int data_len; ++ pending_ring_idx_t index; + + /* Get a netif from the list with work to do. */ +- netif = list_first_entry(&net_schedule_list, struct xen_netif, list); ++ netif = list_first_entry(&netbk->net_schedule_list, ++ struct xen_netif, list); + netif_get(netif); + remove_from_net_schedule_list(netif); + +@@ -1217,7 +1181,8 @@ static unsigned net_tx_build_mops(void) + continue; + } + +- pending_idx = pending_ring[pending_index(pending_cons)]; ++ index = pending_index(netbk->pending_cons); ++ pending_idx = netbk->pending_ring[index]; + + data_len = (txreq.size > PKT_PROT_LEN && + ret < MAX_SKB_FRAGS) ? 
+@@ -1250,9 +1215,9 @@ static unsigned net_tx_build_mops(void) + txreq.gref, netif->domid); + mop++; + +- memcpy(&pending_tx_info[pending_idx].req, ++ memcpy(&netbk->pending_tx_info[pending_idx].req, + &txreq, sizeof(txreq)); +- pending_tx_info[pending_idx].netif = netif; ++ netbk->pending_tx_info[pending_idx].netif = netif; + *((u16 *)skb->data) = pending_idx; + + __skb_put(skb, data_len); +@@ -1267,20 +1232,20 @@ static unsigned net_tx_build_mops(void) + skb_shinfo(skb)->frags[0].page = (void *)~0UL; + } + +- __skb_queue_tail(&tx_queue, skb); ++ __skb_queue_tail(&netbk->tx_queue, skb); + +- pending_cons++; ++ netbk->pending_cons++; + + mop = netbk_get_requests(netif, skb, txfrags, mop); + + netif->tx.req_cons = idx; + netif_schedule_work(netif); + +- if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops)) ++ if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops)) + break; + } + +- return mop - tx_map_ops; ++ return mop - netbk->tx_map_ops; + } + + static void net_tx_submit(void) +@@ -1288,16 +1253,16 @@ static void net_tx_submit(void) + struct gnttab_map_grant_ref *mop; + struct sk_buff *skb; + +- mop = tx_map_ops; +- while ((skb = __skb_dequeue(&tx_queue)) != NULL) { ++ mop = netbk->tx_map_ops; ++ while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) { + struct xen_netif_tx_request *txp; + struct xen_netif *netif; + u16 pending_idx; + unsigned data_len; + + pending_idx = *((u16 *)skb->data); +- netif = pending_tx_info[pending_idx].netif; +- txp = &pending_tx_info[pending_idx].req; ++ netif = netbk->pending_tx_info[pending_idx].netif; ++ txp = &netbk->pending_tx_info[pending_idx].req; + + /* Check the remap error code. */ + if (unlikely(netbk_tx_check_mop(skb, &mop))) { +@@ -1363,12 +1328,13 @@ static void net_tx_submit(void) + } + + if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB && +- !list_empty(&pending_inuse_head)) { ++ !list_empty(&netbk->pending_inuse_head)) { + struct netbk_tx_pending_inuse *oldest; + +- oldest = list_entry(pending_inuse_head.next, ++ oldest = list_entry(netbk->pending_inuse_head.next, + struct netbk_tx_pending_inuse, list); +- mod_timer(&netbk_tx_pending_timer, oldest->alloc_time + HZ); ++ mod_timer(&netbk->netbk_tx_pending_timer, ++ oldest->alloc_time + HZ); + } + } + +@@ -1378,7 +1344,7 @@ static void net_tx_action(unsigned long unused) + unsigned nr_mops; + int ret; + +- if (dealloc_cons != dealloc_prod) ++ if (netbk->dealloc_cons != netbk->dealloc_prod) + net_tx_action_dealloc(); + + nr_mops = net_tx_build_mops(); +@@ -1387,7 +1353,7 @@ static void net_tx_action(unsigned long unused) + return; + + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, +- tx_map_ops, nr_mops); ++ netbk->tx_map_ops, nr_mops); + BUG_ON(ret); + + net_tx_submit(); +@@ -1397,15 +1363,17 @@ static void netif_idx_release(u16 pending_idx) + { + static DEFINE_SPINLOCK(_lock); + unsigned long flags; ++ pending_ring_idx_t index; + + spin_lock_irqsave(&_lock, flags); +- dealloc_ring[pending_index(dealloc_prod)] = pending_idx; ++ index = pending_index(netbk->dealloc_prod); ++ netbk->dealloc_ring[index] = pending_idx; + /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. 
*/ + smp_wmb(); +- dealloc_prod++; ++ netbk->dealloc_prod++; + spin_unlock_irqrestore(&_lock, flags); + +- tasklet_schedule(&net_tx_tasklet); ++ tasklet_schedule(&netbk->net_tx_tasklet); + } + + static void netif_page_release(struct page *page, unsigned int order) +@@ -1481,9 +1449,9 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs) + int i = 0; + + printk(KERN_ALERT "netif_schedule_list:\n"); +- spin_lock_irq(&net_schedule_list_lock); ++ spin_lock_irq(&netbk->net_schedule_list_lock); + +- list_for_each (ent, &net_schedule_list) { ++ list_for_each(ent, &netbk->net_schedule_list) { + netif = list_entry(ent, struct xen_netif, list); + printk(KERN_ALERT " %d: private(rx_req_cons=%08x " + "rx_resp_prod=%08x\n", +@@ -1500,7 +1468,7 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs) + i++; + } + +- spin_unlock_irq(&net_schedule_list_lock); ++ spin_unlock_irq(&netbk->net_schedule_list_lock); + printk(KERN_ALERT " ** End of netif_schedule_list **\n"); + + return IRQ_HANDLED; +@@ -1516,37 +1484,53 @@ static int __init netback_init(void) + if (!xen_domain()) + return -ENODEV; + ++ netbk = (struct xen_netbk *)vmalloc(sizeof(struct xen_netbk)); ++ if (!netbk) { ++ printk(KERN_ALERT "%s: out of memory\n", __func__); ++ return -ENOMEM; ++ } ++ + /* We can increase reservation by this much in net_rx_action(). */ + // balloon_update_driver_allowance(NET_RX_RING_SIZE); + +- skb_queue_head_init(&rx_queue); +- skb_queue_head_init(&tx_queue); ++ skb_queue_head_init(&netbk->rx_queue); ++ skb_queue_head_init(&netbk->tx_queue); + +- init_timer(&net_timer); +- net_timer.data = 0; +- net_timer.function = net_alarm; ++ init_timer(&netbk->net_timer); ++ netbk->net_timer.data = 0; ++ netbk->net_timer.function = net_alarm; + +- init_timer(&netbk_tx_pending_timer); +- netbk_tx_pending_timer.data = 0; +- netbk_tx_pending_timer.function = netbk_tx_pending_timeout; ++ init_timer(&netbk->netbk_tx_pending_timer); ++ netbk->netbk_tx_pending_timer.data = 0; ++ netbk->netbk_tx_pending_timer.function = netbk_tx_pending_timeout; + +- mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS); +- if (mmap_pages == NULL) { +- printk("%s: out of memory\n", __FUNCTION__); +- return -ENOMEM; ++ netbk->mmap_pages = ++ alloc_empty_pages_and_pagevec(MAX_PENDING_REQS); ++ if (!netbk->mmap_pages) { ++ printk(KERN_ALERT "%s: out of memory\n", __func__); ++ rc = -ENOMEM; ++ goto failed_init2; + } + + for (i = 0; i < MAX_PENDING_REQS; i++) { +- page = mmap_pages[i]; ++ page = netbk->mmap_pages[i]; + SetPageForeign(page, netif_page_release); + netif_set_page_index(page, i); +- INIT_LIST_HEAD(&pending_inuse[i].list); ++ INIT_LIST_HEAD(&netbk->pending_inuse[i].list); + } + +- pending_cons = 0; +- pending_prod = MAX_PENDING_REQS; ++ netbk->pending_cons = 0; ++ netbk->pending_prod = MAX_PENDING_REQS; + for (i = 0; i < MAX_PENDING_REQS; i++) +- pending_ring[i] = i; ++ netbk->pending_ring[i] = i; ++ ++ tasklet_init(&netbk->net_tx_tasklet, net_tx_action, 0); ++ tasklet_init(&netbk->net_rx_tasklet, net_rx_action, 0); ++ ++ INIT_LIST_HEAD(&netbk->pending_inuse_head); ++ INIT_LIST_HEAD(&netbk->net_schedule_list); ++ ++ spin_lock_init(&netbk->net_schedule_list_lock); + + netbk_copy_skb_mode = NETBK_DONT_COPY_SKB; + if (MODPARM_copy_skb) { +@@ -1561,7 +1545,7 @@ static int __init netback_init(void) + + rc = netif_xenbus_init(); + if (rc) +- goto failed_init; ++ goto failed_init1; + + #ifdef NETBE_DEBUG_INTERRUPT + (void)bind_virq_to_irqhandler(VIRQ_DEBUG, +@@ -1574,10 +1558,12 @@ static int 
__init netback_init(void) + + return 0; + +-failed_init: +- free_empty_pages_and_pagevec(mmap_pages, MAX_PENDING_REQS); +- del_timer(&netbk_tx_pending_timer); +- del_timer(&net_timer); ++failed_init1: ++ free_empty_pages_and_pagevec(netbk->mmap_pages, MAX_PENDING_REQS); ++failed_init2: ++ del_timer(&netbk->netbk_tx_pending_timer); ++ del_timer(&netbk->net_timer); ++ vfree(netbk); + return rc; + + } +-- +1.7.3.4 + + +From c099c22d8b1c12fc7d68998982eb4ccd4918e813 Mon Sep 17 00:00:00 2001 +From: Dongxiao Xu +Date: Wed, 19 May 2010 16:58:57 -0700 +Subject: [PATCH 088/139] xen: netback: Introduce a new struct type page_ext. + +struct page_ext is used to store the group and idx information by +which a specified page could be identified. + +Signed-off-by: Dongxiao Xu +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/common.h | 15 +++++++++++++++ + drivers/xen/netback/netback.c | 28 +++++++++++++++++----------- + 2 files changed, 32 insertions(+), 11 deletions(-) + +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index 00208f4..5e0e467 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -240,6 +240,21 @@ struct netbk_tx_pending_inuse { + + #define MAX_PENDING_REQS 256 + ++/* extra field used in struct page */ ++union page_ext { ++ struct { ++#if BITS_PER_LONG < 64 ++#define IDX_WIDTH 8 ++#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH) ++ unsigned int group:GROUP_WIDTH; ++ unsigned int idx:IDX_WIDTH; ++#else ++ unsigned int group, idx; ++#endif ++ } e; ++ void *mapping; ++}; ++ + struct xen_netbk { + struct tasklet_struct net_tx_tasklet; + struct tasklet_struct net_rx_tasklet; +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 417f497..71ec999 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -76,22 +76,27 @@ static inline unsigned long idx_to_kaddr(unsigned int idx) + } + + /* extra field used in struct page */ +-static inline void netif_set_page_index(struct page *pg, unsigned int index) ++static inline void netif_set_page_ext(struct page *pg, unsigned int group, ++ unsigned int idx) + { +- *(unsigned long *)&pg->mapping = index + 1; ++ union page_ext ext = { .e = { .group = group + 1, .idx = idx } }; ++ ++ BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping)); ++ pg->mapping = ext.mapping; + } + +-static inline int netif_page_index(struct page *pg) ++static inline unsigned int netif_page_group(const struct page *pg) + { +- unsigned long idx = (unsigned long)pg->mapping - 1; ++ union page_ext ext = { .mapping = pg->mapping }; + +- if (!PageForeign(pg)) +- return -1; ++ return ext.e.group - 1; ++} + +- if ((idx >= MAX_PENDING_REQS) || (netbk->mmap_pages[idx] != pg)) +- return -1; ++static inline unsigned int netif_page_index(const struct page *pg) ++{ ++ union page_ext ext = { .mapping = pg->mapping }; + +- return idx; ++ return ext.e.idx; + } + + /* +@@ -1380,7 +1385,8 @@ static void netif_page_release(struct page *page, unsigned int order) + { + int idx = netif_page_index(page); + BUG_ON(order); +- BUG_ON(idx < 0); ++ BUG_ON(idx < 0 || idx >= MAX_PENDING_REQS); ++ BUG_ON(netbk->mmap_pages[idx] != page); + netif_idx_release(idx); + } + +@@ -1515,7 +1521,7 @@ static int __init netback_init(void) + for (i = 0; i < MAX_PENDING_REQS; i++) { + page = netbk->mmap_pages[i]; + SetPageForeign(page, netif_page_release); +- netif_set_page_index(page, i); ++ netif_set_page_ext(page, 0, i); + INIT_LIST_HEAD(&netbk->pending_inuse[i].list); + } + +-- +1.7.3.4 + + +From 
9534985c5b9cc3f6238d6cb8bba7d376e82039d3 Mon Sep 17 00:00:00 2001 +From: Dongxiao Xu +Date: Wed, 19 May 2010 17:08:21 -0700 +Subject: [PATCH 089/139] xen: netback: Multiple tasklets support. + +Now netback uses one pair of tasklets for Tx/Rx data transaction. +Netback tasklet could only run at one CPU at a time, and it is +used to serve all the netfronts. Therefore it has become a +performance bottle neck. This patch is to use multiple tasklet +pairs to replace the current single pair in dom0. + +Assuming that Dom0 has CPUNR VCPUs, we define CPUNR kinds of +tasklets pair (CPUNR for Tx, and CPUNR for Rx). Each pare of +tasklets serve specific group of netfronts. Also for those global +and static variables, we duplicated them for each group in +order to avoid the spinlock. + +Signed-off-by: Dongxiao Xu +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/common.h | 6 + + drivers/xen/netback/interface.c | 27 ++++ + drivers/xen/netback/netback.c | 270 ++++++++++++++++++++++++--------------- + 3 files changed, 197 insertions(+), 106 deletions(-) + +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index 5e0e467..847ba58 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -58,6 +58,7 @@ + struct xen_netif { + /* Unique identifier for this interface. */ + domid_t domid; ++ int group; + unsigned int handle; + + u8 fe_dev_addr[6]; +@@ -278,6 +279,8 @@ struct xen_netbk { + /* Protect the net_schedule_list in netif. */ + spinlock_t net_schedule_list_lock; + ++ atomic_t netfront_count; ++ + struct pending_tx_info pending_tx_info[MAX_PENDING_REQS]; + struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS]; + struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS]; +@@ -296,4 +299,7 @@ struct xen_netbk { + struct netbk_rx_meta meta[NET_RX_RING_SIZE]; + }; + ++extern struct xen_netbk *xen_netbk; ++extern int xen_netbk_group_nr; ++ + #endif /* __NETIF__BACKEND__COMMON_H__ */ +diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c +index 086d939..172ef4c 100644 +--- a/drivers/xen/netback/interface.c ++++ b/drivers/xen/netback/interface.c +@@ -54,8 +54,33 @@ + static unsigned long netbk_queue_length = 32; + module_param_named(queue_length, netbk_queue_length, ulong, 0644); + ++static void netbk_add_netif(struct xen_netbk *netbk, int group_nr, ++ struct xen_netif *netif) ++{ ++ int i; ++ int min_netfront_count; ++ int min_group = 0; ++ min_netfront_count = atomic_read(&netbk[0].netfront_count); ++ for (i = 0; i < group_nr; i++) { ++ int netfront_count = atomic_read(&netbk[i].netfront_count); ++ if (netfront_count < min_netfront_count) { ++ min_group = i; ++ min_netfront_count = netfront_count; ++ } ++ } ++ ++ netif->group = min_group; ++ atomic_inc(&netbk[netif->group].netfront_count); ++} ++ ++static void netbk_remove_netif(struct xen_netbk *netbk, struct xen_netif *netif) ++{ ++ atomic_dec(&netbk[netif->group].netfront_count); ++} ++ + static void __netif_up(struct xen_netif *netif) + { ++ netbk_add_netif(xen_netbk, xen_netbk_group_nr, netif); + enable_irq(netif->irq); + netif_schedule_work(netif); + } +@@ -64,6 +89,7 @@ static void __netif_down(struct xen_netif *netif) + { + disable_irq(netif->irq); + netif_deschedule_work(netif); ++ netbk_remove_netif(xen_netbk, netif); + } + + static int net_open(struct net_device *dev) +@@ -214,6 +240,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int + netif = netdev_priv(dev); + memset(netif, 0, sizeof(*netif)); + netif->domid = 
domid; ++ netif->group = -1; + netif->handle = handle; + netif->features = NETIF_F_SG; + atomic_set(&netif->refcnt, 1); +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 71ec999..feefb14 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -48,9 +48,10 @@ + + /*define NETBE_DEBUG_INTERRUPT*/ + +-static struct xen_netbk *netbk; ++struct xen_netbk *xen_netbk; ++int xen_netbk_group_nr; + +-static void netif_idx_release(u16 pending_idx); ++static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx); + static void make_tx_response(struct xen_netif *netif, + struct xen_netif_tx_request *txp, + s8 st); +@@ -61,18 +62,20 @@ static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif, + u16 size, + u16 flags); + +-static void net_tx_action(unsigned long unused); ++static void net_tx_action(unsigned long data); + +-static void net_rx_action(unsigned long unused); ++static void net_rx_action(unsigned long data); + +-static inline unsigned long idx_to_pfn(unsigned int idx) ++static inline unsigned long idx_to_pfn(struct xen_netbk *netbk, ++ unsigned int idx) + { + return page_to_pfn(netbk->mmap_pages[idx]); + } + +-static inline unsigned long idx_to_kaddr(unsigned int idx) ++static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk, ++ unsigned int idx) + { +- return (unsigned long)pfn_to_kaddr(idx_to_pfn(idx)); ++ return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx)); + } + + /* extra field used in struct page */ +@@ -112,7 +115,7 @@ static inline pending_ring_idx_t pending_index(unsigned i) + return i & (MAX_PENDING_REQS-1); + } + +-static inline pending_ring_idx_t nr_pending_reqs(void) ++static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk) + { + return MAX_PENDING_REQS - + netbk->pending_prod + netbk->pending_cons; +@@ -125,10 +128,10 @@ MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop"); + + int netbk_copy_skb_mode; + +-static inline void maybe_schedule_tx_action(void) ++static inline void maybe_schedule_tx_action(struct xen_netbk *netbk) + { + smp_mb(); +- if ((nr_pending_reqs() < (MAX_PENDING_REQS/2)) && ++ if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) && + !list_empty(&netbk->net_schedule_list)) + tasklet_schedule(&netbk->net_tx_tasklet); + } +@@ -235,9 +238,15 @@ static void tx_queue_callback(unsigned long data) + int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) + { + struct xen_netif *netif = netdev_priv(dev); ++ struct xen_netbk *netbk; + + BUG_ON(skb->dev != dev); + ++ if (netif->group == -1) ++ goto drop; ++ ++ netbk = &xen_netbk[netif->group]; ++ + /* Drop the packet if the target domain has no receive buffers. 
*/ + if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif))) + goto drop; +@@ -313,6 +322,7 @@ static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta, + struct gnttab_copy *copy_gop; + struct xen_netif_rx_request *req; + unsigned long old_mfn; ++ int group = netif_page_group(page); + int idx = netif_page_index(page); + + old_mfn = virt_to_mfn(page_address(page)); +@@ -321,7 +331,8 @@ static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta, + + copy_gop = npo->copy + npo->copy_prod++; + copy_gop->flags = GNTCOPY_dest_gref; +- if (idx > -1) { ++ if (PageForeign(page)) { ++ struct xen_netbk *netbk = &xen_netbk[group]; + struct pending_tx_info *src_pend = &netbk->pending_tx_info[idx]; + copy_gop->source.domid = src_pend->netif->domid; + copy_gop->source.u.ref = src_pend->req.gref; +@@ -422,9 +433,10 @@ static void netbk_add_frag_responses(struct xen_netif *netif, int status, + } + } + +-static void net_rx_action(unsigned long unused) ++static void net_rx_action(unsigned long data) + { + struct xen_netif *netif = NULL; ++ struct xen_netbk *netbk = (struct xen_netbk *)data; + s8 status; + u16 id, irq, flags; + struct xen_netif_rx_response *resp; +@@ -584,13 +596,15 @@ static void net_rx_action(unsigned long unused) + tasklet_schedule(&netbk->net_rx_tasklet); + } + +-static void net_alarm(unsigned long unused) ++static void net_alarm(unsigned long data) + { ++ struct xen_netbk *netbk = (struct xen_netbk *)data; + tasklet_schedule(&netbk->net_rx_tasklet); + } + +-static void netbk_tx_pending_timeout(unsigned long unused) ++static void netbk_tx_pending_timeout(unsigned long data) + { ++ struct xen_netbk *netbk = (struct xen_netbk *)data; + tasklet_schedule(&netbk->net_tx_tasklet); + } + +@@ -607,6 +621,7 @@ static int __on_net_schedule_list(struct xen_netif *netif) + + static void remove_from_net_schedule_list(struct xen_netif *netif) + { ++ struct xen_netbk *netbk = &xen_netbk[netif->group]; + spin_lock_irq(&netbk->net_schedule_list_lock); + if (likely(__on_net_schedule_list(netif))) { + list_del_init(&netif->list); +@@ -617,6 +632,7 @@ static void remove_from_net_schedule_list(struct xen_netif *netif) + + static void add_to_net_schedule_list_tail(struct xen_netif *netif) + { ++ struct xen_netbk *netbk = &xen_netbk[netif->group]; + if (__on_net_schedule_list(netif)) + return; + +@@ -631,13 +647,14 @@ static void add_to_net_schedule_list_tail(struct xen_netif *netif) + + void netif_schedule_work(struct xen_netif *netif) + { ++ struct xen_netbk *netbk = &xen_netbk[netif->group]; + int more_to_do; + + RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); + + if (more_to_do) { + add_to_net_schedule_list_tail(netif); +- maybe_schedule_tx_action(); ++ maybe_schedule_tx_action(netbk); + } + } + +@@ -674,14 +691,15 @@ static void tx_credit_callback(unsigned long data) + netif_schedule_work(netif); + } + +-static inline int copy_pending_req(pending_ring_idx_t pending_idx) ++static inline int copy_pending_req(struct xen_netbk *netbk, ++ pending_ring_idx_t pending_idx) + { + return gnttab_copy_grant_page( + netbk->grant_tx_handle[pending_idx], + &netbk->mmap_pages[pending_idx]); + } + +-inline static void net_tx_action_dealloc(void) ++static inline void net_tx_action_dealloc(struct xen_netbk *netbk) + { + struct netbk_tx_pending_inuse *inuse, *n; + struct gnttab_unmap_grant_ref *gop; +@@ -711,13 +729,13 @@ inline static void net_tx_action_dealloc(void) + pending_idx = netbk->dealloc_ring[pending_index(dc++)]; + 
list_move_tail(&pending_inuse[pending_idx].list, &list); + +- pfn = idx_to_pfn(pending_idx); ++ pfn = idx_to_pfn(netbk, pending_idx); + /* Already unmapped? */ + if (!phys_to_machine_mapping_valid(pfn)) + continue; + + gnttab_set_unmap_op(gop, +- idx_to_kaddr(pending_idx), ++ idx_to_kaddr(netbk, pending_idx), + GNTMAP_host_map, + netbk->grant_tx_handle[pending_idx]); + gop++; +@@ -740,7 +758,7 @@ inline static void net_tx_action_dealloc(void) + + pending_tx_info[pending_idx].netif->nr_copied_skbs++; + +- switch (copy_pending_req(pending_idx)) { ++ switch (copy_pending_req(netbk, pending_idx)) { + case 0: + list_move_tail(&inuse->list, &list); + continue; +@@ -843,7 +861,8 @@ static int netbk_count_requests(struct xen_netif *netif, + return frags; + } + +-static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif, ++static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk, ++ struct xen_netif *netif, + struct sk_buff *skb, + struct xen_netif_tx_request *txp, + struct gnttab_map_grant_ref *mop) +@@ -864,7 +883,7 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif, + index = pending_index(netbk->pending_cons++); + pending_idx = netbk->pending_ring[index]; + +- gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx), ++ gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx), + GNTMAP_host_map | GNTMAP_readonly, + txp->gref, netif->domid); + +@@ -877,8 +896,9 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif, + return mop; + } + +-static int netbk_tx_check_mop(struct sk_buff *skb, +- struct gnttab_map_grant_ref **mopp) ++static int netbk_tx_check_mop(struct xen_netbk *netbk, ++ struct sk_buff *skb, ++ struct gnttab_map_grant_ref **mopp) + { + struct gnttab_map_grant_ref *mop = *mopp; + int pending_idx = *((u16 *)skb->data); +@@ -900,7 +920,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb, + netif_put(netif); + } else { + set_phys_to_machine( +- __pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT, ++ __pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT, + FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT)); + netbk->grant_tx_handle[pending_idx] = mop->handle; + } +@@ -918,14 +938,14 @@ static int netbk_tx_check_mop(struct sk_buff *skb, + newerr = (++mop)->status; + if (likely(!newerr)) { + unsigned long addr; +- addr = idx_to_kaddr(pending_idx); ++ addr = idx_to_kaddr(netbk, pending_idx); + set_phys_to_machine( + __pa(addr)>>PAGE_SHIFT, + FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT)); + netbk->grant_tx_handle[pending_idx] = mop->handle; + /* Had a previous error? Invalidate this fragment. */ + if (unlikely(err)) +- netif_idx_release(pending_idx); ++ netif_idx_release(netbk, pending_idx); + continue; + } + +@@ -942,10 +962,10 @@ static int netbk_tx_check_mop(struct sk_buff *skb, + + /* First error: invalidate header and preceding fragments. */ + pending_idx = *((u16 *)skb->data); +- netif_idx_release(pending_idx); ++ netif_idx_release(netbk, pending_idx); + for (j = start; j < i; j++) { + pending_idx = (unsigned long)shinfo->frags[i].page; +- netif_idx_release(pending_idx); ++ netif_idx_release(netbk, pending_idx); + } + + /* Remember the error: invalidate all subsequent fragments. 
*/ +@@ -956,7 +976,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb, + return err; + } + +-static void netbk_fill_frags(struct sk_buff *skb) ++static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb) + { + struct skb_shared_info *shinfo = skb_shinfo(skb); + int nr_frags = shinfo->nr_frags; +@@ -974,7 +994,7 @@ static void netbk_fill_frags(struct sk_buff *skb) + &netbk->pending_inuse_head); + + txp = &netbk->pending_tx_info[pending_idx].req; +- frag->page = virt_to_page(idx_to_kaddr(pending_idx)); ++ frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx)); + frag->size = txp->size; + frag->page_offset = txp->offset; + +@@ -1106,14 +1126,14 @@ static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size) + return false; + } + +-static unsigned net_tx_build_mops(void) ++static unsigned net_tx_build_mops(struct xen_netbk *netbk) + { + struct gnttab_map_grant_ref *mop; + struct sk_buff *skb; + int ret; + + mop = netbk->tx_map_ops; +- while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && ++ while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && + !list_empty(&netbk->net_schedule_list)) { + struct xen_netif *netif; + struct xen_netif_tx_request txreq; +@@ -1215,7 +1235,7 @@ static unsigned net_tx_build_mops(void) + } + } + +- gnttab_set_map_op(mop, idx_to_kaddr(pending_idx), ++ gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx), + GNTMAP_host_map | GNTMAP_readonly, + txreq.gref, netif->domid); + mop++; +@@ -1241,7 +1261,7 @@ static unsigned net_tx_build_mops(void) + + netbk->pending_cons++; + +- mop = netbk_get_requests(netif, skb, txfrags, mop); ++ mop = netbk_get_requests(netbk, netif, skb, txfrags, mop); + + netif->tx.req_cons = idx; + netif_schedule_work(netif); +@@ -1253,7 +1273,7 @@ static unsigned net_tx_build_mops(void) + return mop - netbk->tx_map_ops; + } + +-static void net_tx_submit(void) ++static void net_tx_submit(struct xen_netbk *netbk) + { + struct gnttab_map_grant_ref *mop; + struct sk_buff *skb; +@@ -1270,7 +1290,7 @@ static void net_tx_submit(void) + txp = &netbk->pending_tx_info[pending_idx].req; + + /* Check the remap error code. */ +- if (unlikely(netbk_tx_check_mop(skb, &mop))) { ++ if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) { + DPRINTK("netback grant failed.\n"); + skb_shinfo(skb)->nr_frags = 0; + kfree_skb(skb); +@@ -1279,7 +1299,7 @@ static void net_tx_submit(void) + + data_len = skb->len; + memcpy(skb->data, +- (void *)(idx_to_kaddr(pending_idx)|txp->offset), ++ (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset), + data_len); + if (data_len < txp->size) { + /* Append the packet payload as a fragment. */ +@@ -1287,7 +1307,7 @@ static void net_tx_submit(void) + txp->size -= data_len; + } else { + /* Schedule a response immediately. 
*/ +- netif_idx_release(pending_idx); ++ netif_idx_release(netbk, pending_idx); + } + + if (txp->flags & NETTXF_csum_blank) +@@ -1295,7 +1315,7 @@ static void net_tx_submit(void) + else if (txp->flags & NETTXF_data_validated) + skb->ip_summed = CHECKSUM_UNNECESSARY; + +- netbk_fill_frags(skb); ++ netbk_fill_frags(netbk, skb); + + /* + * If the initial fragment was < PKT_PROT_LEN then +@@ -1344,15 +1364,16 @@ static void net_tx_submit(void) + } + + /* Called after netfront has transmitted */ +-static void net_tx_action(unsigned long unused) ++static void net_tx_action(unsigned long data) + { ++ struct xen_netbk *netbk = (struct xen_netbk *)data; + unsigned nr_mops; + int ret; + + if (netbk->dealloc_cons != netbk->dealloc_prod) +- net_tx_action_dealloc(); ++ net_tx_action_dealloc(netbk); + +- nr_mops = net_tx_build_mops(); ++ nr_mops = net_tx_build_mops(netbk); + + if (nr_mops == 0) + return; +@@ -1361,10 +1382,10 @@ static void net_tx_action(unsigned long unused) + netbk->tx_map_ops, nr_mops); + BUG_ON(ret); + +- net_tx_submit(); ++ net_tx_submit(netbk); + } + +-static void netif_idx_release(u16 pending_idx) ++static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx) + { + static DEFINE_SPINLOCK(_lock); + unsigned long flags; +@@ -1383,19 +1404,28 @@ static void netif_idx_release(u16 pending_idx) + + static void netif_page_release(struct page *page, unsigned int order) + { ++ int group = netif_page_group(page); + int idx = netif_page_index(page); ++ struct xen_netbk *netbk = &xen_netbk[group]; + BUG_ON(order); ++ BUG_ON(group < 0 || group >= xen_netbk_group_nr); + BUG_ON(idx < 0 || idx >= MAX_PENDING_REQS); + BUG_ON(netbk->mmap_pages[idx] != page); +- netif_idx_release(idx); ++ netif_idx_release(netbk, idx); + } + + irqreturn_t netif_be_int(int irq, void *dev_id) + { + struct xen_netif *netif = dev_id; ++ struct xen_netbk *netbk; ++ ++ if (netif->group == -1) ++ return IRQ_NONE; ++ ++ netbk = &xen_netbk[netif->group]; + + add_to_net_schedule_list_tail(netif); +- maybe_schedule_tx_action(); ++ maybe_schedule_tx_action(netbk); + + if (netif_schedulable(netif) && !netbk_queue_full(netif)) + netif_wake_queue(netif->dev); +@@ -1453,28 +1483,40 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs) + struct list_head *ent; + struct xen_netif *netif; + int i = 0; ++ int group = 0; + + printk(KERN_ALERT "netif_schedule_list:\n"); +- spin_lock_irq(&netbk->net_schedule_list_lock); + +- list_for_each(ent, &netbk->net_schedule_list) { +- netif = list_entry(ent, struct xen_netif, list); +- printk(KERN_ALERT " %d: private(rx_req_cons=%08x " +- "rx_resp_prod=%08x\n", +- i, netif->rx.req_cons, netif->rx.rsp_prod_pvt); +- printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n", +- netif->tx.req_cons, netif->tx.rsp_prod_pvt); +- printk(KERN_ALERT " shared(rx_req_prod=%08x " +- "rx_resp_prod=%08x\n", +- netif->rx.sring->req_prod, netif->rx.sring->rsp_prod); +- printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n", +- netif->rx.sring->rsp_event, netif->tx.sring->req_prod); +- printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n", +- netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event); +- i++; ++ for (group = 0; group < xen_netbk_group_nr; group++) { ++ struct xen_netbk *netbk = &xen_netbk[group]; ++ spin_lock_irq(&netbk->net_schedule_list_lock); ++ printk(KERN_ALERT "xen_netback group number: %d\n", group); ++ list_for_each(ent, &netbk->net_schedule_list) { ++ netif = list_entry(ent, struct xen_netif, list); ++ printk(KERN_ALERT " %d: private(rx_req_cons=%08x " 
++ "rx_resp_prod=%08x\n", ++ i, netif->rx.req_cons, netif->rx.rsp_prod_pvt); ++ printk(KERN_ALERT ++ " tx_req_cons=%08x, tx_resp_prod=%08x)\n", ++ netif->tx.req_cons, netif->tx.rsp_prod_pvt); ++ printk(KERN_ALERT ++ " shared(rx_req_prod=%08x " ++ "rx_resp_prod=%08x\n", ++ netif->rx.sring->req_prod, ++ netif->rx.sring->rsp_prod); ++ printk(KERN_ALERT ++ " rx_event=%08x, tx_req_prod=%08x\n", ++ netif->rx.sring->rsp_event, ++ netif->tx.sring->req_prod); ++ printk(KERN_ALERT ++ " tx_resp_prod=%08x, tx_event=%08x)\n", ++ netif->tx.sring->rsp_prod, ++ netif->tx.sring->rsp_event); ++ i++; ++ } ++ spin_unlock_irq(&netbk->net_schedule_list_lock); + } + +- spin_unlock_irq(&netbk->net_schedule_list_lock); + printk(KERN_ALERT " ** End of netif_schedule_list **\n"); + + return IRQ_HANDLED; +@@ -1486,12 +1528,15 @@ static int __init netback_init(void) + int i; + struct page *page; + int rc = 0; ++ int group; + + if (!xen_domain()) + return -ENODEV; + +- netbk = (struct xen_netbk *)vmalloc(sizeof(struct xen_netbk)); +- if (!netbk) { ++ xen_netbk_group_nr = num_online_cpus(); ++ xen_netbk = (struct xen_netbk *)vmalloc(sizeof(struct xen_netbk) * ++ xen_netbk_group_nr); ++ if (!xen_netbk) { + printk(KERN_ALERT "%s: out of memory\n", __func__); + return -ENOMEM; + } +@@ -1499,44 +1544,54 @@ static int __init netback_init(void) + /* We can increase reservation by this much in net_rx_action(). */ + // balloon_update_driver_allowance(NET_RX_RING_SIZE); + +- skb_queue_head_init(&netbk->rx_queue); +- skb_queue_head_init(&netbk->tx_queue); +- +- init_timer(&netbk->net_timer); +- netbk->net_timer.data = 0; +- netbk->net_timer.function = net_alarm; +- +- init_timer(&netbk->netbk_tx_pending_timer); +- netbk->netbk_tx_pending_timer.data = 0; +- netbk->netbk_tx_pending_timer.function = netbk_tx_pending_timeout; ++ for (group = 0; group < xen_netbk_group_nr; group++) { ++ struct xen_netbk *netbk = &xen_netbk[group]; ++ skb_queue_head_init(&netbk->rx_queue); ++ skb_queue_head_init(&netbk->tx_queue); ++ ++ init_timer(&netbk->net_timer); ++ netbk->net_timer.data = (unsigned long)netbk; ++ netbk->net_timer.function = net_alarm; ++ ++ init_timer(&netbk->netbk_tx_pending_timer); ++ netbk->netbk_tx_pending_timer.data = (unsigned long)netbk; ++ netbk->netbk_tx_pending_timer.function = ++ netbk_tx_pending_timeout; ++ ++ netbk->mmap_pages = ++ alloc_empty_pages_and_pagevec(MAX_PENDING_REQS); ++ if (!netbk->mmap_pages) { ++ printk(KERN_ALERT "%s: out of memory\n", __func__); ++ del_timer(&netbk->netbk_tx_pending_timer); ++ del_timer(&netbk->net_timer); ++ rc = -ENOMEM; ++ goto failed_init; ++ } + +- netbk->mmap_pages = +- alloc_empty_pages_and_pagevec(MAX_PENDING_REQS); +- if (!netbk->mmap_pages) { +- printk(KERN_ALERT "%s: out of memory\n", __func__); +- rc = -ENOMEM; +- goto failed_init2; +- } ++ for (i = 0; i < MAX_PENDING_REQS; i++) { ++ page = netbk->mmap_pages[i]; ++ SetPageForeign(page, netif_page_release); ++ netif_set_page_ext(page, group, i); ++ INIT_LIST_HEAD(&netbk->pending_inuse[i].list); ++ } + +- for (i = 0; i < MAX_PENDING_REQS; i++) { +- page = netbk->mmap_pages[i]; +- SetPageForeign(page, netif_page_release); +- netif_set_page_ext(page, 0, i); +- INIT_LIST_HEAD(&netbk->pending_inuse[i].list); +- } ++ netbk->pending_cons = 0; ++ netbk->pending_prod = MAX_PENDING_REQS; ++ for (i = 0; i < MAX_PENDING_REQS; i++) ++ netbk->pending_ring[i] = i; + +- netbk->pending_cons = 0; +- netbk->pending_prod = MAX_PENDING_REQS; +- for (i = 0; i < MAX_PENDING_REQS; i++) +- netbk->pending_ring[i] = i; ++ 
tasklet_init(&netbk->net_tx_tasklet, net_tx_action, ++ (unsigned long)netbk); ++ tasklet_init(&netbk->net_rx_tasklet, net_rx_action, ++ (unsigned long)netbk); + +- tasklet_init(&netbk->net_tx_tasklet, net_tx_action, 0); +- tasklet_init(&netbk->net_rx_tasklet, net_rx_action, 0); ++ INIT_LIST_HEAD(&netbk->pending_inuse_head); ++ INIT_LIST_HEAD(&netbk->net_schedule_list); + +- INIT_LIST_HEAD(&netbk->pending_inuse_head); +- INIT_LIST_HEAD(&netbk->net_schedule_list); ++ spin_lock_init(&netbk->net_schedule_list_lock); + +- spin_lock_init(&netbk->net_schedule_list_lock); ++ atomic_set(&netbk->netfront_count, 0); ++ } + + netbk_copy_skb_mode = NETBK_DONT_COPY_SKB; + if (MODPARM_copy_skb) { +@@ -1551,25 +1606,28 @@ static int __init netback_init(void) + + rc = netif_xenbus_init(); + if (rc) +- goto failed_init1; ++ goto failed_init; + + #ifdef NETBE_DEBUG_INTERRUPT + (void)bind_virq_to_irqhandler(VIRQ_DEBUG, + 0, + netif_be_dbg, +- SA_SHIRQ, ++ IRQF_SHARED, + "net-be-dbg", + &netif_be_dbg); + #endif + + return 0; + +-failed_init1: +- free_empty_pages_and_pagevec(netbk->mmap_pages, MAX_PENDING_REQS); +-failed_init2: +- del_timer(&netbk->netbk_tx_pending_timer); +- del_timer(&netbk->net_timer); +- vfree(netbk); ++failed_init: ++ for (i = 0; i < group; i++) { ++ struct xen_netbk *netbk = &xen_netbk[i]; ++ free_empty_pages_and_pagevec(netbk->mmap_pages, ++ MAX_PENDING_REQS); ++ del_timer(&netbk->netbk_tx_pending_timer); ++ del_timer(&netbk->net_timer); ++ } ++ vfree(xen_netbk); + return rc; + + } +-- +1.7.3.4 + + +From e7317b70c0436c109b605bb377939cb2eaff6a6f Mon Sep 17 00:00:00 2001 +From: Dongxiao Xu +Date: Wed, 19 May 2010 17:08:22 -0700 +Subject: [PATCH 090/139] xen: netback: Use Kernel thread to replace the tasklet. + +Kernel thread has more control over QoS, and could improve dom0's +userspace responseness. This option is defaultly off currently. + +Signed-off-by: Dongxiao Xu +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/common.h | 13 ++++- + drivers/xen/netback/netback.c | 109 ++++++++++++++++++++++++++++++++++++---- + 2 files changed, 109 insertions(+), 13 deletions(-) + +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index 847ba58..36cb2b9 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -257,8 +257,17 @@ union page_ext { + }; + + struct xen_netbk { +- struct tasklet_struct net_tx_tasklet; +- struct tasklet_struct net_rx_tasklet; ++ union { ++ struct { ++ struct tasklet_struct net_tx_tasklet; ++ struct tasklet_struct net_rx_tasklet; ++ } tasklet; ++ ++ struct { ++ wait_queue_head_t netbk_action_wq; ++ struct task_struct *task; ++ } kthread; ++ }; + + struct sk_buff_head rx_queue; + struct sk_buff_head tx_queue; +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index feefb14..547dcaa 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -38,6 +38,7 @@ + + #include + #include ++#include + + #include + #include +@@ -128,12 +129,31 @@ MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop"); + + int netbk_copy_skb_mode; + ++static int MODPARM_netback_kthread; ++module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0); ++MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet"); ++ ++/* ++ * Netback bottom half handler. ++ * dir indicates the data direction. ++ * rx: 1, tx: 0. 
++ */ ++static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir) ++{ ++ if (MODPARM_netback_kthread) ++ wake_up(&netbk->kthread.netbk_action_wq); ++ else if (dir) ++ tasklet_schedule(&netbk->tasklet.net_rx_tasklet); ++ else ++ tasklet_schedule(&netbk->tasklet.net_tx_tasklet); ++} ++ + static inline void maybe_schedule_tx_action(struct xen_netbk *netbk) + { + smp_mb(); + if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) && + !list_empty(&netbk->net_schedule_list)) +- tasklet_schedule(&netbk->net_tx_tasklet); ++ xen_netbk_bh_handler(netbk, 0); + } + + static struct sk_buff *netbk_copy_skb(struct sk_buff *skb) +@@ -289,7 +309,8 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) + } + } + skb_queue_tail(&netbk->rx_queue, skb); +- tasklet_schedule(&netbk->net_rx_tasklet); ++ ++ xen_netbk_bh_handler(netbk, 1); + + return 0; + +@@ -593,19 +614,19 @@ static void net_rx_action(unsigned long data) + /* More work to do? */ + if (!skb_queue_empty(&netbk->rx_queue) && + !timer_pending(&netbk->net_timer)) +- tasklet_schedule(&netbk->net_rx_tasklet); ++ xen_netbk_bh_handler(netbk, 1); + } + + static void net_alarm(unsigned long data) + { + struct xen_netbk *netbk = (struct xen_netbk *)data; +- tasklet_schedule(&netbk->net_rx_tasklet); ++ xen_netbk_bh_handler(netbk, 1); + } + + static void netbk_tx_pending_timeout(unsigned long data) + { + struct xen_netbk *netbk = (struct xen_netbk *)data; +- tasklet_schedule(&netbk->net_tx_tasklet); ++ xen_netbk_bh_handler(netbk, 0); + } + + struct net_device_stats *netif_be_get_stats(struct net_device *dev) +@@ -1348,7 +1369,7 @@ static void net_tx_submit(struct xen_netbk *netbk) + continue; + } + +- netif_rx(skb); ++ netif_rx_ni(skb); + netif->dev->last_rx = jiffies; + } + +@@ -1399,7 +1420,7 @@ static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx) + netbk->dealloc_prod++; + spin_unlock_irqrestore(&_lock, flags); + +- tasklet_schedule(&netbk->net_tx_tasklet); ++ xen_netbk_bh_handler(netbk, 0); + } + + static void netif_page_release(struct page *page, unsigned int order) +@@ -1523,6 +1544,46 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs) + } + #endif + ++static inline int rx_work_todo(struct xen_netbk *netbk) ++{ ++ return !skb_queue_empty(&netbk->rx_queue); ++} ++ ++static inline int tx_work_todo(struct xen_netbk *netbk) ++{ ++ if (netbk->dealloc_cons != netbk->dealloc_prod) ++ return 1; ++ ++ if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && ++ !list_empty(&netbk->net_schedule_list)) ++ return 1; ++ ++ return 0; ++} ++ ++static int netbk_action_thread(void *data) ++{ ++ struct xen_netbk *netbk = (struct xen_netbk *)data; ++ while (!kthread_should_stop()) { ++ wait_event_interruptible(netbk->kthread.netbk_action_wq, ++ rx_work_todo(netbk) ++ || tx_work_todo(netbk) ++ || kthread_should_stop()); ++ cond_resched(); ++ ++ if (kthread_should_stop()) ++ break; ++ ++ if (rx_work_todo(netbk)) ++ net_rx_action((unsigned long)netbk); ++ ++ if (tx_work_todo(netbk)) ++ net_tx_action((unsigned long)netbk); ++ } ++ ++ return 0; ++} ++ + static int __init netback_init(void) + { + int i; +@@ -1580,10 +1641,34 @@ static int __init netback_init(void) + for (i = 0; i < MAX_PENDING_REQS; i++) + netbk->pending_ring[i] = i; + +- tasklet_init(&netbk->net_tx_tasklet, net_tx_action, +- (unsigned long)netbk); +- tasklet_init(&netbk->net_rx_tasklet, net_rx_action, +- (unsigned long)netbk); ++ if (MODPARM_netback_kthread) { ++ init_waitqueue_head(&netbk->kthread.netbk_action_wq); ++ 
netbk->kthread.task = ++ kthread_create(netbk_action_thread, ++ (void *)netbk, ++ "netback/%u", group); ++ ++ if (!IS_ERR(netbk->kthread.task)) { ++ kthread_bind(netbk->kthread.task, group); ++ wake_up_process(netbk->kthread.task); ++ } else { ++ printk(KERN_ALERT ++ "kthread_run() fails at netback\n"); ++ free_empty_pages_and_pagevec(netbk->mmap_pages, ++ MAX_PENDING_REQS); ++ del_timer(&netbk->netbk_tx_pending_timer); ++ del_timer(&netbk->net_timer); ++ rc = PTR_ERR(netbk->kthread.task); ++ goto failed_init; ++ } ++ } else { ++ tasklet_init(&netbk->tasklet.net_tx_tasklet, ++ net_tx_action, ++ (unsigned long)netbk); ++ tasklet_init(&netbk->tasklet.net_rx_tasklet, ++ net_rx_action, ++ (unsigned long)netbk); ++ } + + INIT_LIST_HEAD(&netbk->pending_inuse_head); + INIT_LIST_HEAD(&netbk->net_schedule_list); +@@ -1626,6 +1711,8 @@ failed_init: + MAX_PENDING_REQS); + del_timer(&netbk->netbk_tx_pending_timer); + del_timer(&netbk->net_timer); ++ if (MODPARM_netback_kthread) ++ kthread_stop(netbk->kthread.task); + } + vfree(xen_netbk); + return rc; +-- +1.7.3.4 + + +From 6359d5939c5d1f59b794cd02e8cdbd36b9f3434d Mon Sep 17 00:00:00 2001 +From: James Harper +Date: Fri, 28 May 2010 23:12:56 -0700 +Subject: [PATCH 091/139] xen: netback: avoid null-pointer access in netback_uevent + +Check if drvdata has been set up yet and return if it hasn't. + +Signed-off-by: James Harper +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/xenbus.c | 9 +++++++-- + 1 files changed, 7 insertions(+), 2 deletions(-) + +diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c +index fcd3c34..e30b0c7 100644 +--- a/drivers/xen/netback/xenbus.c ++++ b/drivers/xen/netback/xenbus.c +@@ -154,12 +154,17 @@ fail: + */ + static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env) + { +- struct backend_info *be = dev_get_drvdata(&xdev->dev); +- struct xen_netif *netif = be->netif; ++ struct backend_info *be; ++ struct xen_netif *netif; + char *val; + + DPRINTK("netback_uevent"); + ++ be = dev_get_drvdata(&xdev->dev); ++ if (!be) ++ return 0; ++ netif = be->netif; ++ + val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL); + if (IS_ERR(val)) { + int err = PTR_ERR(val); +-- +1.7.3.4 + + +From 4a818daa044d9d499412e8f6e2e3086c0521e7b3 Mon Sep 17 00:00:00 2001 +From: Keir Fraser +Date: Fri, 11 Jun 2010 11:48:30 +0100 +Subject: [PATCH 092/139] xen: netback: Fixes for delayed copy of tx network packets. + + - Should call net_tx_action_dealloc() even when dealloc ring is + empty, as there may in any case be work to do on the + pending_inuse list. + - Should not exit directly from the middle of the tx_action tasklet, + as the tx_pending_timer should always be checked and updated at the + end of the tasklet. 
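+
+Condensed, the tasklet body after this change looks roughly like the
+sketch below (a paraphrase of the hunks that follow, not the literal
+code; the GNTTABOP_map_grant_ref hypercall between building and
+submitting the mops is elided):
+
+    static void net_tx_action(unsigned long data)
+    {
+        struct xen_netbk *netbk = (struct xen_netbk *)data;
+        unsigned nr_mops;
+
+        /* Always run: the pending_inuse list may need servicing even
+         * when the dealloc ring itself is empty. */
+        net_tx_action_dealloc(netbk);
+
+        nr_mops = net_tx_build_mops(netbk);
+        if (nr_mops != 0) {
+            /* ... GNTTABOP_map_grant_ref hypercall, then ... */
+            net_tx_submit(netbk);
+        }
+
+        /* Reached on every invocation, so the delayed-copy timer is
+         * always re-armed against the oldest pending_inuse entry. */
+        if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+            !list_empty(&netbk->pending_inuse_head)) {
+            struct netbk_tx_pending_inuse *oldest =
+                list_entry(netbk->pending_inuse_head.next,
+                           struct netbk_tx_pending_inuse, list);
+            mod_timer(&netbk->netbk_tx_pending_timer,
+                      oldest->alloc_time + HZ);
+        }
+    }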
+ +Signed-off-by: Keir Fraser +Signed-off-by: Ian Campbell +[picked from linux-2.6.18-xen.hg 959:1a97bd686258, ported across a43e2175 "xen/netback: move code around"] +--- + drivers/xen/netback/netback.c | 25 ++++++++++++------------- + 1 files changed, 12 insertions(+), 13 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 547dcaa..58dfbd2 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -1372,16 +1372,6 @@ static void net_tx_submit(struct xen_netbk *netbk) + netif_rx_ni(skb); + netif->dev->last_rx = jiffies; + } +- +- if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB && +- !list_empty(&netbk->pending_inuse_head)) { +- struct netbk_tx_pending_inuse *oldest; +- +- oldest = list_entry(netbk->pending_inuse_head.next, +- struct netbk_tx_pending_inuse, list); +- mod_timer(&netbk->netbk_tx_pending_timer, +- oldest->alloc_time + HZ); +- } + } + + /* Called after netfront has transmitted */ +@@ -1391,19 +1381,28 @@ static void net_tx_action(unsigned long data) + unsigned nr_mops; + int ret; + +- if (netbk->dealloc_cons != netbk->dealloc_prod) +- net_tx_action_dealloc(netbk); ++ net_tx_action_dealloc(netbk); + + nr_mops = net_tx_build_mops(netbk); + + if (nr_mops == 0) +- return; ++ goto out; + + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, + netbk->tx_map_ops, nr_mops); + BUG_ON(ret); + + net_tx_submit(netbk); ++out: ++ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB && ++ !list_empty(&netbk->pending_inuse_head)) { ++ struct netbk_tx_pending_inuse *oldest; ++ ++ oldest = list_entry(netbk->pending_inuse_head.next, ++ struct netbk_tx_pending_inuse, list); ++ mod_timer(&netbk->netbk_tx_pending_timer, ++ oldest->alloc_time + HZ); ++ } + } + + static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx) +-- +1.7.3.4 + + +From 48fa1af97e6c9d304c04f70a75de1340e7d79e18 Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Fri, 11 Jun 2010 10:51:01 +0100 +Subject: [PATCH 093/139] xen: netback: handle NET_SKBUFF_DATA_USES_OFFSET correctly + +Signed-off-by: Ian Campbell +Cc: Jan Beulich +--- + drivers/xen/netback/netback.c | 4 ++++ + 1 files changed, 4 insertions(+), 0 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 58dfbd2..aa094af 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -218,7 +218,11 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb) + len -= copy; + } + ++#ifdef NET_SKBUFF_DATA_USES_OFFSET ++ offset = 0; ++#else + offset = nskb->data - skb->data; ++#endif + + nskb->transport_header = skb->transport_header + offset; + nskb->network_header = skb->network_header + offset; +-- +1.7.3.4 + + +From 7d3e6e42251f179e407fa5236f613e5500b3a3ea Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Fri, 11 Jun 2010 10:51:01 +0100 +Subject: [PATCH 094/139] xen: netback: drop frag member from struct netbk_rx_meta + +It has been unused since c3219dc "xen/netback: completely drop flip +support", as has netbk_free_pages(). 
+ +(Although it now has only a single member struct netbk_rx_meta will +gain other members in a subsequent patch so there is no point +reworking to get rid of the struct) + +Signed-off-by: Ian Campbell +--- + drivers/xen/netback/common.h | 1 - + drivers/xen/netback/netback.c | 8 -------- + 2 files changed, 0 insertions(+), 9 deletions(-) + +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index 36cb2b9..be4fe91 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -230,7 +230,6 @@ struct pending_tx_info { + typedef unsigned int pending_ring_idx_t; + + struct netbk_rx_meta { +- skb_frag_t frag; + int id; + }; + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index aa094af..9f7e489 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -411,14 +411,6 @@ static void netbk_gop_skb(struct sk_buff *skb, + netif->rx.req_cons += nr_frags + extra; + } + +-static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta) +-{ +- int i; +- +- for (i = 0; i < nr_frags; i++) +- put_page(meta[i].frag.page); +-} +- + /* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was + used to set up the operations on the top of + netrx_pending_operations, which have since been done. Check that +-- +1.7.3.4 + + +From 1ced27150d0092c40ebbbbb3896192003d433c0e Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Fri, 11 Jun 2010 10:51:01 +0100 +Subject: [PATCH 095/139] xen: netback: linearise SKBs as we copy them into guest memory on guest-RX. + +There's no point in sending lots of little packets to a copying +receiver if we can instead arrange to copy them all into a single RX +buffer. We need to copy anyway, so there's no overhead here, and this +is a little bit easier on the receiving domain's network stack. + +Based on a patch by Steven Smith. Fixed to not skip unnecessarily to +the next buffer which could leave the head fragment of a received +frame empty if the headlen of an SKB was large (which would crash +netfront). Instead we only try and pack "small enough" fragments +together but do not try to coalesce large or whole page fragments. + +In previous iterations of this patch we also tried to only include +2048 bytes per frag because very old netfronts stored other +information in the second half of the page. It has been determined +that only frontends which support scatter-gather are going to come +down this path and that any guest which supports scatter-gather is +also new enough to allow us to use the full page size for each +fragment (since this limitation which fixed as part of the SG +implementation) so we do not need this restriction. + +Signed-off-by: Ian Campbell +Cc: Steven Smith +--- + drivers/xen/netback/common.h | 15 ++- + drivers/xen/netback/netback.c | 282 ++++++++++++++++++++++++++++++----------- + 2 files changed, 218 insertions(+), 79 deletions(-) + +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index be4fe91..9c0c048 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -82,7 +82,9 @@ struct xen_netif { + /* Internal feature information. */ + u8 can_queue:1; /* can queue packets for receiver? */ + +- /* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */ ++ /* Allow netif_be_start_xmit() to peek ahead in the rx request ++ * ring. This is a prediction of what rx_req_cons will be once ++ * all queued skbs are put on the ring. 
*/ + RING_IDX rx_req_cons_peek; + + /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */ +@@ -231,6 +233,8 @@ typedef unsigned int pending_ring_idx_t; + + struct netbk_rx_meta { + int id; ++ int size; ++ int gso_size; + }; + + struct netbk_tx_pending_inuse { +@@ -240,6 +244,8 @@ struct netbk_tx_pending_inuse { + + #define MAX_PENDING_REQS 256 + ++#define MAX_BUFFER_OFFSET PAGE_SIZE ++ + /* extra field used in struct page */ + union page_ext { + struct { +@@ -301,7 +307,12 @@ struct xen_netbk { + struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3]; + struct mmu_update rx_mmu[NET_RX_RING_SIZE]; + struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE]; +- struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE]; ++ /* ++ * Each head or fragment can be up to 4096 bytes. Given ++ * MAX_BUFFER_OFFSET of 4096 the worst case is that each ++ * head/fragment uses 2 copy operation. ++ */ ++ struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE]; + unsigned char rx_notify[NR_IRQS]; + u16 notify_list[NET_RX_RING_SIZE]; + struct netbk_rx_meta meta[NET_RX_RING_SIZE]; +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 9f7e489..d53d88e 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -259,6 +259,48 @@ static void tx_queue_callback(unsigned long data) + netif_wake_queue(netif->dev); + } + ++/* Figure out how many ring slots we're going to need to send @skb to ++ the guest. */ ++static unsigned count_skb_slots(struct sk_buff *skb, struct xen_netif *netif) ++{ ++ unsigned count; ++ unsigned copy_off; ++ unsigned i; ++ ++ copy_off = 0; ++ count = 1; ++ ++ BUG_ON(offset_in_page(skb->data) + skb_headlen(skb) > MAX_BUFFER_OFFSET); ++ ++ copy_off = skb_headlen(skb); ++ ++ if (skb_shinfo(skb)->gso_size) ++ count++; ++ ++ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { ++ unsigned long size = skb_shinfo(skb)->frags[i].size; ++ unsigned long bytes; ++ while (size > 0) { ++ BUG_ON(copy_off > MAX_BUFFER_OFFSET); ++ ++ /* These checks are the same as in netbk_gop_frag_copy */ ++ if (copy_off == MAX_BUFFER_OFFSET ++ || ((copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && copy_off)) { ++ count++; ++ copy_off = 0; ++ } ++ ++ bytes = size; ++ if (copy_off + bytes > MAX_BUFFER_OFFSET) ++ bytes = MAX_BUFFER_OFFSET - copy_off; ++ ++ copy_off += bytes; ++ size -= bytes; ++ } ++ } ++ return count; ++} ++ + int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) + { + struct xen_netif *netif = netdev_priv(dev); +@@ -290,8 +332,9 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) + skb = nskb; + } + +- netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 + +- !!skb_shinfo(skb)->gso_size; ++ /* Reserve ring slots for the worst-case number of ++ * fragments. */ ++ netif->rx_req_cons_peek += count_skb_slots(skb, netif); + netif_get(netif); + + if (netbk_can_queue(dev) && netbk_queue_full(netif)) { +@@ -335,96 +378,165 @@ struct netrx_pending_operations { + struct gnttab_copy *copy; + struct multicall_entry *mcl; + struct netbk_rx_meta *meta; ++ int copy_off; ++ grant_ref_t copy_gref; + }; + + /* Set up the grant operations for this fragment. If it's a flipping + interface, we also set up the unmap request from here. 
*/ +-static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta, +- int i, struct netrx_pending_operations *npo, +- struct page *page, unsigned long size, +- unsigned long offset) ++ ++static void netbk_gop_frag_copy(struct xen_netif *netif, ++ struct netrx_pending_operations *npo, ++ struct page *page, unsigned long size, ++ unsigned long offset, int head) + { + struct gnttab_copy *copy_gop; +- struct xen_netif_rx_request *req; +- unsigned long old_mfn; ++ struct netbk_rx_meta *meta; + int group = netif_page_group(page); + int idx = netif_page_index(page); ++ unsigned long bytes; ++ ++ /* Data must not cross a page boundary. */ ++ BUG_ON(size + offset > PAGE_SIZE); + +- old_mfn = virt_to_mfn(page_address(page)); ++ meta = npo->meta + npo->meta_prod - 1; + +- req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i); ++ while (size > 0) { ++ BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET); + +- copy_gop = npo->copy + npo->copy_prod++; +- copy_gop->flags = GNTCOPY_dest_gref; +- if (PageForeign(page)) { ++ /* ++ * Move to a new receive buffer if: ++ * ++ * simple case: we have completely filled the current buffer. ++ * ++ * complex case: the current frag would overflow ++ * the current buffer but only if: ++ * (i) this frag would fit completely in the next buffer ++ * and (ii) there is already some data in the current buffer ++ * and (iii) this is not the head buffer. ++ * ++ * Where: ++ * - (i) stops us splitting a frag into two copies ++ * unless the frag is too large for a single buffer. ++ * - (ii) stops us from leaving a buffer pointlessly empty. ++ * - (iii) stops us leaving the first buffer ++ * empty. Strictly speaking this is already covered ++ * by (ii) but is explicitly checked because ++ * netfront relies on the first buffer being ++ * non-empty and can crash otherwise. ++ * ++ * This means we will effectively linearise small ++ * frags but do not needlessly split large buffers ++ * into multiple copies tend to give large frags their ++ * own buffers as before. ++ */ ++ if (npo->copy_off == MAX_BUFFER_OFFSET ++ || ((npo->copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && npo->copy_off && !head)) { ++ struct xen_netif_rx_request *req; ++ ++ BUG_ON(head); /* Netfront requires there to be some data in the head buffer. 
*/ ++ /* Overflowed this request, go to the next one */ ++ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++); ++ meta = npo->meta + npo->meta_prod++; ++ meta->size = 0; ++ meta->id = req->id; ++ npo->copy_off = 0; ++ npo->copy_gref = req->gref; ++ } ++ ++ bytes = size; ++ if (npo->copy_off + bytes > MAX_BUFFER_OFFSET) ++ bytes = MAX_BUFFER_OFFSET - npo->copy_off; ++ ++ copy_gop = npo->copy + npo->copy_prod++; ++ copy_gop->flags = GNTCOPY_dest_gref; ++ if (PageForeign(page)) { + struct xen_netbk *netbk = &xen_netbk[group]; + struct pending_tx_info *src_pend = &netbk->pending_tx_info[idx]; + copy_gop->source.domid = src_pend->netif->domid; + copy_gop->source.u.ref = src_pend->req.gref; +- copy_gop->flags |= GNTCOPY_source_gref; +- } else { +- copy_gop->source.domid = DOMID_SELF; +- copy_gop->source.u.gmfn = old_mfn; +- } +- copy_gop->source.offset = offset; +- copy_gop->dest.domid = netif->domid; +- copy_gop->dest.offset = 0; +- copy_gop->dest.u.ref = req->gref; +- copy_gop->len = size; ++ copy_gop->flags |= GNTCOPY_source_gref; ++ } else { ++ copy_gop->source.domid = DOMID_SELF; ++ copy_gop->source.u.gmfn = virt_to_mfn(page_address(page)); ++ } ++ copy_gop->source.offset = offset; ++ copy_gop->dest.domid = netif->domid; + +- return req->id; ++ copy_gop->dest.offset = npo->copy_off; ++ copy_gop->dest.u.ref = npo->copy_gref; ++ copy_gop->len = bytes; ++ ++ npo->copy_off += bytes; ++ meta->size += bytes; ++ ++ offset += bytes; ++ size -= bytes; ++ head = 0; /* Must be something in this buffer now */ ++ } + } + +-static void netbk_gop_skb(struct sk_buff *skb, +- struct netrx_pending_operations *npo) ++/* Prepare an SKB to be transmitted to the frontend. This is ++ responsible for allocating grant operations, meta structures, etc. ++ It returns the number of meta structures consumed. The number of ++ ring slots used is always equal to the number of meta slots used ++ plus the number of GSO descriptors used. Currently, we use either ++ zero GSO descriptors (for non-GSO packets) or one descriptor (for ++ frontend-side LRO). */ ++static int netbk_gop_skb(struct sk_buff *skb, ++ struct netrx_pending_operations *npo) + { + struct xen_netif *netif = netdev_priv(skb->dev); + int nr_frags = skb_shinfo(skb)->nr_frags; + int i; +- int extra; +- struct netbk_rx_meta *head_meta, *meta; ++ struct xen_netif_rx_request *req; ++ struct netbk_rx_meta *meta; ++ int old_meta_prod; ++ ++ old_meta_prod = npo->meta_prod; + +- head_meta = npo->meta + npo->meta_prod++; +- head_meta->frag.page_offset = skb_shinfo(skb)->gso_type; +- head_meta->frag.size = skb_shinfo(skb)->gso_size; +- extra = !!head_meta->frag.size + 1; ++ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++); ++ meta = npo->meta + npo->meta_prod++; ++ meta->gso_size = skb_shinfo(skb)->gso_size; ++ meta->size = 0; ++ meta->id = req->id; ++ npo->copy_off = 0; ++ npo->copy_gref = req->gref; ++ ++ netbk_gop_frag_copy(netif, ++ npo, virt_to_page(skb->data), ++ skb_headlen(skb), ++ offset_in_page(skb->data), 1); ++ ++ /* Leave a gap for the GSO descriptor. 
*/ ++ if (skb_shinfo(skb)->gso_size) ++ netif->rx.req_cons++; + + for (i = 0; i < nr_frags; i++) { +- meta = npo->meta + npo->meta_prod++; +- meta->frag = skb_shinfo(skb)->frags[i]; +- meta->id = netbk_gop_frag(netif, meta, i + extra, npo, +- meta->frag.page, +- meta->frag.size, +- meta->frag.page_offset); ++ netbk_gop_frag_copy(netif, npo, ++ skb_shinfo(skb)->frags[i].page, ++ skb_shinfo(skb)->frags[i].size, ++ skb_shinfo(skb)->frags[i].page_offset, ++ 0); + } + +- /* +- * This must occur at the end to ensure that we don't trash skb_shinfo +- * until we're done. We know that the head doesn't cross a page +- * boundary because such packets get copied in netif_be_start_xmit. +- */ +- head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo, +- virt_to_page(skb->data), +- skb_headlen(skb), +- offset_in_page(skb->data)); +- +- netif->rx.req_cons += nr_frags + extra; ++ return npo->meta_prod - old_meta_prod; + } + + /* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was + used to set up the operations on the top of + netrx_pending_operations, which have since been done. Check that + they didn't give any errors and advance over them. */ +-static int netbk_check_gop(int nr_frags, domid_t domid, ++static int netbk_check_gop(int nr_meta_slots, domid_t domid, + struct netrx_pending_operations *npo) + { + struct gnttab_copy *copy_op; + int status = NETIF_RSP_OKAY; + int i; + +- for (i = 0; i <= nr_frags; i++) { +- copy_op = npo->copy + npo->copy_cons++; +- if (copy_op->status != GNTST_okay) { ++ for (i = 0; i < nr_meta_slots; i++) { ++ copy_op = npo->copy + npo->copy_cons++; ++ if (copy_op->status != GNTST_okay) { + DPRINTK("Bad status %d from copy to DOM%d.\n", + copy_op->status, domid); + status = NETIF_RSP_ERROR; +@@ -435,27 +547,35 @@ static int netbk_check_gop(int nr_frags, domid_t domid, + } + + static void netbk_add_frag_responses(struct xen_netif *netif, int status, +- struct netbk_rx_meta *meta, int nr_frags) ++ struct netbk_rx_meta *meta, ++ int nr_meta_slots) + { + int i; + unsigned long offset; + +- for (i = 0; i < nr_frags; i++) { +- int id = meta[i].id; +- int flags = (i == nr_frags - 1) ? 
0 : NETRXF_more_data; +- ++ for (i = 0; i < nr_meta_slots; i++) { ++ int flags; ++ if (i == nr_meta_slots - 1) ++ flags = 0; ++ else ++ flags = NETRXF_more_data; ++ + offset = 0; +- make_rx_response(netif, id, status, offset, +- meta[i].frag.size, flags); ++ make_rx_response(netif, meta[i].id, status, offset, ++ meta[i].size, flags); + } + } + ++struct skb_cb_overlay { ++ int meta_slots_used; ++}; ++ + static void net_rx_action(unsigned long data) + { + struct xen_netif *netif = NULL; + struct xen_netbk *netbk = (struct xen_netbk *)data; + s8 status; +- u16 id, irq, flags; ++ u16 irq, flags; + struct xen_netif_rx_response *resp; + struct multicall_entry *mcl; + struct sk_buff_head rxq; +@@ -465,6 +585,7 @@ static void net_rx_action(unsigned long data) + int nr_frags; + int count; + unsigned long offset; ++ struct skb_cb_overlay *sco; + + struct netrx_pending_operations npo = { + .mmu = netbk->rx_mmu, +@@ -479,10 +600,11 @@ static void net_rx_action(unsigned long data) + count = 0; + + while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) { ++ netif = netdev_priv(skb->dev); + nr_frags = skb_shinfo(skb)->nr_frags; +- *(int *)skb->cb = nr_frags; + +- netbk_gop_skb(skb, &npo); ++ sco = (struct skb_cb_overlay *)skb->cb; ++ sco->meta_slots_used = netbk_gop_skb(skb, &npo); + + count += nr_frags + 1; + +@@ -541,18 +663,20 @@ static void net_rx_action(unsigned long data) + BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0); + + while ((skb = __skb_dequeue(&rxq)) != NULL) { +- nr_frags = *(int *)skb->cb; ++ sco = (struct skb_cb_overlay *)skb->cb; + + netif = netdev_priv(skb->dev); + + netif->stats.tx_bytes += skb->len; + netif->stats.tx_packets++; + +- status = netbk_check_gop(nr_frags, netif->domid, &npo); +- +- id = netbk->meta[npo.meta_cons].id; +- flags = nr_frags ? NETRXF_more_data : 0; ++ status = netbk_check_gop(sco->meta_slots_used, ++ netif->domid, &npo); + ++ if (sco->meta_slots_used == 1) ++ flags = 0; ++ else ++ flags = NETRXF_more_data; + if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? 
*/ + flags |= NETRXF_csum_blank | NETRXF_data_validated; + else if (skb->ip_summed == CHECKSUM_UNNECESSARY) +@@ -560,10 +684,12 @@ static void net_rx_action(unsigned long data) + flags |= NETRXF_data_validated; + + offset = 0; +- resp = make_rx_response(netif, id, status, offset, +- skb_headlen(skb), flags); ++ resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id, ++ status, offset, ++ netbk->meta[npo.meta_cons].size, ++ flags); + +- if (netbk->meta[npo.meta_cons].frag.size) { ++ if (netbk->meta[npo.meta_cons].gso_size) { + struct xen_netif_extra_info *gso = + (struct xen_netif_extra_info *) + RING_GET_RESPONSE(&netif->rx, +@@ -571,7 +697,7 @@ static void net_rx_action(unsigned long data) + + resp->flags |= NETRXF_extra_info; + +- gso->u.gso.size = netbk->meta[npo.meta_cons].frag.size; ++ gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size; + gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; + gso->u.gso.pad = 0; + gso->u.gso.features = 0; +@@ -580,9 +706,11 @@ static void net_rx_action(unsigned long data) + gso->flags = 0; + } + +- netbk_add_frag_responses(netif, status, +- netbk->meta + npo.meta_cons + 1, +- nr_frags); ++ if (sco->meta_slots_used > 1) { ++ netbk_add_frag_responses(netif, status, ++ netbk->meta + npo.meta_cons + 1, ++ sco->meta_slots_used - 1); ++ } + + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret); + irq = netif->irq; +@@ -597,8 +725,8 @@ static void net_rx_action(unsigned long data) + netif_wake_queue(netif->dev); + + netif_put(netif); ++ npo.meta_cons += sco->meta_slots_used; + dev_kfree_skb(skb); +- npo.meta_cons += nr_frags + 1; + } + + while (notify_nr != 0) { +-- +1.7.3.4 + + +From 96069b28e612232fb739ef48d9c2c5178b19f562 Mon Sep 17 00:00:00 2001 +From: Dongxiao Xu +Date: Thu, 10 Jun 2010 19:03:15 +0800 +Subject: [PATCH 096/139] xen: netback: Set allocated memory to zero from vmalloc. + +This should fix the windows/linux pv driver issue. + +Signed-off-by: Dongxiao Xu +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/netback.c | 1 + + 1 files changed, 1 insertions(+), 0 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index d53d88e..c7024d4 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -1724,6 +1724,7 @@ static int __init netback_init(void) + printk(KERN_ALERT "%s: out of memory\n", __func__); + return -ENOMEM; + } ++ memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr); + + /* We can increase reservation by this much in net_rx_action(). */ + // balloon_update_driver_allowance(NET_RX_RING_SIZE); +-- +1.7.3.4 + + +From 109a748d1c11b7eeaaacedb08c48bc65640b0bb8 Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Mon, 14 Jun 2010 13:23:33 +0100 +Subject: [PATCH 097/139] xen: netback: minor code formatting fixup + +Don't include redundant casts from allocation. 
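+
+That is, vmalloc() returns void *, which converts implicitly to any
+object pointer type in C, so the cast adds nothing (and can hide a
+missing prototype).  The hunk below boils down to:
+
+    /* before */
+    xen_netbk = (struct xen_netbk *)vmalloc(sizeof(struct xen_netbk) *
+                                            xen_netbk_group_nr);
+    /* after */
+    xen_netbk = vmalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);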
+ +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/netback.c | 3 +-- + 1 files changed, 1 insertions(+), 2 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index c7024d4..58e920a 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -1718,8 +1718,7 @@ static int __init netback_init(void) + return -ENODEV; + + xen_netbk_group_nr = num_online_cpus(); +- xen_netbk = (struct xen_netbk *)vmalloc(sizeof(struct xen_netbk) * +- xen_netbk_group_nr); ++ xen_netbk = vmalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr); + if (!xen_netbk) { + printk(KERN_ALERT "%s: out of memory\n", __func__); + return -ENOMEM; +-- +1.7.3.4 + + +From 2424b59d68ee6ccdb7e52ab68bdba3a8b742513d Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Wed, 30 Jun 2010 10:12:49 +0100 +Subject: [PATCH 098/139] xen: netback: drop more relics of flipping mode + +The mmu_update and gnttab_transfer arrays were only used by flipping +mode. With those gone the multicall now consists of a single call to +GNTTABOP_copy so drop the multicall as well and just make the one +hypercall. + +Signed-off-by: Ian Campbell +Cc: Jeremy Fitzhardinge +Cc: Paul Durrant +--- + drivers/xen/netback/common.h | 3 -- + drivers/xen/netback/netback.c | 55 +++-------------------------------------- + 2 files changed, 4 insertions(+), 54 deletions(-) + +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index 9c0c048..08e7a0e 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -304,9 +304,6 @@ struct xen_netbk { + u16 pending_ring[MAX_PENDING_REQS]; + u16 dealloc_ring[MAX_PENDING_REQS]; + +- struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3]; +- struct mmu_update rx_mmu[NET_RX_RING_SIZE]; +- struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE]; + /* + * Each head or fragment can be up to 4096 bytes. 
Given + * MAX_BUFFER_OFFSET of 4096 the worst case is that each +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 58e920a..ca65840 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -368,15 +368,9 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) + } + + struct netrx_pending_operations { +- unsigned trans_prod, trans_cons; +- unsigned mmu_prod, mmu_mcl; +- unsigned mcl_prod, mcl_cons; + unsigned copy_prod, copy_cons; + unsigned meta_prod, meta_cons; +- struct mmu_update *mmu; +- struct gnttab_transfer *trans; + struct gnttab_copy *copy; +- struct multicall_entry *mcl; + struct netbk_rx_meta *meta; + int copy_off; + grant_ref_t copy_gref; +@@ -577,7 +571,6 @@ static void net_rx_action(unsigned long data) + s8 status; + u16 irq, flags; + struct xen_netif_rx_response *resp; +- struct multicall_entry *mcl; + struct sk_buff_head rxq; + struct sk_buff *skb; + int notify_nr = 0; +@@ -588,10 +581,7 @@ static void net_rx_action(unsigned long data) + struct skb_cb_overlay *sco; + + struct netrx_pending_operations npo = { +- .mmu = netbk->rx_mmu, +- .trans = netbk->grant_trans_op, + .copy = netbk->grant_copy_op, +- .mcl = netbk->rx_mcl, + .meta = netbk->meta, + }; + +@@ -617,50 +607,13 @@ static void net_rx_action(unsigned long data) + + BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta)); + +- npo.mmu_mcl = npo.mcl_prod; +- if (npo.mcl_prod) { +- BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); +- BUG_ON(npo.mmu_prod > ARRAY_SIZE(netbk->rx_mmu)); +- mcl = npo.mcl + npo.mcl_prod++; +- +- BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping); +- mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; +- +- mcl->op = __HYPERVISOR_mmu_update; +- mcl->args[0] = (unsigned long)netbk->rx_mmu; +- mcl->args[1] = npo.mmu_prod; +- mcl->args[2] = 0; +- mcl->args[3] = DOMID_SELF; +- } +- +- if (npo.trans_prod) { +- BUG_ON(npo.trans_prod > ARRAY_SIZE(netbk->grant_trans_op)); +- mcl = npo.mcl + npo.mcl_prod++; +- mcl->op = __HYPERVISOR_grant_table_op; +- mcl->args[0] = GNTTABOP_transfer; +- mcl->args[1] = (unsigned long)netbk->grant_trans_op; +- mcl->args[2] = npo.trans_prod; +- } +- +- if (npo.copy_prod) { +- BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op)); +- mcl = npo.mcl + npo.mcl_prod++; +- mcl->op = __HYPERVISOR_grant_table_op; +- mcl->args[0] = GNTTABOP_copy; +- mcl->args[1] = (unsigned long)netbk->grant_copy_op; +- mcl->args[2] = npo.copy_prod; +- } +- +- /* Nothing to do? */ +- if (!npo.mcl_prod) ++ if (!npo.copy_prod) + return; + +- BUG_ON(npo.mcl_prod > ARRAY_SIZE(netbk->rx_mcl)); +- +- ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod); ++ BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op)); ++ ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op, ++ npo.copy_prod); + BUG_ON(ret != 0); +- /* The mmu_machphys_update() must not fail. 
*/ +- BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0); + + while ((skb = __skb_dequeue(&rxq)) != NULL) { + sco = (struct skb_cb_overlay *)skb->cb; +-- +1.7.3.4 + + +From 673a19d9e2d78939c6dc9c49e7e35ee54b54c8c7 Mon Sep 17 00:00:00 2001 +From: Paul Durrant +Date: Fri, 2 Jul 2010 10:28:11 +0100 +Subject: [PATCH 099/139] xen: netback: Fix basic indentation issue + +Signed-off-by: Paul Durrant +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/netback.c | 11 +++++++---- + 1 files changed, 7 insertions(+), 4 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index ca65840..848503e 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -445,10 +445,13 @@ static void netbk_gop_frag_copy(struct xen_netif *netif, + copy_gop = npo->copy + npo->copy_prod++; + copy_gop->flags = GNTCOPY_dest_gref; + if (PageForeign(page)) { +- struct xen_netbk *netbk = &xen_netbk[group]; +- struct pending_tx_info *src_pend = &netbk->pending_tx_info[idx]; +- copy_gop->source.domid = src_pend->netif->domid; +- copy_gop->source.u.ref = src_pend->req.gref; ++ struct xen_netbk *netbk = &xen_netbk[group]; ++ struct pending_tx_info *src_pend; ++ ++ src_pend = &netbk->pending_tx_info[idx]; ++ ++ copy_gop->source.domid = src_pend->netif->domid; ++ copy_gop->source.u.ref = src_pend->req.gref; + copy_gop->flags |= GNTCOPY_source_gref; + } else { + copy_gop->source.domid = DOMID_SELF; +-- +1.7.3.4 + + +From d08b2d1f2ff4723b335d0fb5b91ffd6cb6a005d3 Mon Sep 17 00:00:00 2001 +From: Paul Durrant +Date: Mon, 5 Jul 2010 11:45:29 +0100 +Subject: [PATCH 100/139] xen: netback: Add a new style of passing GSO packets to frontends. + +feature-gso-tcpv4-prefix uses precedes the packet data passed to +the frontend with a ring entry that contains the necessary +metadata. This style of GSO passing is required for Citrix +Windows PV Drivers. + +Signed-off-by: Paul Durrant +Cc: Ian Campbell +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/common.h | 3 ++- + drivers/xen/netback/netback.c | 37 ++++++++++++++++++++++++++++++++++--- + drivers/xen/netback/xenbus.c | 15 ++++++++++++--- + include/xen/interface/io/netif.h | 4 ++++ + 4 files changed, 52 insertions(+), 7 deletions(-) + +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index 08e7a0e..78451ab 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -80,7 +80,8 @@ struct xen_netif { + int features; + + /* Internal feature information. */ +- u8 can_queue:1; /* can queue packets for receiver? */ ++ u8 can_queue:1; /* can queue packets for receiver? */ ++ u8 gso_prefix:1; /* use a prefix segment for GSO information */ + + /* Allow netif_be_start_xmit() to peek ahead in the rx request + * ring. 
This is a prediction of what rx_req_cons will be once +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 848503e..e93a62e 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -432,6 +432,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif, + /* Overflowed this request, go to the next one */ + req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++); + meta = npo->meta + npo->meta_prod++; ++ meta->gso_size = 0; + meta->size = 0; + meta->id = req->id; + npo->copy_off = 0; +@@ -492,9 +493,23 @@ static int netbk_gop_skb(struct sk_buff *skb, + + old_meta_prod = npo->meta_prod; + ++ /* Set up a GSO prefix descriptor, if necessary */ ++ if (skb_shinfo(skb)->gso_size && netif->gso_prefix) { ++ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++); ++ meta = npo->meta + npo->meta_prod++; ++ meta->gso_size = skb_shinfo(skb)->gso_size; ++ meta->size = 0; ++ meta->id = req->id; ++ } ++ + req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++); + meta = npo->meta + npo->meta_prod++; +- meta->gso_size = skb_shinfo(skb)->gso_size; ++ ++ if (!netif->gso_prefix) ++ meta->gso_size = skb_shinfo(skb)->gso_size; ++ else ++ meta->gso_size = 0; ++ + meta->size = 0; + meta->id = req->id; + npo->copy_off = 0; +@@ -506,7 +521,7 @@ static int netbk_gop_skb(struct sk_buff *skb, + offset_in_page(skb->data), 1); + + /* Leave a gap for the GSO descriptor. */ +- if (skb_shinfo(skb)->gso_size) ++ if (skb_shinfo(skb)->gso_size && !netif->gso_prefix) + netif->rx.req_cons++; + + for (i = 0; i < nr_frags; i++) { +@@ -623,6 +638,21 @@ static void net_rx_action(unsigned long data) + + netif = netdev_priv(skb->dev); + ++ if (netbk->meta[npo.meta_cons].gso_size && netif->gso_prefix) { ++ resp = RING_GET_RESPONSE(&netif->rx, ++ netif->rx.rsp_prod_pvt++); ++ ++ resp->flags = NETRXF_gso_prefix | NETRXF_more_data; ++ ++ resp->offset = netbk->meta[npo.meta_cons].gso_size; ++ resp->id = netbk->meta[npo.meta_cons].id; ++ resp->status = sco->meta_slots_used; ++ ++ npo.meta_cons++; ++ sco->meta_slots_used--; ++ } ++ ++ + netif->stats.tx_bytes += skb->len; + netif->stats.tx_packets++; + +@@ -633,6 +663,7 @@ static void net_rx_action(unsigned long data) + flags = 0; + else + flags = NETRXF_more_data; ++ + if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? 
*/ + flags |= NETRXF_csum_blank | NETRXF_data_validated; + else if (skb->ip_summed == CHECKSUM_UNNECESSARY) +@@ -645,7 +676,7 @@ static void net_rx_action(unsigned long data) + netbk->meta[npo.meta_cons].size, + flags); + +- if (netbk->meta[npo.meta_cons].gso_size) { ++ if (netbk->meta[npo.meta_cons].gso_size && !netif->gso_prefix) { + struct xen_netif_extra_info *gso = + (struct xen_netif_extra_info *) + RING_GET_RESPONSE(&netif->rx, +diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c +index e30b0c7..cda987f 100644 +--- a/drivers/xen/netback/xenbus.c ++++ b/drivers/xen/netback/xenbus.c +@@ -457,16 +457,25 @@ static int connect_rings(struct backend_info *be) + be->netif->dev->mtu = ETH_DATA_LEN; + } + +- if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d", +- &val) < 0) ++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", ++ "%d", &val) < 0) + val = 0; + if (val) { + be->netif->features |= NETIF_F_TSO; + be->netif->dev->features |= NETIF_F_TSO; + } + ++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix", ++ "%d", &val) < 0) ++ val = 0; ++ if (val) { ++ be->netif->features |= NETIF_F_TSO; ++ be->netif->dev->features |= NETIF_F_TSO; ++ be->netif->gso_prefix = 1; ++ } ++ + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload", +- "%d", &val) < 0) ++ "%d", &val) < 0) + val = 0; + if (val) { + be->netif->features &= ~NETIF_F_IP_CSUM; +diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h +index 518481c..8309344 100644 +--- a/include/xen/interface/io/netif.h ++++ b/include/xen/interface/io/netif.h +@@ -131,6 +131,10 @@ struct xen_netif_rx_request { + #define _NETRXF_extra_info (3) + #define NETRXF_extra_info (1U<<_NETRXF_extra_info) + ++/* GSO Prefix descriptor. */ ++#define _NETRXF_gso_prefix (4) ++#define NETRXF_gso_prefix (1U<<_NETRXF_gso_prefix) ++ + struct xen_netif_rx_response { + uint16_t id; + uint16_t offset; /* Offset in page of start of received packet */ +-- +1.7.3.4 + + +From bd910979612331d60a629c16a49ebeb5efa0f035 Mon Sep 17 00:00:00 2001 +From: Paul Durrant +Date: Fri, 2 Jul 2010 10:28:13 +0100 +Subject: [PATCH 101/139] xen: netback: Make frontend features distinct from netback feature flags. + +Make sure that if a feature flag is disabled by ethtool on netback +that we do not gratuitously re-enabled it when we check the frontend +features during ring connection. + +Signed-off-by: Paul Durrant +Cc: Ian Campbell +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/common.h | 14 ++++++-- + drivers/xen/netback/interface.c | 68 ++++++++++++++++++++++++++++++-------- + drivers/xen/netback/netback.c | 2 +- + drivers/xen/netback/xenbus.c | 44 ++++++++++--------------- + 4 files changed, 81 insertions(+), 47 deletions(-) + +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index 78451ab..a5f3759 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -76,12 +76,17 @@ struct xen_netif { + struct vm_struct *tx_comms_area; + struct vm_struct *rx_comms_area; + +- /* Set of features that can be turned on in dev->features. */ +- int features; ++ /* Flags that must not be set in dev->features */ ++ int features_disabled; ++ ++ /* Frontend feature information. */ ++ u8 can_sg:1; ++ u8 gso:1; ++ u8 gso_prefix:1; ++ u8 csum:1; + + /* Internal feature information. */ + u8 can_queue:1; /* can queue packets for receiver? 
*/ +- u8 gso_prefix:1; /* use a prefix segment for GSO information */ + + /* Allow netif_be_start_xmit() to peek ahead in the rx request + * ring. This is a prediction of what rx_req_cons will be once +@@ -187,6 +192,7 @@ void netif_accel_init(void); + + void netif_disconnect(struct xen_netif *netif); + ++void netif_set_features(struct xen_netif *netif); + struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle); + int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref, + unsigned long rx_ring_ref, unsigned int evtchn); +@@ -223,7 +229,7 @@ static inline int netbk_can_queue(struct net_device *dev) + static inline int netbk_can_sg(struct net_device *dev) + { + struct xen_netif *netif = netdev_priv(dev); +- return netif->features & NETIF_F_SG; ++ return netif->can_sg; + } + + struct pending_tx_info { +diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c +index 172ef4c..2e8508a 100644 +--- a/drivers/xen/netback/interface.c ++++ b/drivers/xen/netback/interface.c +@@ -121,31 +121,69 @@ static int netbk_change_mtu(struct net_device *dev, int mtu) + return 0; + } + +-static int netbk_set_sg(struct net_device *dev, u32 data) ++void netif_set_features(struct xen_netif *netif) + { +- if (data) { +- struct xen_netif *netif = netdev_priv(dev); ++ struct net_device *dev = netif->dev; ++ int features = dev->features; ++ ++ if (netif->can_sg) ++ features |= NETIF_F_SG; ++ if (netif->gso || netif->gso_prefix) ++ features |= NETIF_F_TSO; ++ if (netif->csum) ++ features |= NETIF_F_IP_CSUM; ++ ++ features &= ~(netif->features_disabled); + +- if (!(netif->features & NETIF_F_SG)) ++ if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) ++ dev->mtu = ETH_DATA_LEN; ++ ++ dev->features = features; ++} ++ ++static int netbk_set_tx_csum(struct net_device *dev, u32 data) ++{ ++ struct xen_netif *netif = netdev_priv(dev); ++ if (data) { ++ if (!netif->csum) + return -ENOSYS; ++ netif->features_disabled &= ~NETIF_F_IP_CSUM; ++ } else { ++ netif->features_disabled |= NETIF_F_IP_CSUM; + } + +- if (dev->mtu > ETH_DATA_LEN) +- dev->mtu = ETH_DATA_LEN; ++ netif_set_features(netif); ++ return 0; ++} + +- return ethtool_op_set_sg(dev, data); ++static int netbk_set_sg(struct net_device *dev, u32 data) ++{ ++ struct xen_netif *netif = netdev_priv(dev); ++ if (data) { ++ if (!netif->can_sg) ++ return -ENOSYS; ++ netif->features_disabled &= ~NETIF_F_SG; ++ } else { ++ netif->features_disabled |= NETIF_F_SG; ++ } ++ ++ netif_set_features(netif); ++ return 0; + } + + static int netbk_set_tso(struct net_device *dev, u32 data) + { ++ struct xen_netif *netif = netdev_priv(dev); + if (data) { +- struct xen_netif *netif = netdev_priv(dev); +- +- if (!(netif->features & NETIF_F_TSO)) ++ if (!netif->gso && !netif->gso_prefix) + return -ENOSYS; ++ netif->features_disabled &= ~NETIF_F_TSO; ++ } else { ++ netif->features_disabled |= NETIF_F_TSO; + } + +- return ethtool_op_set_tso(dev, data); ++ netif_set_features(netif); ++ return 0; + } + + static void netbk_get_drvinfo(struct net_device *dev, +@@ -200,7 +238,7 @@ static struct ethtool_ops network_ethtool_ops = + .get_drvinfo = netbk_get_drvinfo, + + .get_tx_csum = ethtool_op_get_tx_csum, +- .set_tx_csum = ethtool_op_set_tx_csum, ++ .set_tx_csum = netbk_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = netbk_set_sg, + .get_tso = ethtool_op_get_tso, +@@ -242,7 +280,8 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int + netif->domid = domid; + netif->group = -1; + netif->handle = 
handle; +- netif->features = NETIF_F_SG; ++ netif->can_sg = 1; ++ netif->csum = 1; + atomic_set(&netif->refcnt, 1); + init_waitqueue_head(&netif->waiting_to_free); + netif->dev = dev; +@@ -259,8 +298,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int + init_timer(&netif->tx_queue_timeout); + + dev->netdev_ops = &netback_ops; +- dev->features = NETIF_F_IP_CSUM|NETIF_F_SG; +- ++ netif_set_features(netif); + SET_ETHTOOL_OPS(dev, &network_ethtool_ops); + + dev->tx_queue_len = netbk_queue_length; +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index e93a62e..63a771e 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -238,7 +238,7 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb) + + static inline int netbk_max_required_rx_slots(struct xen_netif *netif) + { +- if (netif->features & (NETIF_F_SG|NETIF_F_TSO)) ++ if (netif->can_sg || netif->gso || netif->gso_prefix) + return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */ + return 1; /* all in one */ + } +diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c +index cda987f..17ff5cf 100644 +--- a/drivers/xen/netback/xenbus.c ++++ b/drivers/xen/netback/xenbus.c +@@ -404,6 +404,7 @@ static void connect(struct backend_info *be) + + static int connect_rings(struct backend_info *be) + { ++ struct xen_netif *netif = be->netif; + struct xenbus_device *dev = be->dev; + unsigned long tx_ring_ref, rx_ring_ref; + unsigned int evtchn, rx_copy; +@@ -437,53 +438,42 @@ static int connect_rings(struct backend_info *be) + if (!rx_copy) + return -EOPNOTSUPP; + +- if (be->netif->dev->tx_queue_len != 0) { ++ if (netif->dev->tx_queue_len != 0) { + if (xenbus_scanf(XBT_NIL, dev->otherend, + "feature-rx-notify", "%d", &val) < 0) + val = 0; + if (val) +- be->netif->can_queue = 1; ++ netif->can_queue = 1; + else + /* Must be non-zero for pfifo_fast to work. */ +- be->netif->dev->tx_queue_len = 1; ++ netif->dev->tx_queue_len = 1; + } + +- if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0) ++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", ++ "%d", &val) < 0) + val = 0; +- if (!val) { +- be->netif->features &= ~NETIF_F_SG; +- be->netif->dev->features &= ~NETIF_F_SG; +- if (be->netif->dev->mtu > ETH_DATA_LEN) +- be->netif->dev->mtu = ETH_DATA_LEN; +- } ++ netif->can_sg = !!val; + + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", +- "%d", &val) < 0) ++ "%d", &val) < 0) + val = 0; +- if (val) { +- be->netif->features |= NETIF_F_TSO; +- be->netif->dev->features |= NETIF_F_TSO; +- } ++ netif->gso = !!val; + + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix", +- "%d", &val) < 0) ++ "%d", &val) < 0) + val = 0; +- if (val) { +- be->netif->features |= NETIF_F_TSO; +- be->netif->dev->features |= NETIF_F_TSO; +- be->netif->gso_prefix = 1; +- } ++ netif->gso_prefix = !!val; + + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload", +- "%d", &val) < 0) ++ "%d", &val) < 0) + val = 0; +- if (val) { +- be->netif->features &= ~NETIF_F_IP_CSUM; +- be->netif->dev->features &= ~NETIF_F_IP_CSUM; +- } ++ netif->csum = !val; ++ ++ /* Set dev->features */ ++ netif_set_features(netif); + + /* Map the shared frame, irq etc. 
*/ +- err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn); ++ err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn); + if (err) { + xenbus_dev_fatal(dev, err, + "mapping shared-frames %lu/%lu port %u", +-- +1.7.3.4 + + +From cf8c20169427de5829e3ec723712b77de52e64ac Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Thu, 15 Jul 2010 10:46:50 -0700 +Subject: [PATCH 102/139] xen: netback: only initialize for PV domains + +HVM domains don't support netback + +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/netback.c | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 63a771e..911c85b 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -1701,7 +1701,7 @@ static int __init netback_init(void) + int rc = 0; + int group; + +- if (!xen_domain()) ++ if (!xen_pv_domain()) + return -ENODEV; + + xen_netbk_group_nr = num_online_cpus(); +-- +1.7.3.4 + + +From 00a5c7eddb919701ac998b33bf4f283efaa06bbc Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Wed, 21 Jul 2010 13:24:26 -0700 +Subject: [PATCH 103/139] xen/rings: make protocol specific usage of shared sring explicit + +I don't think protocol specific data't really belongs in this header +but since it is already there and we seem to be stuck with it lets at +least make the users explicit lest people get caught out by future new +fields moving the pad field around. + +Signed-off-by: Jeremy Fitzhardinge +(cherry picked from commit df0afd34ec3015e44b8121d0e542d32fb04d438d) +--- + include/xen/interface/io/ring.h | 8 +++++++- + 1 files changed, 7 insertions(+), 1 deletions(-) + +diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h +index e8cbf43..e2d62cf 100644 +--- a/include/xen/interface/io/ring.h ++++ b/include/xen/interface/io/ring.h +@@ -73,7 +73,13 @@ union __name##_sring_entry { \ + struct __name##_sring { \ + RING_IDX req_prod, req_event; \ + RING_IDX rsp_prod, rsp_event; \ +- uint8_t pad[48]; \ ++ union { \ ++ struct { \ ++ uint8_t msg; \ ++ } tapif_user; \ ++ uint8_t pvt_pad[4]; \ ++ } private; \ ++ uint8_t pad[44]; \ + union __name##_sring_entry ring[1]; /* variable-length */ \ + }; \ + \ +-- +1.7.3.4 + + +From 1866aec6117132b4399f9e956994af259ad5cfdb Mon Sep 17 00:00:00 2001 +From: Bastian Blank +Date: Thu, 29 Jul 2010 17:30:18 +0200 +Subject: [PATCH 104/139] xen: netback: Fix null-pointer access in netback_uevent + +The uevent method of Xen netback does not check if the the network +device is already setup and tries to dereference a null-pointer if not. 
+ +Signed-off-by: Bastian Blank +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/xenbus.c | 10 ++-------- + 1 files changed, 2 insertions(+), 8 deletions(-) + +diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c +index 17ff5cf..1fec65a 100644 +--- a/drivers/xen/netback/xenbus.c ++++ b/drivers/xen/netback/xenbus.c +@@ -154,17 +154,11 @@ fail: + */ + static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env) + { +- struct backend_info *be; +- struct xen_netif *netif; ++ struct backend_info *be = dev_get_drvdata(&xdev->dev); + char *val; + + DPRINTK("netback_uevent"); + +- be = dev_get_drvdata(&xdev->dev); +- if (!be) +- return 0; +- netif = be->netif; +- + val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL); + if (IS_ERR(val)) { + int err = PTR_ERR(val); +@@ -179,7 +173,7 @@ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *en + kfree(val); + } + +- if (add_uevent_var(env, "vif=%s", netif->dev->name)) ++ if (be && be->netif && add_uevent_var(env, "vif=%s", be->netif->dev->name)) + return -ENOMEM; + + return 0; +-- +1.7.3.4 + + +From 7f1732b25d00393131220a0369caa8a28faf46e1 Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Fri, 30 Jul 2010 15:16:47 +0100 +Subject: [PATCH 105/139] xen: netback: check if foreign pages are actually netback-created foreign pages. + +020ba906 "xen/netback: Multiple tasklets support." changed +netbk_gop_frag_copy to attempt to lookup a pending_tx_info for any +foreign page, regardless of whether the page was a netback-foreign +page. + +In the case of non-netback pages this can lead to dereferencing a NULL +src_pend->netif. + +Restore the behaviour of netif_page_index prior toa3031942 +"xen/netback: Introduce a new struct type page_ext" by performing +tests to ensure that page is a netback page and extend the same checks +to netif_page_group. + +Actually combine netif_page_{index,group} in to a single function +since they are always called together and it saves duplicating all the +checks. 
+ +Signed-off-by: Ian Campbell +Cc: Jeremy Fitzhardinge +Cc: Xu, Dongxiao +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/netback.c | 56 ++++++++++++++++++++++++++++------------ + 1 files changed, 39 insertions(+), 17 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 911c85b..95df223 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -89,18 +89,37 @@ static inline void netif_set_page_ext(struct page *pg, unsigned int group, + pg->mapping = ext.mapping; + } + +-static inline unsigned int netif_page_group(const struct page *pg) ++static inline int netif_get_page_ext(struct page *pg, unsigned int *_group, unsigned int *_idx) + { + union page_ext ext = { .mapping = pg->mapping }; ++ struct xen_netbk *netbk; ++ unsigned int group, idx; + +- return ext.e.group - 1; +-} ++ if (!PageForeign(pg)) ++ return 0; + +-static inline unsigned int netif_page_index(const struct page *pg) +-{ +- union page_ext ext = { .mapping = pg->mapping }; ++ group = ext.e.group - 1; ++ ++ if (group < 0 || group >= xen_netbk_group_nr) ++ return 0; ++ ++ netbk = &xen_netbk[group]; ++ ++ if (netbk->mmap_pages == NULL) ++ return 0; + +- return ext.e.idx; ++ idx = ext.e.idx; ++ ++ if ((idx < 0) || (idx >= MAX_PENDING_REQS)) ++ return 0; ++ ++ if (netbk->mmap_pages[idx] != pg) ++ return 0; ++ ++ *_group = group; ++ *_idx = idx; ++ ++ return 1; + } + + /* +@@ -386,8 +405,12 @@ static void netbk_gop_frag_copy(struct xen_netif *netif, + { + struct gnttab_copy *copy_gop; + struct netbk_rx_meta *meta; +- int group = netif_page_group(page); +- int idx = netif_page_index(page); ++ /* ++ * These variables a used iff netif_get_page_ext returns true, ++ * in which case they are guaranteed to be initialized. ++ */ ++ unsigned int uninitialized_var(group), uninitialized_var(idx); ++ int foreign = netif_get_page_ext(page, &group, &idx); + unsigned long bytes; + + /* Data must not cross a page boundary. */ +@@ -445,7 +468,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif, + + copy_gop = npo->copy + npo->copy_prod++; + copy_gop->flags = GNTCOPY_dest_gref; +- if (PageForeign(page)) { ++ if (foreign) { + struct xen_netbk *netbk = &xen_netbk[group]; + struct pending_tx_info *src_pend; + +@@ -1535,14 +1558,13 @@ static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx) + + static void netif_page_release(struct page *page, unsigned int order) + { +- int group = netif_page_group(page); +- int idx = netif_page_index(page); +- struct xen_netbk *netbk = &xen_netbk[group]; ++ unsigned int group, idx; ++ int foreign = netif_get_page_ext(page, &group, &idx); ++ ++ BUG_ON(!foreign); + BUG_ON(order); +- BUG_ON(group < 0 || group >= xen_netbk_group_nr); +- BUG_ON(idx < 0 || idx >= MAX_PENDING_REQS); +- BUG_ON(netbk->mmap_pages[idx] != page); +- netif_idx_release(netbk, idx); ++ ++ netif_idx_release(&xen_netbk[group], idx); + } + + irqreturn_t netif_be_int(int irq, void *dev_id) +-- +1.7.3.4 + + +From 14a12990d12cd9ee919d5579c1d0c3df74ad66e7 Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Fri, 30 Jul 2010 15:16:46 +0100 +Subject: [PATCH 106/139] xen: netback: do not unleash netback threads until initialisation is complete + +Otherwise netbk_action_thread can reference &netbk->net_schedule_list +(via tx_work_todo) before it is initialised. Until now it was zeroed +which is probably safe but not exactly robust. 
+ +Signed-off-by: Ian Campbell +Cc: Jeremy Fitzhardinge +Cc: Xu, Dongxiao +Cc: Paul Durrant +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/netback.c | 4 +++- + 1 files changed, 3 insertions(+), 1 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 95df223..2646383 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -1782,7 +1782,6 @@ static int __init netback_init(void) + + if (!IS_ERR(netbk->kthread.task)) { + kthread_bind(netbk->kthread.task, group); +- wake_up_process(netbk->kthread.task); + } else { + printk(KERN_ALERT + "kthread_run() fails at netback\n"); +@@ -1808,6 +1807,9 @@ static int __init netback_init(void) + spin_lock_init(&netbk->net_schedule_list_lock); + + atomic_set(&netbk->netfront_count, 0); ++ ++ if (MODPARM_netback_kthread) ++ wake_up_process(netbk->kthread.task); + } + + netbk_copy_skb_mode = NETBK_DONT_COPY_SKB; +-- +1.7.3.4 + + +From 6decec42c631f2e2e268f00ce8841faf38817ca8 Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Thu, 2 Sep 2010 14:36:40 +0100 +Subject: [PATCH 108/139] xen: netback: save interrupt state in add_to_net_schedule_list_tail + +add_to_net_schedule_list_tail is called from both hard interrupt context +(add_to_net_schedule_list_tail) and soft interrupt/process context +(netif_schedule_work) so use the interrupt state saving spinlock +variants. + +Fixes: + ------------[ cut here ]------------ + WARNING: at kernel/lockdep.c:2323 trace_hardirqs_on_caller+0xef/0x1a0() + Hardware name: PowerEdge 860 + Modules linked in: rtc_cmos rtc_core rtc_lib + Pid: 16, comm: xenwatch Not tainted 2.6.32.18-x86_32p-xen0-00850-ge6b9b2c #98 + Call Trace: + [] warn_slowpath_common+0x6c/0xc0 + [] warn_slowpath_null+0x15/0x20 + [] trace_hardirqs_on_caller+0xef/0x1a0 + [] trace_hardirqs_on+0xb/0x10 + [] _spin_unlock_irq+0x22/0x40 + [] add_to_net_schedule_list_tail+0x5f/0xb0 + [] netif_be_int+0x2b/0x120 + [] handle_IRQ_event+0x2e/0xe0 + [] handle_level_irq+0x6e/0xf0 + [] __xen_evtchn_do_upcall+0x16f/0x190 + [] xen_evtchn_do_upcall+0x28/0x40 + [] xen_do_upcall+0x7/0xc + [] xs_talkv+0x59/0x1a0 + [] xs_single+0x3a/0x50 + [] xenbus_read+0x39/0x60 + [] frontend_changed+0x3e7/0x6a0 + [] xenbus_otherend_changed+0x8a/0xa0 + [] frontend_changed+0x12/0x20 + [] xenwatch_thread+0x7c/0x140 + [] kthread+0x74/0x80 + [] kernel_thread_helper+0x7/0x10 + ---[ end trace 48d73949a8e0909a ]--- + +Signed-off-by: Ian Campbell +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/netback.c | 6 ++++-- + 1 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 2646383..1d080f6 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -787,17 +787,19 @@ static void remove_from_net_schedule_list(struct xen_netif *netif) + + static void add_to_net_schedule_list_tail(struct xen_netif *netif) + { ++ unsigned long flags; ++ + struct xen_netbk *netbk = &xen_netbk[netif->group]; + if (__on_net_schedule_list(netif)) + return; + +- spin_lock_irq(&netbk->net_schedule_list_lock); ++ spin_lock_irqsave(&netbk->net_schedule_list_lock, flags); + if (!__on_net_schedule_list(netif) && + likely(netif_schedulable(netif))) { + list_add_tail(&netif->list, &netbk->net_schedule_list); + netif_get(netif); + } +- spin_unlock_irq(&netbk->net_schedule_list_lock); ++ spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags); + } + + void netif_schedule_work(struct xen_netif *netif) +-- +1.7.3.4 + + +From 
0e667d904c6ab6c44cedef51ef00964f9e0559ba Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Fri, 8 Oct 2010 17:11:51 +0100 +Subject: [PATCH 109/139] xen: netback: increase size of rx_meta array. + +We can end up needing as many of these as we need grant copy operations so +increase the array size for the same reason. + +Crash observed on XenServer: +kernel: ------------[ cut here ]------------ +kernel: kernel BUG at drivers/xen/netback/netback.c:834! +kernel: invalid opcode: 0000 [#1] SMP +kernel: last sysfs file: /sys/devices/xen-backend/vbd-10-768/statistics/rd_usecs +kernel: Process netback (pid: 1413, ti=ec8a4000 task=ed0a6b70 task.ti=ec8a4000) +kernel: Stack: 00000000 00000612 00000001 00000000 00020000 00000000 ecfbe000 00000000 +kernel: ec8a5f80 ec8a5f98 ec8a5fac 00000000 c0537220 c0539220 00000000 c0534220 +kernel: cd7afaa0 cd7afaa0 0000000c 00000014 062de396 00000001 00000001 00000014 +kernel: Call Trace: +kernel: [] ? netbk_action_thread+0x0/0x1fe0 +kernel: [] ? kthread+0x42/0x70 +kernel: [] ? kthread+0x0/0x70 +kernel: [] ? kernel_thread_helper+0x7/0x10 +kernel: ======================= +kernel: Code: 00 00 c7 42 08 20 82 53 c0 8b 85 e4 fe ff ff c7 42 10 00 00 00 00 \ + c7 42 14 f0 7f 00 00 89 42 0c 8b 8d ec fe ff ff e9 3e e9 ff ff <0f> \ + 0b eb fe 0f 0b eb fe 0f 0b eb fe 0f 0b eb fe 31 c0 e8 bf 31 +kernel: EIP: [] netbk_action_thread+0x19fa/0x1fe0 SS:ESP 0069:ec8a5d98 + +Corresponding to + BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta)); + +Signed-off-by: Ian Campbell +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/netback/common.h | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index a5f3759..ce0041a 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -319,7 +319,7 @@ struct xen_netbk { + struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE]; + unsigned char rx_notify[NR_IRQS]; + u16 notify_list[NET_RX_RING_SIZE]; +- struct netbk_rx_meta meta[NET_RX_RING_SIZE]; ++ struct netbk_rx_meta meta[2*NET_RX_RING_SIZE]; + }; + + extern struct xen_netbk *xen_netbk; +-- +1.7.3.4 + + +From 36713152990836043c908777654ea01ed13ccdf4 Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Fri, 15 Oct 2010 13:41:44 +0100 +Subject: [PATCH 110/139] xen: netback: take net_schedule_list_lock when removing entry from net_schedule_list + +There is a race in net_tx_build_mops between checking if +net_schedule_list is empty and actually dequeuing the first entry on +the list. 
If another thread dequeues the only entry on the list during +this window we crash because list_first_entry expects a non-empty +list, like so: + +[ 0.133127] BUG: unable to handle kernel NULL pointer dereference at 00000008 +[ 0.133132] IP: [] net_tx_build_mops+0x91/0xa70 +[ 0.133142] *pdpt = 0000000000000000 *pde = 000000000000000f +[ 0.133147] Oops: 0002 1 SMP +[ 0.133150] last sysfs file: +[ 0.133152] Modules linked in: +[ 0.133154] +[ 0.133156] Pid: 55, comm: netback/1 Not tainted (2.6.32.12-0.7.1 #1) Latitude E4310 +[ 0.133158] EIP: 0061:[] EFLAGS: 00010202 CPU: 1 +[ 0.133161] EIP is at net_tx_build_mops+0x91/0xa70 +[ 0.133163] EAX: 00000012 EBX: 00000008 ECX: e112b734 EDX: e112b76c +[ 0.133165] ESI: ffffff30 EDI: 00000000 EBP: e112b734 ESP: dfe85d98 +[ 0.133167] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0069 +[ 0.133169] Process netback/1 (pid: 55, ti=dfe84000 task=dfe83340 task.ti=dfe84000) +[ 0.133170] Stack: +[ 0.133172] 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 +[ 0.133177] <0> 00000000 e112b734 e112ec08 e112b7f8 e112ec08 ffffff30 00000000 00000000 +[ 0.133186] <0> 00000000 00000000 00000000 e112b76c dfe85df4 00000001 00000000 aaaaaaaa +[ 0.133193] Call Trace: +[ 0.133202] [] net_tx_action+0x42f/0xac0 +[ 0.133206] [] netbk_action_thread+0x6a/0x1b0 +[ 0.133212] [] kthread+0x74/0x80 +[ 0.133218] [] kernel_thread_helper+0x7/0x10 +[ 0.133220] Code: c4 00 00 00 89 74 24 58 39 74 24 2c 0f 84 c7 06 00 00 8b 74 24 \ + 58 8b 5c 24 58 81 ee d0 00 00 00 83 c3 08 89 74 24 34 8b 7c 24 \ + 58 ff 47 08 89 f0 e8 b4 f9 ff ff 8b 46 2c 8b 56 34 89 44 24 5c +[ 0.133261] EIP: [] net_tx_build_mops+0x91/0xa70 SS:ESP 0069:dfe85d98 +[ 0.133265] CR2: 0000000000000008 +[ 0.133274] --[ end trace e2c5c15f54bd9d93 ]-- + +Therefore after the initial lock free check for an empty list check +again with the lock held before dequeueing the entry. + +Based on a patch by Tomasz Wroblewski. 
+ +Signed-off-by: Ian Campbell +Cc: Tomasz Wroblewski +--- + drivers/xen/netback/netback.c | 35 ++++++++++++++++++++++++++++------- + 1 files changed, 28 insertions(+), 7 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 1d080f6..3b03435 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -774,15 +774,34 @@ static int __on_net_schedule_list(struct xen_netif *netif) + return !list_empty(&netif->list); + } + ++/* Must be called with net_schedule_list_lock held */ + static void remove_from_net_schedule_list(struct xen_netif *netif) + { +- struct xen_netbk *netbk = &xen_netbk[netif->group]; +- spin_lock_irq(&netbk->net_schedule_list_lock); + if (likely(__on_net_schedule_list(netif))) { + list_del_init(&netif->list); + netif_put(netif); + } ++} ++ ++static struct xen_netif *poll_net_schedule_list(struct xen_netbk *netbk) ++{ ++ struct xen_netif *netif = NULL; ++ ++ spin_lock_irq(&netbk->net_schedule_list_lock); ++ if (list_empty(&netbk->net_schedule_list)) ++ goto out; ++ ++ netif = list_first_entry(&netbk->net_schedule_list, ++ struct xen_netif, list); ++ if (!netif) ++ goto out; ++ ++ netif_get(netif); ++ ++ remove_from_net_schedule_list(netif); ++out: + spin_unlock_irq(&netbk->net_schedule_list_lock); ++ return netif; + } + + static void add_to_net_schedule_list_tail(struct xen_netif *netif) +@@ -817,7 +836,10 @@ void netif_schedule_work(struct xen_netif *netif) + + void netif_deschedule_work(struct xen_netif *netif) + { ++ struct xen_netbk *netbk = &xen_netbk[netif->group]; ++ spin_lock_irq(&netbk->net_schedule_list_lock); + remove_from_net_schedule_list(netif); ++ spin_unlock_irq(&netbk->net_schedule_list_lock); + } + + +@@ -1301,12 +1323,11 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk) + int work_to_do; + unsigned int data_len; + pending_ring_idx_t index; +- ++ + /* Get a netif from the list with work to do. */ +- netif = list_first_entry(&netbk->net_schedule_list, +- struct xen_netif, list); +- netif_get(netif); +- remove_from_net_schedule_list(netif); ++ netif = poll_net_schedule_list(netbk); ++ if (!netif) ++ continue; + + RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do); + if (!work_to_do) { +-- +1.7.3.4 + + +From 6ad4931abe4b111253da13805cc504cc72b0df1c Mon Sep 17 00:00:00 2001 +From: Paul Durrant +Date: Wed, 15 Dec 2010 09:48:12 +0000 +Subject: [PATCH 111/139] xen: netback: Re-define PKT_PROT_LEN to be bigger. + +Re-define PKT_PROT_LEN to be big enough to handle maximal IPv4 and TCP options and phrase +the definition so that it's reasonably obvious that's what it's for. + +Signed-off-by: Paul Durrant +Signed-off-by: Ian Campbell +--- + drivers/xen/netback/netback.c | 14 +++++++++----- + 1 files changed, 9 insertions(+), 5 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 3b03435..9bbd230 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -36,9 +36,11 @@ + + #include "common.h" + +-#include +-#include + #include ++#include ++#include ++ ++#include + + #include + #include +@@ -125,10 +127,12 @@ static inline int netif_get_page_ext(struct page *pg, unsigned int *_group, unsi + /* + * This is the amount of packet we copy rather than map, so that the + * guest can't fiddle with the contents of the headers while we do +- * packet processing on them (netfilter, routing, etc). 72 is enough +- * to cover TCP+IP headers including options. ++ * packet processing on them (netfilter, routing, etc). 
+ */ +-#define PKT_PROT_LEN 72 ++#define PKT_PROT_LEN (ETH_HLEN + \ ++ VLAN_HLEN + \ ++ sizeof(struct iphdr) + MAX_IPOPTLEN + \ ++ sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE) + + static inline pending_ring_idx_t pending_index(unsigned i) + { +-- +1.7.3.4 + + +From a2d629a773aba2049106bad81596ef88e80a9cd4 Mon Sep 17 00:00:00 2001 +From: Paul Durrant +Date: Tue, 14 Dec 2010 20:35:19 +0000 +Subject: [PATCH 112/139] xen: netback: Don't count packets we don't actually receive. + +Make sure we only bump rx_packets when we're definitely going to call netif_rx_ni(). + +Signed-off-by: Paul Durrant +Signed-off-by: Ian Campbell +--- + drivers/xen/netback/netback.c | 6 +++--- + 1 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 9bbd230..78d3509 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -1512,9 +1512,6 @@ static void net_tx_submit(struct xen_netbk *netbk) + skb->dev = netif->dev; + skb->protocol = eth_type_trans(skb, skb->dev); + +- netif->stats.rx_bytes += skb->len; +- netif->stats.rx_packets++; +- + if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb_checksum_setup(skb)) { + DPRINTK("Can't setup checksum in net_tx_action\n"); +@@ -1530,6 +1527,9 @@ static void net_tx_submit(struct xen_netbk *netbk) + continue; + } + ++ netif->stats.rx_bytes += skb->len; ++ netif->stats.rx_packets++; ++ + netif_rx_ni(skb); + netif->dev->last_rx = jiffies; + } +-- +1.7.3.4 + + +From c6efc62e71720df66d9a91d33a3de813f0ec41c2 Mon Sep 17 00:00:00 2001 +From: Paul Durrant +Date: Tue, 14 Dec 2010 20:35:20 +0000 +Subject: [PATCH 113/139] xen: netback: Remove the 500ms timeout to restart the netif queue. + +It is generally unhelpful as it results in a massive tail-drop should a guest become +unresponsive for a relatively short period of time and no back-pressure (other than +that caused by a higher layer protocol) is applied to the sender. + +Signed-off-by: Paul Durrant +Signed-off-by: Ian Campbell +--- + drivers/xen/netback/netback.c | 20 +------------------- + 1 files changed, 1 insertions(+), 19 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 78d3509..2caa5f8 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -275,13 +275,6 @@ static inline int netbk_queue_full(struct xen_netif *netif) + ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed); + } + +-static void tx_queue_callback(unsigned long data) +-{ +- struct xen_netif *netif = (struct xen_netif *)data; +- if (netif_schedulable(netif)) +- netif_wake_queue(netif->dev); +-} +- + /* Figure out how many ring slots we're going to need to send @skb to + the guest. */ + static unsigned count_skb_slots(struct sk_buff *skb, struct xen_netif *netif) +@@ -364,19 +357,8 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) + netif->rx.sring->req_event = netif->rx_req_cons_peek + + netbk_max_required_rx_slots(netif); + mb(); /* request notification /then/ check & stop the queue */ +- if (netbk_queue_full(netif)) { ++ if (netbk_queue_full(netif)) + netif_stop_queue(dev); +- /* +- * Schedule 500ms timeout to restart the queue, thus +- * ensuring that an inactive queue will be drained. +- * Packets will be immediately be dropped until more +- * receive buffers become available (see +- * netbk_queue_full() check above). 
+- */ +- netif->tx_queue_timeout.data = (unsigned long)netif; +- netif->tx_queue_timeout.function = tx_queue_callback; +- mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2); +- } + } + skb_queue_tail(&netbk->rx_queue, skb); + +-- +1.7.3.4 + + +From 0e8da97315f8cc89611f9194097931df4e67efc7 Mon Sep 17 00:00:00 2001 +From: Paul Durrant +Date: Tue, 14 Dec 2010 20:35:21 +0000 +Subject: [PATCH 114/139] xen: netback: Add a missing test to tx_work_todo. + +Adda test so that, when netback is using worker threads, net_tx_action() +gets called in a timely manner when the pending_inuse list is populated. + +Signed-off-by: Paul Durrant +Signed-off-by: Ian Campbell +--- + drivers/xen/netback/netback.c | 4 ++++ + 1 files changed, 4 insertions(+), 0 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 2caa5f8..dd52d01 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -1695,6 +1695,10 @@ static inline int tx_work_todo(struct xen_netbk *netbk) + if (netbk->dealloc_cons != netbk->dealloc_prod) + return 1; + ++ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB && ++ !list_empty(&netbk->pending_inuse_head)) ++ return 1; ++ + if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && + !list_empty(&netbk->net_schedule_list)) + return 1; +-- +1.7.3.4 + + +From e2f4dacefdb6cdff5e4e0b380632ff7ca750ba8b Mon Sep 17 00:00:00 2001 +From: Paul Durrant +Date: Tue, 14 Dec 2010 20:35:22 +0000 +Subject: [PATCH 115/139] xen: netback: Re-factor net_tx_action_dealloc() slightly. + +There is no need for processing of the pending_inuse list to be within the dealloc_prod/cons +loop. + +Signed-off-by: Paul Durrant +Signed-off-by: Ian Campbell +--- + drivers/xen/netback/netback.c | 26 ++++++++++++++------------ + 1 files changed, 14 insertions(+), 12 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index dd52d01..53b3a0e 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -906,11 +906,20 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk) + gop++; + } + +- if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB || +- list_empty(&netbk->pending_inuse_head)) +- break; ++ } while (dp != netbk->dealloc_prod); ++ ++ netbk->dealloc_cons = dc; + +- /* Copy any entries that have been pending for too long. */ ++ ret = HYPERVISOR_grant_table_op( ++ GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops, ++ gop - netbk->tx_unmap_ops); ++ BUG_ON(ret); ++ ++ /* ++ * Copy any entries that have been pending for too long ++ */ ++ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB && ++ !list_empty(&netbk->pending_inuse_head)) { + list_for_each_entry_safe(inuse, n, + &netbk->pending_inuse_head, list) { + struct pending_tx_info *pending_tx_info; +@@ -936,14 +945,7 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk) + + break; + } +- } while (dp != netbk->dealloc_prod); +- +- netbk->dealloc_cons = dc; +- +- ret = HYPERVISOR_grant_table_op( +- GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops, +- gop - netbk->tx_unmap_ops); +- BUG_ON(ret); ++ } + + list_for_each_entry_safe(inuse, n, &list, list) { + struct pending_tx_info *pending_tx_info; +-- +1.7.3.4 + + +From 082386b4a581b2ba5a125cc8944a57ceb33ff37c Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Wed, 5 Jan 2011 09:57:37 +0000 +Subject: [PATCH 116/139] xen: netback: Drop GSO SKBs which do not have csum_blank. 
+ +The Linux network stack expects all GSO SKBs to have ip_summed == +CHECKSUM_PARTIAL (which implies that the frame contains a partial +checksum) and the Xen network ring protocol similarly expects an SKB +which has GSO set to also have NETRX_csum_blank (which also implies a +partial checksum). Therefore drop such frames on receive otherwise +they will trigger the warning in skb_gso_segment. + +Signed-off-by: Ian Campbell +Cc: Jeremy Fitzhardinge +Cc: xen-devel@lists.xensource.com +--- + drivers/xen/netback/netback.c | 4 ++++ + 1 files changed, 4 insertions(+), 0 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 53b3a0e..2766b93 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -1502,6 +1502,10 @@ static void net_tx_submit(struct xen_netbk *netbk) + kfree_skb(skb); + continue; + } ++ } else if (skb_is_gso(skb)) { ++ DPRINTK("Dropping GSO but not CHECKSUM_PARTIAL skb\n"); ++ kfree_skb(skb); ++ continue; + } + + if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) && +-- +1.7.3.4 + + +From 27e6a8538a7e781f4774e4746f67eb113996333d Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Wed, 19 Jan 2011 12:43:38 +0000 +Subject: [PATCH 117/139] xen: netback: completely remove tx_queue_timer + +"xen: netback: Remove the 500ms timeout to restart the netif queue." missed +removing the timer initialisation. + +Also remove the related comment which has been obsolete since the default for +MODPARM_copy_skb was switched to true some time ago. + +Signed-off-by: Ian Campbell +Cc: Paul Durrant +--- + drivers/xen/netback/common.h | 3 --- + drivers/xen/netback/interface.c | 13 +------------ + 2 files changed, 1 insertions(+), 15 deletions(-) + +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index ce0041a..7e03a46 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -99,9 +99,6 @@ struct xen_netif { + unsigned long remaining_credit; + struct timer_list credit_timeout; + +- /* Enforce draining of the transmit queue. */ +- struct timer_list tx_queue_timeout; +- + /* Statistics */ + int nr_copied_skbs; + +diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c +index 2e8508a..efdc21c 100644 +--- a/drivers/xen/netback/interface.c ++++ b/drivers/xen/netback/interface.c +@@ -41,15 +41,7 @@ + * Module parameter 'queue_length': + * + * Enables queuing in the network stack when a client has run out of receive +- * descriptors. Although this feature can improve receive bandwidth by avoiding +- * packet loss, it can also result in packets sitting in the 'tx_queue' for +- * unbounded time. This is bad if those packets hold onto foreign resources. +- * For example, consider a packet that holds onto resources belonging to the +- * guest for which it is queued (e.g., packet received on vif1.0, destined for +- * vif1.1 which is not activated in the guest): in this situation the guest +- * will never be destroyed, unless vif1.1 is taken down. To avoid this, we +- * run a timer (tx_queue_timeout) to drain the queue when the interface is +- * blocked. ++ * descriptors. + */ + static unsigned long netbk_queue_length = 32; + module_param_named(queue_length, netbk_queue_length, ulong, 0644); +@@ -295,8 +287,6 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int + /* Initialize 'expires' now: it's used to track the credit window. 
*/ + netif->credit_timeout.expires = jiffies; + +- init_timer(&netif->tx_queue_timeout); +- + dev->netdev_ops = &netback_ops; + netif_set_features(netif); + SET_ETHTOOL_OPS(dev, &network_ethtool_ops); +@@ -458,7 +448,6 @@ void netif_disconnect(struct xen_netif *netif) + wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0); + + del_timer_sync(&netif->credit_timeout); +- del_timer_sync(&netif->tx_queue_timeout); + + if (netif->irq) + unbind_from_irqhandler(netif->irq, netif); +-- +1.7.3.4 + + +From eccfb3db4b10548f9ecc71cc6f79bbec7e594f1d Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Tue, 18 Jan 2011 11:37:12 +0000 +Subject: [PATCH 118/139] xen: netback: rationalise types used in count_skb_slots + +Signed-off-by: Ian Campbell +--- + drivers/xen/netback/netback.c | 10 +++------- + 1 files changed, 3 insertions(+), 7 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 2766b93..52f1745 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -277,14 +277,10 @@ static inline int netbk_queue_full(struct xen_netif *netif) + + /* Figure out how many ring slots we're going to need to send @skb to + the guest. */ +-static unsigned count_skb_slots(struct sk_buff *skb, struct xen_netif *netif) ++static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif) + { +- unsigned count; +- unsigned copy_off; +- unsigned i; +- +- copy_off = 0; +- count = 1; ++ unsigned int count = 1; ++ int i, copy_off = 0; + + BUG_ON(offset_in_page(skb->data) + skb_headlen(skb) > MAX_BUFFER_OFFSET); + +-- +1.7.3.4 + + +From 351ea99b9be1dbab49e069a4250740acc4375f6d Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Tue, 18 Jan 2011 11:21:35 +0000 +Subject: [PATCH 119/139] xen: netback: refactor logic for moving to a new receive buffer. + +Signed-off-by: Ian Campbell +--- + drivers/xen/netback/netback.c | 80 ++++++++++++++++++++++++----------------- + 1 files changed, 47 insertions(+), 33 deletions(-) + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 52f1745..ae4821a 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -275,8 +275,51 @@ static inline int netbk_queue_full(struct xen_netif *netif) + ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed); + } + +-/* Figure out how many ring slots we're going to need to send @skb to +- the guest. */ ++/* ++ * Returns true if we should start a new receive buffer instead of ++ * adding 'size' bytes to a buffer which currently contains 'offset' ++ * bytes. ++ */ ++static bool start_new_rx_buffer(int offset, unsigned long size, int head) ++{ ++ /* simple case: we have completely filled the current buffer. */ ++ if (offset == MAX_BUFFER_OFFSET) ++ return true; ++ ++ /* ++ * complex case: start a fresh buffer if the current frag ++ * would overflow the current buffer but only if: ++ * (i) this frag would fit completely in the next buffer ++ * and (ii) there is already some data in the current buffer ++ * and (iii) this is not the head buffer. ++ * ++ * Where: ++ * - (i) stops us splitting a frag into two copies ++ * unless the frag is too large for a single buffer. ++ * - (ii) stops us from leaving a buffer pointlessly empty. ++ * - (iii) stops us leaving the first buffer ++ * empty. Strictly speaking this is already covered ++ * by (ii) but is explicitly checked because ++ * netfront relies on the first buffer being ++ * non-empty and can crash otherwise. 
++ * ++ * This means we will effectively linearise small ++ * frags but do not needlessly split large buffers ++ * into multiple copies tend to give large frags their ++ * own buffers as before. ++ */ ++ if ((offset + size > MAX_BUFFER_OFFSET) && ++ (size <= MAX_BUFFER_OFFSET) && offset && !head) ++ return true; ++ ++ return false; ++} ++ ++/* ++ * Figure out how many ring slots we're going to need to send @skb to ++ * the guest. This function is essentially a dry run of ++ * netbk_gop_frag_copy. ++ */ + static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif) + { + unsigned int count = 1; +@@ -295,9 +338,7 @@ static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif + while (size > 0) { + BUG_ON(copy_off > MAX_BUFFER_OFFSET); + +- /* These checks are the same as in netbk_gop_frag_copy */ +- if (copy_off == MAX_BUFFER_OFFSET +- || ((copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && copy_off)) { ++ if (start_new_rx_buffer(copy_off, size, 0)) { + count++; + copy_off = 0; + } +@@ -403,34 +444,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif, + while (size > 0) { + BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET); + +- /* +- * Move to a new receive buffer if: +- * +- * simple case: we have completely filled the current buffer. +- * +- * complex case: the current frag would overflow +- * the current buffer but only if: +- * (i) this frag would fit completely in the next buffer +- * and (ii) there is already some data in the current buffer +- * and (iii) this is not the head buffer. +- * +- * Where: +- * - (i) stops us splitting a frag into two copies +- * unless the frag is too large for a single buffer. +- * - (ii) stops us from leaving a buffer pointlessly empty. +- * - (iii) stops us leaving the first buffer +- * empty. Strictly speaking this is already covered +- * by (ii) but is explicitly checked because +- * netfront relies on the first buffer being +- * non-empty and can crash otherwise. +- * +- * This means we will effectively linearise small +- * frags but do not needlessly split large buffers +- * into multiple copies tend to give large frags their +- * own buffers as before. +- */ +- if (npo->copy_off == MAX_BUFFER_OFFSET +- || ((npo->copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && npo->copy_off && !head)) { ++ if (start_new_rx_buffer(npo->copy_off, size, head)) { + struct xen_netif_rx_request *req; + + BUG_ON(head); /* Netfront requires there to be some data in the head buffer. */ +-- +1.7.3.4 + + +From 790bdefa794705301733d53d36c3e8d3a98c811d Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Fri, 24 Dec 2010 13:37:04 +0000 +Subject: [PATCH 120/139] xen: netback: cleanup coding style + +Fix checkpatch.pl errors plus manual sweep. + +Signed-off-by: Ian Campbell +--- + drivers/xen/netback/common.h | 52 +------------- + drivers/xen/netback/interface.c | 14 ++-- + drivers/xen/netback/netback.c | 142 ++++++++++++++++++++++---------------- + drivers/xen/netback/xenbus.c | 61 +++++++---------- + 4 files changed, 118 insertions(+), 151 deletions(-) + +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index 7e03a46..77bb3fc 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -35,13 +35,13 @@ + #include + #include + #include ++#include + #include + #include + #include + #include + + #include +-#include + #include + #include + #include +@@ -49,7 +49,7 @@ + + #define DPRINTK(_f, _a...) 
\ + pr_debug("(file=%s, line=%d) " _f, \ +- __FILE__ , __LINE__ , ## _a ) ++ __FILE__ , __LINE__ , ## _a) + #define IPRINTK(fmt, args...) \ + printk(KERN_INFO "xen_net: " fmt, ##args) + #define WPRINTK(fmt, args...) \ +@@ -131,66 +131,22 @@ enum { + + extern int netbk_copy_skb_mode; + +-/* Function pointers into netback accelerator plugin modules */ +-struct netback_accel_hooks { +- struct module *owner; +- int (*probe)(struct xenbus_device *dev); +- int (*remove)(struct xenbus_device *dev); +-}; +- +-/* Structure to track the state of a netback accelerator plugin */ +-struct netback_accelerator { +- struct list_head link; +- int id; +- char *eth_name; +- atomic_t use_count; +- struct netback_accel_hooks *hooks; +-}; +- + struct backend_info { + struct xenbus_device *dev; + struct xen_netif *netif; + enum xenbus_state frontend_state; + struct xenbus_watch hotplug_status_watch; + int have_hotplug_status_watch:1; +- +- /* State relating to the netback accelerator */ +- void *netback_accel_priv; +- /* The accelerator that this backend is currently using */ +- struct netback_accelerator *accelerator; + }; + +-#define NETBACK_ACCEL_VERSION 0x00010001 +- +-/* +- * Connect an accelerator plugin module to netback. Returns zero on +- * success, < 0 on error, > 0 (with highest version number supported) +- * if version mismatch. +- */ +-extern int netback_connect_accelerator(unsigned version, +- int id, const char *eth_name, +- struct netback_accel_hooks *hooks); +-/* Disconnect a previously connected accelerator plugin module */ +-extern void netback_disconnect_accelerator(int id, const char *eth_name); +- +- +-extern +-void netback_probe_accelerators(struct backend_info *be, +- struct xenbus_device *dev); +-extern +-void netback_remove_accelerators(struct backend_info *be, +- struct xenbus_device *dev); +-extern +-void netif_accel_init(void); +- +- + #define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE) + #define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE) + + void netif_disconnect(struct xen_netif *netif); + + void netif_set_features(struct xen_netif *netif); +-struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle); ++struct xen_netif *netif_alloc(struct device *parent, domid_t domid, ++ unsigned int handle); + int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref, + unsigned long rx_ring_ref, unsigned int evtchn); + +diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c +index efdc21c..c66b180 100644 +--- a/drivers/xen/netback/interface.c ++++ b/drivers/xen/netback/interface.c +@@ -225,8 +225,7 @@ static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data) + } + } + +-static struct ethtool_ops network_ethtool_ops = +-{ ++static struct ethtool_ops network_ethtool_ops = { + .get_drvinfo = netbk_get_drvinfo, + + .get_tx_csum = ethtool_op_get_tx_csum, +@@ -242,8 +241,7 @@ static struct ethtool_ops network_ethtool_ops = + .get_strings = netbk_get_strings, + }; + +-static struct net_device_ops netback_ops = +-{ ++static struct net_device_ops netback_ops = { + .ndo_start_xmit = netif_be_start_xmit, + .ndo_get_stats = netif_be_get_stats, + .ndo_open = net_open, +@@ -251,7 +249,8 @@ static struct net_device_ops netback_ops = + .ndo_change_mtu = netbk_change_mtu, + }; + +-struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle) ++struct xen_netif *netif_alloc(struct device *parent, domid_t domid, ++ unsigned int handle) + { + int err 
= 0; + struct net_device *dev; +@@ -316,8 +315,9 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int + return netif; + } + +-static int map_frontend_pages( +- struct xen_netif *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref) ++static int map_frontend_pages(struct xen_netif *netif, ++ grant_ref_t tx_ring_ref, ++ grant_ref_t rx_ring_ref) + { + struct gnttab_map_grant_ref op; + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index ae4821a..99440fd 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -1,11 +1,9 @@ +-/****************************************************************************** +- * drivers/xen/netback/netback.c +- * ++/* + * Back-end of the driver for virtual network devices. This portion of the + * driver exports a 'unified' network-device interface that can be accessed + * by any operating system that implements a compatible front end. A + * reference front-end implementation can be found in: +- * drivers/xen/netfront/netfront.c ++ * drivers/net/xen-netfront.c + * + * Copyright (c) 2002-2005, K A Fraser + * +@@ -82,8 +80,8 @@ static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk, + } + + /* extra field used in struct page */ +-static inline void netif_set_page_ext(struct page *pg, unsigned int group, +- unsigned int idx) ++static inline void netif_set_page_ext(struct page *pg, ++ unsigned int group, unsigned int idx) + { + union page_ext ext = { .e = { .group = group + 1, .idx = idx } }; + +@@ -91,7 +89,8 @@ static inline void netif_set_page_ext(struct page *pg, unsigned int group, + pg->mapping = ext.mapping; + } + +-static inline int netif_get_page_ext(struct page *pg, unsigned int *_group, unsigned int *_idx) ++static int netif_get_page_ext(struct page *pg, ++ unsigned int *_group, unsigned int *_idx) + { + union page_ext ext = { .mapping = pg->mapping }; + struct xen_netbk *netbk; +@@ -325,7 +324,7 @@ static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif + unsigned int count = 1; + int i, copy_off = 0; + +- BUG_ON(offset_in_page(skb->data) + skb_headlen(skb) > MAX_BUFFER_OFFSET); ++ BUG_ON(offset_in_page(skb->data)+skb_headlen(skb) > MAX_BUFFER_OFFSET); + + copy_off = skb_headlen(skb); + +@@ -376,7 +375,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) + */ + if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) { + struct sk_buff *nskb = netbk_copy_skb(skb); +- if ( unlikely(nskb == NULL) ) ++ if (unlikely(nskb == NULL)) + goto drop; + /* Copy only the header fields we use in this driver. */ + nskb->dev = skb->dev; +@@ -385,8 +384,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) + skb = nskb; + } + +- /* Reserve ring slots for the worst-case number of +- * fragments. */ ++ /* Reserve ring slots for the worst-case number of fragments. */ + netif->rx_req_cons_peek += count_skb_slots(skb, netif); + netif_get(netif); + +@@ -418,9 +416,29 @@ struct netrx_pending_operations { + grant_ref_t copy_gref; + }; + +-/* Set up the grant operations for this fragment. If it's a flipping +- interface, we also set up the unmap request from here. 
*/ ++static struct netbk_rx_meta *get_next_rx_buffer(struct xen_netif *netif, ++ struct netrx_pending_operations *npo) ++{ ++ struct netbk_rx_meta *meta; ++ struct xen_netif_rx_request *req; + ++ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++); ++ ++ meta = npo->meta + npo->meta_prod++; ++ meta->gso_size = 0; ++ meta->size = 0; ++ meta->id = req->id; ++ ++ npo->copy_off = 0; ++ npo->copy_gref = req->gref; ++ ++ return meta; ++} ++ ++/* ++ * Set up the grant operations for this fragment. If it's a flipping ++ * interface, we also set up the unmap request from here. ++ */ + static void netbk_gop_frag_copy(struct xen_netif *netif, + struct netrx_pending_operations *npo, + struct page *page, unsigned long size, +@@ -431,7 +449,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif, + /* + * These variables a used iff netif_get_page_ext returns true, + * in which case they are guaranteed to be initialized. +- */ ++ */ + unsigned int uninitialized_var(group), uninitialized_var(idx); + int foreign = netif_get_page_ext(page, &group, &idx); + unsigned long bytes; +@@ -445,17 +463,13 @@ static void netbk_gop_frag_copy(struct xen_netif *netif, + BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET); + + if (start_new_rx_buffer(npo->copy_off, size, head)) { +- struct xen_netif_rx_request *req; +- +- BUG_ON(head); /* Netfront requires there to be some data in the head buffer. */ +- /* Overflowed this request, go to the next one */ +- req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++); +- meta = npo->meta + npo->meta_prod++; +- meta->gso_size = 0; +- meta->size = 0; +- meta->id = req->id; +- npo->copy_off = 0; +- npo->copy_gref = req->gref; ++ /* ++ * Netfront requires there to be some data in ++ * the head buffer. ++ */ ++ BUG_ON(head); ++ ++ meta = get_next_rx_buffer(netif, npo); + } + + bytes = size; +@@ -474,8 +488,9 @@ static void netbk_gop_frag_copy(struct xen_netif *netif, + copy_gop->source.u.ref = src_pend->req.gref; + copy_gop->flags |= GNTCOPY_source_gref; + } else { ++ void *vaddr = page_address(page); + copy_gop->source.domid = DOMID_SELF; +- copy_gop->source.u.gmfn = virt_to_mfn(page_address(page)); ++ copy_gop->source.u.gmfn = virt_to_mfn(vaddr); + } + copy_gop->source.offset = offset; + copy_gop->dest.domid = netif->domid; +@@ -489,17 +504,22 @@ static void netbk_gop_frag_copy(struct xen_netif *netif, + + offset += bytes; + size -= bytes; +- head = 0; /* Must be something in this buffer now */ ++ head = 0; /* There must be something in this buffer now. */ + } + } + +-/* Prepare an SKB to be transmitted to the frontend. This is +- responsible for allocating grant operations, meta structures, etc. +- It returns the number of meta structures consumed. The number of +- ring slots used is always equal to the number of meta slots used +- plus the number of GSO descriptors used. Currently, we use either +- zero GSO descriptors (for non-GSO packets) or one descriptor (for +- frontend-side LRO). */ ++/* ++ * Prepare an SKB to be transmitted to the frontend. ++ * ++ * This function is responsible for allocating grant operations, meta ++ * structures, etc. ++ * ++ * It returns the number of meta structures consumed. The number of ++ * ring slots used is always equal to the number of meta slots used ++ * plus the number of GSO descriptors used. Currently, we use either ++ * zero GSO descriptors (for non-GSO packets) or one descriptor (for ++ * frontend-side LRO). 
++ */ + static int netbk_gop_skb(struct sk_buff *skb, + struct netrx_pending_operations *npo) + { +@@ -554,10 +574,12 @@ static int netbk_gop_skb(struct sk_buff *skb, + return npo->meta_prod - old_meta_prod; + } + +-/* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was +- used to set up the operations on the top of +- netrx_pending_operations, which have since been done. Check that +- they didn't give any errors and advance over them. */ ++/* ++ * This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was ++ * used to set up the operations on the top of ++ * netrx_pending_operations, which have since been done. Check that ++ * they didn't give any errors and advance over them. ++ */ + static int netbk_check_gop(int nr_meta_slots, domid_t domid, + struct netrx_pending_operations *npo) + { +@@ -584,6 +606,12 @@ static void netbk_add_frag_responses(struct xen_netif *netif, int status, + int i; + unsigned long offset; + ++ /* No fragments used */ ++ if (nr_meta_slots <= 1) ++ return; ++ ++ nr_meta_slots--; ++ + for (i = 0; i < nr_meta_slots; i++) { + int flags; + if (i == nr_meta_slots - 1) +@@ -712,11 +740,9 @@ static void net_rx_action(unsigned long data) + gso->flags = 0; + } + +- if (sco->meta_slots_used > 1) { +- netbk_add_frag_responses(netif, status, +- netbk->meta + npo.meta_cons + 1, +- sco->meta_slots_used - 1); +- } ++ netbk_add_frag_responses(netif, status, ++ netbk->meta + npo.meta_cons + 1, ++ sco->meta_slots_used); + + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret); + irq = netif->irq; +@@ -887,9 +913,7 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk) + dc = netbk->dealloc_cons; + gop = netbk->tx_unmap_ops; + +- /* +- * Free up any grants we have finished using +- */ ++ /* Free up any grants we have finished using. 
*/ + do { + dp = netbk->dealloc_prod; + +@@ -999,7 +1023,8 @@ static void netbk_tx_err(struct xen_netif *netif, + + static int netbk_count_requests(struct xen_netif *netif, + struct xen_netif_tx_request *first, +- struct xen_netif_tx_request *txp, int work_to_do) ++ struct xen_netif_tx_request *txp, ++ int work_to_do) + { + RING_IDX cons = netif->tx.req_cons; + int frags = 0; +@@ -1039,10 +1064,10 @@ static int netbk_count_requests(struct xen_netif *netif, + } + + static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk, +- struct xen_netif *netif, +- struct sk_buff *skb, +- struct xen_netif_tx_request *txp, +- struct gnttab_map_grant_ref *mop) ++ struct xen_netif *netif, ++ struct sk_buff *skb, ++ struct xen_netif_tx_request *txp, ++ struct gnttab_map_grant_ref *mop) + { + struct skb_shared_info *shinfo = skb_shinfo(skb); + skb_frag_t *frags = shinfo->frags; +@@ -1181,7 +1206,8 @@ static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb) + } + } + +-int netbk_get_extras(struct xen_netif *netif, struct xen_netif_extra_info *extras, ++int netbk_get_extras(struct xen_netif *netif, ++ struct xen_netif_extra_info *extras, + int work_to_do) + { + struct xen_netif_extra_info extra; +@@ -1209,7 +1235,8 @@ int netbk_get_extras(struct xen_netif *netif, struct xen_netif_extra_info *extra + return work_to_do; + } + +-static int netbk_set_skb_gso(struct sk_buff *skb, struct xen_netif_extra_info *gso) ++static int netbk_set_skb_gso(struct sk_buff *skb, ++ struct xen_netif_extra_info *gso) + { + if (!gso->u.gso.size) { + DPRINTK("GSO size must not be zero.\n"); +@@ -1315,7 +1342,7 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk) + struct xen_netif *netif; + struct xen_netif_tx_request txreq; + struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS]; +- struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; ++ struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1]; + u16 pending_idx; + RING_IDX idx; + int work_to_do; +@@ -1377,7 +1404,7 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk) + if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) { + DPRINTK("txreq.offset: %x, size: %u, end: %lu\n", + txreq.offset, txreq.size, +- (txreq.offset &~PAGE_MASK) + txreq.size); ++ (txreq.offset&~PAGE_MASK) + txreq.size); + netbk_tx_err(netif, &txreq, idx); + continue; + } +@@ -1763,9 +1790,6 @@ static int __init netback_init(void) + } + memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr); + +- /* We can increase reservation by this much in net_rx_action(). */ +-// balloon_update_driver_allowance(NET_RX_RING_SIZE); +- + for (group = 0; group < xen_netbk_group_nr; group++) { + struct xen_netbk *netbk = &xen_netbk[group]; + skb_queue_head_init(&netbk->rx_queue); +@@ -1850,8 +1874,6 @@ static int __init netback_init(void) + netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB; + } + +- //netif_accel_init(); +- + rc = netif_xenbus_init(); + if (rc) + goto failed_init; +diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c +index 1fec65a..e2095fc 100644 +--- a/drivers/xen/netback/xenbus.c ++++ b/drivers/xen/netback/xenbus.c +@@ -1,20 +1,20 @@ + /* Xenbus code for netif backend +- Copyright (C) 2005 Rusty Russell +- Copyright (C) 2005 XenSource Ltd +- +- This program is free software; you can redistribute it and/or modify +- it under the terms of the GNU General Public License as published by +- the Free Software Foundation; either version 2 of the License, or +- (at your option) any later version. 
+- +- This program is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- GNU General Public License for more details. +- +- You should have received a copy of the GNU General Public License +- along with this program; if not, write to the Free Software +- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * Copyright (C) 2005 Rusty Russell ++ * Copyright (C) 2005 XenSource Ltd ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + #include +@@ -22,13 +22,6 @@ + #include + #include "common.h" + +-#if 0 +-#undef DPRINTK +-#define DPRINTK(fmt, args...) \ +- printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) +-#endif +- +- + static int connect_rings(struct backend_info *); + static void connect(struct backend_info *); + static void backend_create_netif(struct backend_info *be); +@@ -36,9 +29,7 @@ static void unregister_hotplug_status_watch(struct backend_info *be); + + static int netback_remove(struct xenbus_device *dev) + { +- struct backend_info *be = dev_get_drvdata(&dev->dev); +- +- //netback_remove_accelerators(be, dev); ++ struct backend_info *be = dev_get_drvdata(&dev->dev); + + unregister_hotplug_status_watch(be); + if (be->netif) { +@@ -126,8 +117,6 @@ static int netback_probe(struct xenbus_device *dev, + goto fail; + } + +- //netback_probe_accelerators(be, dev); +- + err = xenbus_switch_state(dev, XenbusStateInitWait); + if (err) + goto fail; +@@ -147,12 +136,13 @@ fail: + } + + +-/** ++/* + * Handle the creation of the hotplug script environment. We add the script + * and vif variables to the environment, for the benefit of the vif-* hotplug + * scripts. 
+ */ +-static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env) ++static int netback_uevent(struct xenbus_device *xdev, ++ struct kobj_uevent_env *env) + { + struct backend_info *be = dev_get_drvdata(&xdev->dev); + char *val; +@@ -164,8 +154,7 @@ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *en + int err = PTR_ERR(val); + xenbus_dev_fatal(xdev, err, "reading script"); + return err; +- } +- else { ++ } else { + if (add_uevent_var(env, "script=%s", val)) { + kfree(val); + return -ENOMEM; +@@ -173,10 +162,10 @@ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *en + kfree(val); + } + +- if (be && be->netif && add_uevent_var(env, "vif=%s", be->netif->dev->name)) +- return -ENOMEM; ++ if (!be || !be->netif) ++ return 0; + +- return 0; ++ return add_uevent_var(env, "vif=%s", be->netif->dev->name); + } + + +@@ -234,7 +223,7 @@ static void frontend_changed(struct xenbus_device *dev, + case XenbusStateInitialising: + if (dev->state == XenbusStateClosed) { + printk(KERN_INFO "%s: %s: prepare for reconnect\n", +- __FUNCTION__, dev->nodename); ++ __func__, dev->nodename); + xenbus_switch_state(dev, XenbusStateInitWait); + } + break; +-- +1.7.3.4 + + +From 0b0514b47e27f4c1b438b30972011aa27ac1ee8f Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Wed, 19 Jan 2011 10:51:45 +0000 +Subject: [PATCH 121/139] xen: netback: drop private ?PRINTK macros in favour of pr_* + +Signed-off-by: Ian Campbell +--- + drivers/xen/netback/common.h | 10 ++-------- + drivers/xen/netback/interface.c | 13 +++++++------ + drivers/xen/netback/netback.c | 28 ++++++++++++++-------------- + drivers/xen/netback/xenbus.c | 13 +++---------- + 4 files changed, 26 insertions(+), 38 deletions(-) + +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +index 77bb3fc..079e1de 100644 +--- a/drivers/xen/netback/common.h ++++ b/drivers/xen/netback/common.h +@@ -29,6 +29,8 @@ + #ifndef __NETIF__BACKEND__COMMON_H__ + #define __NETIF__BACKEND__COMMON_H__ + ++#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ ++ + #include + #include + #include +@@ -47,14 +49,6 @@ + #include + #include + +-#define DPRINTK(_f, _a...) \ +- pr_debug("(file=%s, line=%d) " _f, \ +- __FILE__ , __LINE__ , ## _a) +-#define IPRINTK(fmt, args...) \ +- printk(KERN_INFO "xen_net: " fmt, ##args) +-#define WPRINTK(fmt, args...) \ +- printk(KERN_WARNING "xen_net: " fmt, ##args) +- + struct xen_netif { + /* Unique identifier for this interface. 
*/ + domid_t domid; +diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c +index c66b180..c36db26 100644 +--- a/drivers/xen/netback/interface.c ++++ b/drivers/xen/netback/interface.c +@@ -31,6 +31,7 @@ + */ + + #include "common.h" ++ + #include + #include + +@@ -260,7 +261,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, + snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle); + dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup); + if (dev == NULL) { +- DPRINTK("Could not create netif: out of memory\n"); ++ pr_debug("Could not allocate netdev\n"); + return ERR_PTR(-ENOMEM); + } + +@@ -305,13 +306,13 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, + err = register_netdevice(dev); + rtnl_unlock(); + if (err) { +- DPRINTK("Could not register new net device %s: err=%d\n", +- dev->name, err); ++ pr_debug("Could not register new net device %s: err=%d\n", ++ dev->name, err); + free_netdev(dev); + return ERR_PTR(err); + } + +- DPRINTK("Successfully created netif\n"); ++ pr_debug("Successfully created netif\n"); + return netif; + } + +@@ -328,7 +329,7 @@ static int map_frontend_pages(struct xen_netif *netif, + BUG(); + + if (op.status) { +- DPRINTK(" Gnttab failure mapping tx_ring_ref!\n"); ++ pr_debug("Gnttab failure mapping tx_ring_ref!\n"); + return op.status; + } + +@@ -348,7 +349,7 @@ static int map_frontend_pages(struct xen_netif *netif, + (unsigned long)netif->tx_comms_area->addr, + GNTMAP_host_map, netif->tx_shmem_handle); + HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1); +- DPRINTK(" Gnttab failure mapping rx_ring_ref!\n"); ++ pr_debug("Gnttab failure mapping rx_ring_ref!\n"); + return op.status; + } + +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +index 99440fd..e0ca232 100644 +--- a/drivers/xen/netback/netback.c ++++ b/drivers/xen/netback/netback.c +@@ -590,8 +590,8 @@ static int netbk_check_gop(int nr_meta_slots, domid_t domid, + for (i = 0; i < nr_meta_slots; i++) { + copy_op = npo->copy + npo->copy_cons++; + if (copy_op->status != GNTST_okay) { +- DPRINTK("Bad status %d from copy to DOM%d.\n", +- copy_op->status, domid); ++ pr_debug("Bad status %d from copy to DOM%d.\n", ++ copy_op->status, domid); + status = NETIF_RSP_ERROR; + } + } +@@ -1215,7 +1215,7 @@ int netbk_get_extras(struct xen_netif *netif, + + do { + if (unlikely(work_to_do-- <= 0)) { +- DPRINTK("Missing extra info\n"); ++ pr_debug("Missing extra info\n"); + return -EBADR; + } + +@@ -1224,7 +1224,7 @@ int netbk_get_extras(struct xen_netif *netif, + if (unlikely(!extra.type || + extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) { + netif->tx.req_cons = ++cons; +- DPRINTK("Invalid extra type: %d\n", extra.type); ++ pr_debug("Invalid extra type: %d\n", extra.type); + return -EINVAL; + } + +@@ -1239,13 +1239,13 @@ static int netbk_set_skb_gso(struct sk_buff *skb, + struct xen_netif_extra_info *gso) + { + if (!gso->u.gso.size) { +- DPRINTK("GSO size must not be zero.\n"); ++ pr_debug("GSO size must not be zero.\n"); + return -EINVAL; + } + + /* Currently only TCPv4 S.O. is supported. 
*/ + if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { +- DPRINTK("Bad GSO type %d.\n", gso->u.gso.type); ++ pr_debug("Bad GSO type %d.\n", gso->u.gso.type); + return -EINVAL; + } + +@@ -1395,16 +1395,16 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk) + idx += ret; + + if (unlikely(txreq.size < ETH_HLEN)) { +- DPRINTK("Bad packet size: %d\n", txreq.size); ++ pr_debug("Bad packet size: %d\n", txreq.size); + netbk_tx_err(netif, &txreq, idx); + continue; + } + + /* No crossing a page as the payload mustn't fragment. */ + if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) { +- DPRINTK("txreq.offset: %x, size: %u, end: %lu\n", +- txreq.offset, txreq.size, +- (txreq.offset&~PAGE_MASK) + txreq.size); ++ pr_debug("txreq.offset: %x, size: %u, end: %lu\n", ++ txreq.offset, txreq.size, ++ (txreq.offset&~PAGE_MASK) + txreq.size); + netbk_tx_err(netif, &txreq, idx); + continue; + } +@@ -1419,7 +1419,7 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk) + skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN, + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(skb == NULL)) { +- DPRINTK("Can't allocate a skb in start_xmit.\n"); ++ pr_debug("Can't allocate a skb in start_xmit.\n"); + netbk_tx_err(netif, &txreq, idx); + break; + } +@@ -1494,7 +1494,7 @@ static void net_tx_submit(struct xen_netbk *netbk) + + /* Check the remap error code. */ + if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) { +- DPRINTK("netback grant failed.\n"); ++ pr_debug("netback grant failed.\n"); + skb_shinfo(skb)->nr_frags = 0; + kfree_skb(skb); + continue; +@@ -1535,12 +1535,12 @@ static void net_tx_submit(struct xen_netbk *netbk) + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb_checksum_setup(skb)) { +- DPRINTK("Can't setup checksum in net_tx_action\n"); ++ pr_debug("skb_checksum_setup failed\n"); + kfree_skb(skb); + continue; + } + } else if (skb_is_gso(skb)) { +- DPRINTK("Dropping GSO but not CHECKSUM_PARTIAL skb\n"); ++ pr_debug("GSO SKB checksum is not partial\n"); + kfree_skb(skb); + continue; + } +diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c +index e2095fc..640c696 100644 +--- a/drivers/xen/netback/xenbus.c ++++ b/drivers/xen/netback/xenbus.c +@@ -17,9 +17,6 @@ + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +-#include +-#include +-#include + #include "common.h" + + static int connect_rings(struct backend_info *); +@@ -130,7 +127,7 @@ abort_transaction: + xenbus_transaction_end(xbt, 1); + xenbus_dev_fatal(dev, err, "%s", message); + fail: +- DPRINTK("failed"); ++ pr_debug("failed"); + netback_remove(dev); + return err; + } +@@ -147,8 +144,6 @@ static int netback_uevent(struct xenbus_device *xdev, + struct backend_info *be = dev_get_drvdata(&xdev->dev); + char *val; + +- DPRINTK("netback_uevent"); +- + val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL); + if (IS_ERR(val)) { + int err = PTR_ERR(val); +@@ -215,7 +210,7 @@ static void frontend_changed(struct xenbus_device *dev, + { + struct backend_info *be = dev_get_drvdata(&dev->dev); + +- DPRINTK("%s", xenbus_strstate(frontend_state)); ++ pr_debug("frontend state %s", xenbus_strstate(frontend_state)); + + be->frontend_state = frontend_state; + +@@ -295,7 +290,7 @@ static void xen_net_read_rate(struct xenbus_device *dev, + return; + + fail: +- WPRINTK("Failed to parse network rate limit. Traffic unlimited.\n"); ++ pr_warn("Failed to parse network rate limit. 
Traffic unlimited.\n"); + kfree(ratestr); + } + +@@ -394,8 +389,6 @@ static int connect_rings(struct backend_info *be) + int err; + int val; + +- DPRINTK(""); +- + err = xenbus_gather(XBT_NIL, dev->otherend, + "tx-ring-ref", "%lu", &tx_ring_ref, + "rx-ring-ref", "%lu", &rx_ring_ref, +-- +1.7.3.4 + + +From e9124d120cf83945516c4085b32ea40c1bb94ffb Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Tue, 18 Jan 2011 12:54:12 +0000 +Subject: [PATCH 122/139] xen: netback: move under drivers/net/xen-netback/ + +From the kernel's PoV netback is just another network device driver. + +Signed-off-by: Ian Campbell +--- + drivers/net/Kconfig | 7 + + drivers/net/Makefile | 1 + + drivers/net/xen-netback/Makefile | 3 + + drivers/net/xen-netback/common.h | 275 +++++ + drivers/net/xen-netback/interface.c | 465 +++++++++ + drivers/net/xen-netback/netback.c | 1909 +++++++++++++++++++++++++++++++++++ + drivers/net/xen-netback/xenbus.c | 487 +++++++++ + drivers/xen/Kconfig | 7 - + drivers/xen/Makefile | 1 - + drivers/xen/netback/Makefile | 3 - + drivers/xen/netback/common.h | 275 ----- + drivers/xen/netback/interface.c | 465 --------- + drivers/xen/netback/netback.c | 1909 ----------------------------------- + drivers/xen/netback/xenbus.c | 487 --------- + 14 files changed, 3147 insertions(+), 3147 deletions(-) + create mode 100644 drivers/net/xen-netback/Makefile + create mode 100644 drivers/net/xen-netback/common.h + create mode 100644 drivers/net/xen-netback/interface.c + create mode 100644 drivers/net/xen-netback/netback.c + create mode 100644 drivers/net/xen-netback/xenbus.c + delete mode 100644 drivers/xen/netback/Makefile + delete mode 100644 drivers/xen/netback/common.h + delete mode 100644 drivers/xen/netback/interface.c + delete mode 100644 drivers/xen/netback/netback.c + delete mode 100644 drivers/xen/netback/xenbus.c + +diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig +index cbf0635..5b088f5 100644 +--- a/drivers/net/Kconfig ++++ b/drivers/net/Kconfig +@@ -2970,6 +2970,13 @@ config XEN_NETDEV_FRONTEND + if you are compiling a kernel for a Xen guest, you almost + certainly want to enable this. + ++config XEN_NETDEV_BACKEND ++ tristate "Xen backend network device" ++ depends on XEN_BACKEND ++ help ++ Implement the network backend driver, which passes packets ++ from the guest domain's frontend drivers to the network. 
++ + config ISERIES_VETH + tristate "iSeries Virtual Ethernet driver support" + depends on PPC_ISERIES +diff --git a/drivers/net/Makefile b/drivers/net/Makefile +index b90738d..145dfd7 100644 +--- a/drivers/net/Makefile ++++ b/drivers/net/Makefile +@@ -171,6 +171,7 @@ obj-$(CONFIG_SLIP) += slip.o + obj-$(CONFIG_SLHC) += slhc.o + + obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o ++obj-$(CONFIG_XEN_NETDEV_BACKEND) += xen-netback/ + + obj-$(CONFIG_DUMMY) += dummy.o + obj-$(CONFIG_IFB) += ifb.o +diff --git a/drivers/net/xen-netback/Makefile b/drivers/net/xen-netback/Makefile +new file mode 100644 +index 0000000..e346e81 +--- /dev/null ++++ b/drivers/net/xen-netback/Makefile +@@ -0,0 +1,3 @@ ++obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o ++ ++xen-netback-y := netback.o xenbus.o interface.o +diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h +new file mode 100644 +index 0000000..079e1de +--- /dev/null ++++ b/drivers/net/xen-netback/common.h +@@ -0,0 +1,275 @@ ++/****************************************************************************** ++ * arch/xen/drivers/netif/backend/common.h ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation; or, when distributed ++ * separately from the Linux kernel or incorporated into other ++ * software packages, subject to the following license: ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this source file (the "Software"), to deal in the Software without ++ * restriction, including without limitation the rights to use, copy, modify, ++ * merge, publish, distribute, sublicense, and/or sell copies of the Software, ++ * and to permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ */ ++ ++#ifndef __NETIF__BACKEND__COMMON_H__ ++#define __NETIF__BACKEND__COMMON_H__ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++struct xen_netif { ++ /* Unique identifier for this interface. */ ++ domid_t domid; ++ int group; ++ unsigned int handle; ++ ++ u8 fe_dev_addr[6]; ++ ++ /* Physical parameters of the comms window. */ ++ grant_handle_t tx_shmem_handle; ++ grant_ref_t tx_shmem_ref; ++ grant_handle_t rx_shmem_handle; ++ grant_ref_t rx_shmem_ref; ++ unsigned int irq; ++ ++ /* The shared rings and indexes. */ ++ struct xen_netif_tx_back_ring tx; ++ struct xen_netif_rx_back_ring rx; ++ struct vm_struct *tx_comms_area; ++ struct vm_struct *rx_comms_area; ++ ++ /* Flags that must not be set in dev->features */ ++ int features_disabled; ++ ++ /* Frontend feature information. 
*/ ++ u8 can_sg:1; ++ u8 gso:1; ++ u8 gso_prefix:1; ++ u8 csum:1; ++ ++ /* Internal feature information. */ ++ u8 can_queue:1; /* can queue packets for receiver? */ ++ ++ /* Allow netif_be_start_xmit() to peek ahead in the rx request ++ * ring. This is a prediction of what rx_req_cons will be once ++ * all queued skbs are put on the ring. */ ++ RING_IDX rx_req_cons_peek; ++ ++ /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */ ++ unsigned long credit_bytes; ++ unsigned long credit_usec; ++ unsigned long remaining_credit; ++ struct timer_list credit_timeout; ++ ++ /* Statistics */ ++ int nr_copied_skbs; ++ ++ /* Miscellaneous private stuff. */ ++ struct list_head list; /* scheduling list */ ++ atomic_t refcnt; ++ struct net_device *dev; ++ struct net_device_stats stats; ++ ++ unsigned int carrier; ++ ++ wait_queue_head_t waiting_to_free; ++}; ++ ++/* ++ * Implement our own carrier flag: the network stack's version causes delays ++ * when the carrier is re-enabled (in particular, dev_activate() may not ++ * immediately be called, which can cause packet loss; also the etherbridge ++ * can be rather lazy in activating its port). ++ */ ++#define netback_carrier_on(netif) ((netif)->carrier = 1) ++#define netback_carrier_off(netif) ((netif)->carrier = 0) ++#define netback_carrier_ok(netif) ((netif)->carrier) ++ ++enum { ++ NETBK_DONT_COPY_SKB, ++ NETBK_DELAYED_COPY_SKB, ++ NETBK_ALWAYS_COPY_SKB, ++}; ++ ++extern int netbk_copy_skb_mode; ++ ++struct backend_info { ++ struct xenbus_device *dev; ++ struct xen_netif *netif; ++ enum xenbus_state frontend_state; ++ struct xenbus_watch hotplug_status_watch; ++ int have_hotplug_status_watch:1; ++}; ++ ++#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE) ++#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE) ++ ++void netif_disconnect(struct xen_netif *netif); ++ ++void netif_set_features(struct xen_netif *netif); ++struct xen_netif *netif_alloc(struct device *parent, domid_t domid, ++ unsigned int handle); ++int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref, ++ unsigned long rx_ring_ref, unsigned int evtchn); ++ ++static inline void netif_get(struct xen_netif *netif) ++{ ++ atomic_inc(&netif->refcnt); ++} ++ ++static inline void netif_put(struct xen_netif *netif) ++{ ++ if (atomic_dec_and_test(&netif->refcnt)) ++ wake_up(&netif->waiting_to_free); ++} ++ ++int netif_xenbus_init(void); ++ ++#define netif_schedulable(netif) \ ++ (netif_running((netif)->dev) && netback_carrier_ok(netif)) ++ ++void netif_schedule_work(struct xen_netif *netif); ++void netif_deschedule_work(struct xen_netif *netif); ++ ++int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev); ++struct net_device_stats *netif_be_get_stats(struct net_device *dev); ++irqreturn_t netif_be_int(int irq, void *dev_id); ++ ++static inline int netbk_can_queue(struct net_device *dev) ++{ ++ struct xen_netif *netif = netdev_priv(dev); ++ return netif->can_queue; ++} ++ ++static inline int netbk_can_sg(struct net_device *dev) ++{ ++ struct xen_netif *netif = netdev_priv(dev); ++ return netif->can_sg; ++} ++ ++struct pending_tx_info { ++ struct xen_netif_tx_request req; ++ struct xen_netif *netif; ++}; ++typedef unsigned int pending_ring_idx_t; ++ ++struct netbk_rx_meta { ++ int id; ++ int size; ++ int gso_size; ++}; ++ ++struct netbk_tx_pending_inuse { ++ struct list_head list; ++ unsigned long alloc_time; ++}; ++ ++#define MAX_PENDING_REQS 256 ++ ++#define MAX_BUFFER_OFFSET PAGE_SIZE ++ ++/* extra field 
used in struct page */ ++union page_ext { ++ struct { ++#if BITS_PER_LONG < 64 ++#define IDX_WIDTH 8 ++#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH) ++ unsigned int group:GROUP_WIDTH; ++ unsigned int idx:IDX_WIDTH; ++#else ++ unsigned int group, idx; ++#endif ++ } e; ++ void *mapping; ++}; ++ ++struct xen_netbk { ++ union { ++ struct { ++ struct tasklet_struct net_tx_tasklet; ++ struct tasklet_struct net_rx_tasklet; ++ } tasklet; ++ ++ struct { ++ wait_queue_head_t netbk_action_wq; ++ struct task_struct *task; ++ } kthread; ++ }; ++ ++ struct sk_buff_head rx_queue; ++ struct sk_buff_head tx_queue; ++ ++ struct timer_list net_timer; ++ struct timer_list netbk_tx_pending_timer; ++ ++ struct page **mmap_pages; ++ ++ pending_ring_idx_t pending_prod; ++ pending_ring_idx_t pending_cons; ++ pending_ring_idx_t dealloc_prod; ++ pending_ring_idx_t dealloc_cons; ++ ++ struct list_head pending_inuse_head; ++ struct list_head net_schedule_list; ++ ++ /* Protect the net_schedule_list in netif. */ ++ spinlock_t net_schedule_list_lock; ++ ++ atomic_t netfront_count; ++ ++ struct pending_tx_info pending_tx_info[MAX_PENDING_REQS]; ++ struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS]; ++ struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS]; ++ struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS]; ++ ++ grant_handle_t grant_tx_handle[MAX_PENDING_REQS]; ++ u16 pending_ring[MAX_PENDING_REQS]; ++ u16 dealloc_ring[MAX_PENDING_REQS]; ++ ++ /* ++ * Each head or fragment can be up to 4096 bytes. Given ++ * MAX_BUFFER_OFFSET of 4096 the worst case is that each ++ * head/fragment uses 2 copy operation. ++ */ ++ struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE]; ++ unsigned char rx_notify[NR_IRQS]; ++ u16 notify_list[NET_RX_RING_SIZE]; ++ struct netbk_rx_meta meta[2*NET_RX_RING_SIZE]; ++}; ++ ++extern struct xen_netbk *xen_netbk; ++extern int xen_netbk_group_nr; ++ ++#endif /* __NETIF__BACKEND__COMMON_H__ */ +diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c +new file mode 100644 +index 0000000..c36db26 +--- /dev/null ++++ b/drivers/net/xen-netback/interface.c +@@ -0,0 +1,465 @@ ++/****************************************************************************** ++ * arch/xen/drivers/netif/backend/interface.c ++ * ++ * Network-device interface management. ++ * ++ * Copyright (c) 2004-2005, Keir Fraser ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation; or, when distributed ++ * separately from the Linux kernel or incorporated into other ++ * software packages, subject to the following license: ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this source file (the "Software"), to deal in the Software without ++ * restriction, including without limitation the rights to use, copy, modify, ++ * merge, publish, distribute, sublicense, and/or sell copies of the Software, ++ * and to permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ */ ++ ++#include "common.h" ++ ++#include ++#include ++ ++#include ++#include ++ ++/* ++ * Module parameter 'queue_length': ++ * ++ * Enables queuing in the network stack when a client has run out of receive ++ * descriptors. ++ */ ++static unsigned long netbk_queue_length = 32; ++module_param_named(queue_length, netbk_queue_length, ulong, 0644); ++ ++static void netbk_add_netif(struct xen_netbk *netbk, int group_nr, ++ struct xen_netif *netif) ++{ ++ int i; ++ int min_netfront_count; ++ int min_group = 0; ++ min_netfront_count = atomic_read(&netbk[0].netfront_count); ++ for (i = 0; i < group_nr; i++) { ++ int netfront_count = atomic_read(&netbk[i].netfront_count); ++ if (netfront_count < min_netfront_count) { ++ min_group = i; ++ min_netfront_count = netfront_count; ++ } ++ } ++ ++ netif->group = min_group; ++ atomic_inc(&netbk[netif->group].netfront_count); ++} ++ ++static void netbk_remove_netif(struct xen_netbk *netbk, struct xen_netif *netif) ++{ ++ atomic_dec(&netbk[netif->group].netfront_count); ++} ++ ++static void __netif_up(struct xen_netif *netif) ++{ ++ netbk_add_netif(xen_netbk, xen_netbk_group_nr, netif); ++ enable_irq(netif->irq); ++ netif_schedule_work(netif); ++} ++ ++static void __netif_down(struct xen_netif *netif) ++{ ++ disable_irq(netif->irq); ++ netif_deschedule_work(netif); ++ netbk_remove_netif(xen_netbk, netif); ++} ++ ++static int net_open(struct net_device *dev) ++{ ++ struct xen_netif *netif = netdev_priv(dev); ++ if (netback_carrier_ok(netif)) { ++ __netif_up(netif); ++ netif_start_queue(dev); ++ } ++ return 0; ++} ++ ++static int net_close(struct net_device *dev) ++{ ++ struct xen_netif *netif = netdev_priv(dev); ++ if (netback_carrier_ok(netif)) ++ __netif_down(netif); ++ netif_stop_queue(dev); ++ return 0; ++} ++ ++static int netbk_change_mtu(struct net_device *dev, int mtu) ++{ ++ int max = netbk_can_sg(dev) ? 
65535 - ETH_HLEN : ETH_DATA_LEN; ++ ++ if (mtu > max) ++ return -EINVAL; ++ dev->mtu = mtu; ++ return 0; ++} ++ ++void netif_set_features(struct xen_netif *netif) ++{ ++ struct net_device *dev = netif->dev; ++ int features = dev->features; ++ ++ if (netif->can_sg) ++ features |= NETIF_F_SG; ++ if (netif->gso || netif->gso_prefix) ++ features |= NETIF_F_TSO; ++ if (netif->csum) ++ features |= NETIF_F_IP_CSUM; ++ ++ features &= ~(netif->features_disabled); ++ ++ if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) ++ dev->mtu = ETH_DATA_LEN; ++ ++ dev->features = features; ++} ++ ++static int netbk_set_tx_csum(struct net_device *dev, u32 data) ++{ ++ struct xen_netif *netif = netdev_priv(dev); ++ if (data) { ++ if (!netif->csum) ++ return -ENOSYS; ++ netif->features_disabled &= ~NETIF_F_IP_CSUM; ++ } else { ++ netif->features_disabled |= NETIF_F_IP_CSUM; ++ } ++ ++ netif_set_features(netif); ++ return 0; ++} ++ ++static int netbk_set_sg(struct net_device *dev, u32 data) ++{ ++ struct xen_netif *netif = netdev_priv(dev); ++ if (data) { ++ if (!netif->can_sg) ++ return -ENOSYS; ++ netif->features_disabled &= ~NETIF_F_SG; ++ } else { ++ netif->features_disabled |= NETIF_F_SG; ++ } ++ ++ netif_set_features(netif); ++ return 0; ++} ++ ++static int netbk_set_tso(struct net_device *dev, u32 data) ++{ ++ struct xen_netif *netif = netdev_priv(dev); ++ if (data) { ++ if (!netif->gso && !netif->gso_prefix) ++ return -ENOSYS; ++ netif->features_disabled &= ~NETIF_F_TSO; ++ } else { ++ netif->features_disabled |= NETIF_F_TSO; ++ } ++ ++ netif_set_features(netif); ++ return 0; ++} ++ ++static void netbk_get_drvinfo(struct net_device *dev, ++ struct ethtool_drvinfo *info) ++{ ++ strcpy(info->driver, "netbk"); ++ strcpy(info->bus_info, dev_name(dev->dev.parent)); ++} ++ ++static const struct netif_stat { ++ char name[ETH_GSTRING_LEN]; ++ u16 offset; ++} netbk_stats[] = { ++ { "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) }, ++}; ++ ++static int netbk_get_sset_count(struct net_device *dev, int string_set) ++{ ++ switch (string_set) { ++ case ETH_SS_STATS: ++ return ARRAY_SIZE(netbk_stats); ++ default: ++ return -EINVAL; ++ } ++} ++ ++static void netbk_get_ethtool_stats(struct net_device *dev, ++ struct ethtool_stats *stats, u64 * data) ++{ ++ void *netif = netdev_priv(dev); ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(netbk_stats); i++) ++ data[i] = *(int *)(netif + netbk_stats[i].offset); ++} ++ ++static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data) ++{ ++ int i; ++ ++ switch (stringset) { ++ case ETH_SS_STATS: ++ for (i = 0; i < ARRAY_SIZE(netbk_stats); i++) ++ memcpy(data + i * ETH_GSTRING_LEN, ++ netbk_stats[i].name, ETH_GSTRING_LEN); ++ break; ++ } ++} ++ ++static struct ethtool_ops network_ethtool_ops = { ++ .get_drvinfo = netbk_get_drvinfo, ++ ++ .get_tx_csum = ethtool_op_get_tx_csum, ++ .set_tx_csum = netbk_set_tx_csum, ++ .get_sg = ethtool_op_get_sg, ++ .set_sg = netbk_set_sg, ++ .get_tso = ethtool_op_get_tso, ++ .set_tso = netbk_set_tso, ++ .get_link = ethtool_op_get_link, ++ ++ .get_sset_count = netbk_get_sset_count, ++ .get_ethtool_stats = netbk_get_ethtool_stats, ++ .get_strings = netbk_get_strings, ++}; ++ ++static struct net_device_ops netback_ops = { ++ .ndo_start_xmit = netif_be_start_xmit, ++ .ndo_get_stats = netif_be_get_stats, ++ .ndo_open = net_open, ++ .ndo_stop = net_close, ++ .ndo_change_mtu = netbk_change_mtu, ++}; ++ ++struct xen_netif *netif_alloc(struct device *parent, domid_t domid, ++ unsigned int handle) ++{ ++ int err = 0; ++ struct 
net_device *dev; ++ struct xen_netif *netif; ++ char name[IFNAMSIZ] = {}; ++ ++ snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle); ++ dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup); ++ if (dev == NULL) { ++ pr_debug("Could not allocate netdev\n"); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ SET_NETDEV_DEV(dev, parent); ++ ++ netif = netdev_priv(dev); ++ memset(netif, 0, sizeof(*netif)); ++ netif->domid = domid; ++ netif->group = -1; ++ netif->handle = handle; ++ netif->can_sg = 1; ++ netif->csum = 1; ++ atomic_set(&netif->refcnt, 1); ++ init_waitqueue_head(&netif->waiting_to_free); ++ netif->dev = dev; ++ INIT_LIST_HEAD(&netif->list); ++ ++ netback_carrier_off(netif); ++ ++ netif->credit_bytes = netif->remaining_credit = ~0UL; ++ netif->credit_usec = 0UL; ++ init_timer(&netif->credit_timeout); ++ /* Initialize 'expires' now: it's used to track the credit window. */ ++ netif->credit_timeout.expires = jiffies; ++ ++ dev->netdev_ops = &netback_ops; ++ netif_set_features(netif); ++ SET_ETHTOOL_OPS(dev, &network_ethtool_ops); ++ ++ dev->tx_queue_len = netbk_queue_length; ++ ++ /* ++ * Initialise a dummy MAC address. We choose the numerically ++ * largest non-broadcast address to prevent the address getting ++ * stolen by an Ethernet bridge for STP purposes. ++ * (FE:FF:FF:FF:FF:FF) ++ */ ++ memset(dev->dev_addr, 0xFF, ETH_ALEN); ++ dev->dev_addr[0] &= ~0x01; ++ ++ rtnl_lock(); ++ err = register_netdevice(dev); ++ rtnl_unlock(); ++ if (err) { ++ pr_debug("Could not register new net device %s: err=%d\n", ++ dev->name, err); ++ free_netdev(dev); ++ return ERR_PTR(err); ++ } ++ ++ pr_debug("Successfully created netif\n"); ++ return netif; ++} ++ ++static int map_frontend_pages(struct xen_netif *netif, ++ grant_ref_t tx_ring_ref, ++ grant_ref_t rx_ring_ref) ++{ ++ struct gnttab_map_grant_ref op; ++ ++ gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr, ++ GNTMAP_host_map, tx_ring_ref, netif->domid); ++ ++ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) ++ BUG(); ++ ++ if (op.status) { ++ pr_debug("Gnttab failure mapping tx_ring_ref!\n"); ++ return op.status; ++ } ++ ++ netif->tx_shmem_ref = tx_ring_ref; ++ netif->tx_shmem_handle = op.handle; ++ ++ gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr, ++ GNTMAP_host_map, rx_ring_ref, netif->domid); ++ ++ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) ++ BUG(); ++ ++ if (op.status) { ++ struct gnttab_unmap_grant_ref unop; ++ ++ gnttab_set_unmap_op(&unop, ++ (unsigned long)netif->tx_comms_area->addr, ++ GNTMAP_host_map, netif->tx_shmem_handle); ++ HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1); ++ pr_debug("Gnttab failure mapping rx_ring_ref!\n"); ++ return op.status; ++ } ++ ++ netif->rx_shmem_ref = rx_ring_ref; ++ netif->rx_shmem_handle = op.handle; ++ ++ return 0; ++} ++ ++static void unmap_frontend_pages(struct xen_netif *netif) ++{ ++ struct gnttab_unmap_grant_ref op; ++ ++ gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr, ++ GNTMAP_host_map, netif->tx_shmem_handle); ++ ++ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) ++ BUG(); ++ ++ gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr, ++ GNTMAP_host_map, netif->rx_shmem_handle); ++ ++ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) ++ BUG(); ++} ++ ++int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref, ++ unsigned long rx_ring_ref, unsigned int evtchn) ++{ ++ int err = -ENOMEM; ++ struct xen_netif_tx_sring *txs; ++ struct 
xen_netif_rx_sring *rxs; ++ ++ /* Already connected through? */ ++ if (netif->irq) ++ return 0; ++ ++ netif->tx_comms_area = alloc_vm_area(PAGE_SIZE); ++ if (netif->tx_comms_area == NULL) ++ return -ENOMEM; ++ netif->rx_comms_area = alloc_vm_area(PAGE_SIZE); ++ if (netif->rx_comms_area == NULL) ++ goto err_rx; ++ ++ err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref); ++ if (err) ++ goto err_map; ++ ++ err = bind_interdomain_evtchn_to_irqhandler( ++ netif->domid, evtchn, netif_be_int, 0, ++ netif->dev->name, netif); ++ if (err < 0) ++ goto err_hypervisor; ++ netif->irq = err; ++ disable_irq(netif->irq); ++ ++ txs = (struct xen_netif_tx_sring *)netif->tx_comms_area->addr; ++ BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE); ++ ++ rxs = (struct xen_netif_rx_sring *) ++ ((char *)netif->rx_comms_area->addr); ++ BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE); ++ ++ netif->rx_req_cons_peek = 0; ++ ++ netif_get(netif); ++ ++ rtnl_lock(); ++ netback_carrier_on(netif); ++ if (netif_running(netif->dev)) ++ __netif_up(netif); ++ rtnl_unlock(); ++ ++ return 0; ++err_hypervisor: ++ unmap_frontend_pages(netif); ++err_map: ++ free_vm_area(netif->rx_comms_area); ++err_rx: ++ free_vm_area(netif->tx_comms_area); ++ return err; ++} ++ ++void netif_disconnect(struct xen_netif *netif) ++{ ++ if (netback_carrier_ok(netif)) { ++ rtnl_lock(); ++ netback_carrier_off(netif); ++ netif_carrier_off(netif->dev); /* discard queued packets */ ++ if (netif_running(netif->dev)) ++ __netif_down(netif); ++ rtnl_unlock(); ++ netif_put(netif); ++ } ++ ++ atomic_dec(&netif->refcnt); ++ wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0); ++ ++ del_timer_sync(&netif->credit_timeout); ++ ++ if (netif->irq) ++ unbind_from_irqhandler(netif->irq, netif); ++ ++ unregister_netdev(netif->dev); ++ ++ if (netif->tx.sring) { ++ unmap_frontend_pages(netif); ++ free_vm_area(netif->tx_comms_area); ++ free_vm_area(netif->rx_comms_area); ++ } ++ ++ free_netdev(netif->dev); ++} +diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c +new file mode 100644 +index 0000000..e0ca232 +--- /dev/null ++++ b/drivers/net/xen-netback/netback.c +@@ -0,0 +1,1909 @@ ++/* ++ * Back-end of the driver for virtual network devices. This portion of the ++ * driver exports a 'unified' network-device interface that can be accessed ++ * by any operating system that implements a compatible front end. A ++ * reference front-end implementation can be found in: ++ * drivers/net/xen-netfront.c ++ * ++ * Copyright (c) 2002-2005, K A Fraser ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation; or, when distributed ++ * separately from the Linux kernel or incorporated into other ++ * software packages, subject to the following license: ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this source file (the "Software"), to deal in the Software without ++ * restriction, including without limitation the rights to use, copy, modify, ++ * merge, publish, distribute, sublicense, and/or sell copies of the Software, ++ * and to permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. 
++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ */ ++ ++#include "common.h" ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++ ++#include ++#include ++ ++/*define NETBE_DEBUG_INTERRUPT*/ ++ ++struct xen_netbk *xen_netbk; ++int xen_netbk_group_nr; ++ ++static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx); ++static void make_tx_response(struct xen_netif *netif, ++ struct xen_netif_tx_request *txp, ++ s8 st); ++static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif, ++ u16 id, ++ s8 st, ++ u16 offset, ++ u16 size, ++ u16 flags); ++ ++static void net_tx_action(unsigned long data); ++ ++static void net_rx_action(unsigned long data); ++ ++static inline unsigned long idx_to_pfn(struct xen_netbk *netbk, ++ unsigned int idx) ++{ ++ return page_to_pfn(netbk->mmap_pages[idx]); ++} ++ ++static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk, ++ unsigned int idx) ++{ ++ return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx)); ++} ++ ++/* extra field used in struct page */ ++static inline void netif_set_page_ext(struct page *pg, ++ unsigned int group, unsigned int idx) ++{ ++ union page_ext ext = { .e = { .group = group + 1, .idx = idx } }; ++ ++ BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping)); ++ pg->mapping = ext.mapping; ++} ++ ++static int netif_get_page_ext(struct page *pg, ++ unsigned int *_group, unsigned int *_idx) ++{ ++ union page_ext ext = { .mapping = pg->mapping }; ++ struct xen_netbk *netbk; ++ unsigned int group, idx; ++ ++ if (!PageForeign(pg)) ++ return 0; ++ ++ group = ext.e.group - 1; ++ ++ if (group < 0 || group >= xen_netbk_group_nr) ++ return 0; ++ ++ netbk = &xen_netbk[group]; ++ ++ if (netbk->mmap_pages == NULL) ++ return 0; ++ ++ idx = ext.e.idx; ++ ++ if ((idx < 0) || (idx >= MAX_PENDING_REQS)) ++ return 0; ++ ++ if (netbk->mmap_pages[idx] != pg) ++ return 0; ++ ++ *_group = group; ++ *_idx = idx; ++ ++ return 1; ++} ++ ++/* ++ * This is the amount of packet we copy rather than map, so that the ++ * guest can't fiddle with the contents of the headers while we do ++ * packet processing on them (netfilter, routing, etc). ++ */ ++#define PKT_PROT_LEN (ETH_HLEN + \ ++ VLAN_HLEN + \ ++ sizeof(struct iphdr) + MAX_IPOPTLEN + \ ++ sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE) ++ ++static inline pending_ring_idx_t pending_index(unsigned i) ++{ ++ return i & (MAX_PENDING_REQS-1); ++} ++ ++static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk) ++{ ++ return MAX_PENDING_REQS - ++ netbk->pending_prod + netbk->pending_cons; ++} ++ ++/* Setting this allows the safe use of this driver without netloop. */ ++static int MODPARM_copy_skb = 1; ++module_param_named(copy_skb, MODPARM_copy_skb, bool, 0); ++MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop"); ++ ++int netbk_copy_skb_mode; ++ ++static int MODPARM_netback_kthread; ++module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0); ++MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet"); ++ ++/* ++ * Netback bottom half handler. 
++ * dir indicates the data direction. ++ * rx: 1, tx: 0. ++ */ ++static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir) ++{ ++ if (MODPARM_netback_kthread) ++ wake_up(&netbk->kthread.netbk_action_wq); ++ else if (dir) ++ tasklet_schedule(&netbk->tasklet.net_rx_tasklet); ++ else ++ tasklet_schedule(&netbk->tasklet.net_tx_tasklet); ++} ++ ++static inline void maybe_schedule_tx_action(struct xen_netbk *netbk) ++{ ++ smp_mb(); ++ if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) && ++ !list_empty(&netbk->net_schedule_list)) ++ xen_netbk_bh_handler(netbk, 0); ++} ++ ++static struct sk_buff *netbk_copy_skb(struct sk_buff *skb) ++{ ++ struct skb_shared_info *ninfo; ++ struct sk_buff *nskb; ++ unsigned long offset; ++ int ret; ++ int len; ++ int headlen; ++ ++ BUG_ON(skb_shinfo(skb)->frag_list != NULL); ++ ++ nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN); ++ if (unlikely(!nskb)) ++ goto err; ++ ++ skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN); ++ headlen = skb_end_pointer(nskb) - nskb->data; ++ if (headlen > skb_headlen(skb)) ++ headlen = skb_headlen(skb); ++ ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen); ++ BUG_ON(ret); ++ ++ ninfo = skb_shinfo(nskb); ++ ninfo->gso_size = skb_shinfo(skb)->gso_size; ++ ninfo->gso_type = skb_shinfo(skb)->gso_type; ++ ++ offset = headlen; ++ len = skb->len - headlen; ++ ++ nskb->len = skb->len; ++ nskb->data_len = len; ++ nskb->truesize += len; ++ ++ while (len) { ++ struct page *page; ++ int copy; ++ int zero; ++ ++ if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) { ++ dump_stack(); ++ goto err_free; ++ } ++ ++ copy = len >= PAGE_SIZE ? PAGE_SIZE : len; ++ zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO; ++ ++ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero); ++ if (unlikely(!page)) ++ goto err_free; ++ ++ ret = skb_copy_bits(skb, offset, page_address(page), copy); ++ BUG_ON(ret); ++ ++ ninfo->frags[ninfo->nr_frags].page = page; ++ ninfo->frags[ninfo->nr_frags].page_offset = 0; ++ ninfo->frags[ninfo->nr_frags].size = copy; ++ ninfo->nr_frags++; ++ ++ offset += copy; ++ len -= copy; ++ } ++ ++#ifdef NET_SKBUFF_DATA_USES_OFFSET ++ offset = 0; ++#else ++ offset = nskb->data - skb->data; ++#endif ++ ++ nskb->transport_header = skb->transport_header + offset; ++ nskb->network_header = skb->network_header + offset; ++ nskb->mac_header = skb->mac_header + offset; ++ ++ return nskb; ++ ++ err_free: ++ kfree_skb(nskb); ++ err: ++ return NULL; ++} ++ ++static inline int netbk_max_required_rx_slots(struct xen_netif *netif) ++{ ++ if (netif->can_sg || netif->gso || netif->gso_prefix) ++ return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */ ++ return 1; /* all in one */ ++} ++ ++static inline int netbk_queue_full(struct xen_netif *netif) ++{ ++ RING_IDX peek = netif->rx_req_cons_peek; ++ RING_IDX needed = netbk_max_required_rx_slots(netif); ++ ++ return ((netif->rx.sring->req_prod - peek) < needed) || ++ ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed); ++} ++ ++/* ++ * Returns true if we should start a new receive buffer instead of ++ * adding 'size' bytes to a buffer which currently contains 'offset' ++ * bytes. ++ */ ++static bool start_new_rx_buffer(int offset, unsigned long size, int head) ++{ ++ /* simple case: we have completely filled the current buffer. 
*/ ++ if (offset == MAX_BUFFER_OFFSET) ++ return true; ++ ++ /* ++ * complex case: start a fresh buffer if the current frag ++ * would overflow the current buffer but only if: ++ * (i) this frag would fit completely in the next buffer ++ * and (ii) there is already some data in the current buffer ++ * and (iii) this is not the head buffer. ++ * ++ * Where: ++ * - (i) stops us splitting a frag into two copies ++ * unless the frag is too large for a single buffer. ++ * - (ii) stops us from leaving a buffer pointlessly empty. ++ * - (iii) stops us leaving the first buffer ++ * empty. Strictly speaking this is already covered ++ * by (ii) but is explicitly checked because ++ * netfront relies on the first buffer being ++ * non-empty and can crash otherwise. ++ * ++ * This means we will effectively linearise small ++ * frags but do not needlessly split large buffers ++ * into multiple copies tend to give large frags their ++ * own buffers as before. ++ */ ++ if ((offset + size > MAX_BUFFER_OFFSET) && ++ (size <= MAX_BUFFER_OFFSET) && offset && !head) ++ return true; ++ ++ return false; ++} ++ ++/* ++ * Figure out how many ring slots we're going to need to send @skb to ++ * the guest. This function is essentially a dry run of ++ * netbk_gop_frag_copy. ++ */ ++static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif) ++{ ++ unsigned int count = 1; ++ int i, copy_off = 0; ++ ++ BUG_ON(offset_in_page(skb->data)+skb_headlen(skb) > MAX_BUFFER_OFFSET); ++ ++ copy_off = skb_headlen(skb); ++ ++ if (skb_shinfo(skb)->gso_size) ++ count++; ++ ++ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { ++ unsigned long size = skb_shinfo(skb)->frags[i].size; ++ unsigned long bytes; ++ while (size > 0) { ++ BUG_ON(copy_off > MAX_BUFFER_OFFSET); ++ ++ if (start_new_rx_buffer(copy_off, size, 0)) { ++ count++; ++ copy_off = 0; ++ } ++ ++ bytes = size; ++ if (copy_off + bytes > MAX_BUFFER_OFFSET) ++ bytes = MAX_BUFFER_OFFSET - copy_off; ++ ++ copy_off += bytes; ++ size -= bytes; ++ } ++ } ++ return count; ++} ++ ++int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) ++{ ++ struct xen_netif *netif = netdev_priv(dev); ++ struct xen_netbk *netbk; ++ ++ BUG_ON(skb->dev != dev); ++ ++ if (netif->group == -1) ++ goto drop; ++ ++ netbk = &xen_netbk[netif->group]; ++ ++ /* Drop the packet if the target domain has no receive buffers. */ ++ if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif))) ++ goto drop; ++ ++ /* ++ * XXX For now we also copy skbuffs whose head crosses a page ++ * boundary, because netbk_gop_skb can't handle them. ++ */ ++ if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) { ++ struct sk_buff *nskb = netbk_copy_skb(skb); ++ if (unlikely(nskb == NULL)) ++ goto drop; ++ /* Copy only the header fields we use in this driver. */ ++ nskb->dev = skb->dev; ++ nskb->ip_summed = skb->ip_summed; ++ dev_kfree_skb(skb); ++ skb = nskb; ++ } ++ ++ /* Reserve ring slots for the worst-case number of fragments. 
*/ ++ netif->rx_req_cons_peek += count_skb_slots(skb, netif); ++ netif_get(netif); ++ ++ if (netbk_can_queue(dev) && netbk_queue_full(netif)) { ++ netif->rx.sring->req_event = netif->rx_req_cons_peek + ++ netbk_max_required_rx_slots(netif); ++ mb(); /* request notification /then/ check & stop the queue */ ++ if (netbk_queue_full(netif)) ++ netif_stop_queue(dev); ++ } ++ skb_queue_tail(&netbk->rx_queue, skb); ++ ++ xen_netbk_bh_handler(netbk, 1); ++ ++ return 0; ++ ++ drop: ++ netif->stats.tx_dropped++; ++ dev_kfree_skb(skb); ++ return 0; ++} ++ ++struct netrx_pending_operations { ++ unsigned copy_prod, copy_cons; ++ unsigned meta_prod, meta_cons; ++ struct gnttab_copy *copy; ++ struct netbk_rx_meta *meta; ++ int copy_off; ++ grant_ref_t copy_gref; ++}; ++ ++static struct netbk_rx_meta *get_next_rx_buffer(struct xen_netif *netif, ++ struct netrx_pending_operations *npo) ++{ ++ struct netbk_rx_meta *meta; ++ struct xen_netif_rx_request *req; ++ ++ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++); ++ ++ meta = npo->meta + npo->meta_prod++; ++ meta->gso_size = 0; ++ meta->size = 0; ++ meta->id = req->id; ++ ++ npo->copy_off = 0; ++ npo->copy_gref = req->gref; ++ ++ return meta; ++} ++ ++/* ++ * Set up the grant operations for this fragment. If it's a flipping ++ * interface, we also set up the unmap request from here. ++ */ ++static void netbk_gop_frag_copy(struct xen_netif *netif, ++ struct netrx_pending_operations *npo, ++ struct page *page, unsigned long size, ++ unsigned long offset, int head) ++{ ++ struct gnttab_copy *copy_gop; ++ struct netbk_rx_meta *meta; ++ /* ++ * These variables a used iff netif_get_page_ext returns true, ++ * in which case they are guaranteed to be initialized. ++ */ ++ unsigned int uninitialized_var(group), uninitialized_var(idx); ++ int foreign = netif_get_page_ext(page, &group, &idx); ++ unsigned long bytes; ++ ++ /* Data must not cross a page boundary. */ ++ BUG_ON(size + offset > PAGE_SIZE); ++ ++ meta = npo->meta + npo->meta_prod - 1; ++ ++ while (size > 0) { ++ BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET); ++ ++ if (start_new_rx_buffer(npo->copy_off, size, head)) { ++ /* ++ * Netfront requires there to be some data in ++ * the head buffer. ++ */ ++ BUG_ON(head); ++ ++ meta = get_next_rx_buffer(netif, npo); ++ } ++ ++ bytes = size; ++ if (npo->copy_off + bytes > MAX_BUFFER_OFFSET) ++ bytes = MAX_BUFFER_OFFSET - npo->copy_off; ++ ++ copy_gop = npo->copy + npo->copy_prod++; ++ copy_gop->flags = GNTCOPY_dest_gref; ++ if (foreign) { ++ struct xen_netbk *netbk = &xen_netbk[group]; ++ struct pending_tx_info *src_pend; ++ ++ src_pend = &netbk->pending_tx_info[idx]; ++ ++ copy_gop->source.domid = src_pend->netif->domid; ++ copy_gop->source.u.ref = src_pend->req.gref; ++ copy_gop->flags |= GNTCOPY_source_gref; ++ } else { ++ void *vaddr = page_address(page); ++ copy_gop->source.domid = DOMID_SELF; ++ copy_gop->source.u.gmfn = virt_to_mfn(vaddr); ++ } ++ copy_gop->source.offset = offset; ++ copy_gop->dest.domid = netif->domid; ++ ++ copy_gop->dest.offset = npo->copy_off; ++ copy_gop->dest.u.ref = npo->copy_gref; ++ copy_gop->len = bytes; ++ ++ npo->copy_off += bytes; ++ meta->size += bytes; ++ ++ offset += bytes; ++ size -= bytes; ++ head = 0; /* There must be something in this buffer now. */ ++ } ++} ++ ++/* ++ * Prepare an SKB to be transmitted to the frontend. ++ * ++ * This function is responsible for allocating grant operations, meta ++ * structures, etc. ++ * ++ * It returns the number of meta structures consumed. 
The number of ++ * ring slots used is always equal to the number of meta slots used ++ * plus the number of GSO descriptors used. Currently, we use either ++ * zero GSO descriptors (for non-GSO packets) or one descriptor (for ++ * frontend-side LRO). ++ */ ++static int netbk_gop_skb(struct sk_buff *skb, ++ struct netrx_pending_operations *npo) ++{ ++ struct xen_netif *netif = netdev_priv(skb->dev); ++ int nr_frags = skb_shinfo(skb)->nr_frags; ++ int i; ++ struct xen_netif_rx_request *req; ++ struct netbk_rx_meta *meta; ++ int old_meta_prod; ++ ++ old_meta_prod = npo->meta_prod; ++ ++ /* Set up a GSO prefix descriptor, if necessary */ ++ if (skb_shinfo(skb)->gso_size && netif->gso_prefix) { ++ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++); ++ meta = npo->meta + npo->meta_prod++; ++ meta->gso_size = skb_shinfo(skb)->gso_size; ++ meta->size = 0; ++ meta->id = req->id; ++ } ++ ++ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++); ++ meta = npo->meta + npo->meta_prod++; ++ ++ if (!netif->gso_prefix) ++ meta->gso_size = skb_shinfo(skb)->gso_size; ++ else ++ meta->gso_size = 0; ++ ++ meta->size = 0; ++ meta->id = req->id; ++ npo->copy_off = 0; ++ npo->copy_gref = req->gref; ++ ++ netbk_gop_frag_copy(netif, ++ npo, virt_to_page(skb->data), ++ skb_headlen(skb), ++ offset_in_page(skb->data), 1); ++ ++ /* Leave a gap for the GSO descriptor. */ ++ if (skb_shinfo(skb)->gso_size && !netif->gso_prefix) ++ netif->rx.req_cons++; ++ ++ for (i = 0; i < nr_frags; i++) { ++ netbk_gop_frag_copy(netif, npo, ++ skb_shinfo(skb)->frags[i].page, ++ skb_shinfo(skb)->frags[i].size, ++ skb_shinfo(skb)->frags[i].page_offset, ++ 0); ++ } ++ ++ return npo->meta_prod - old_meta_prod; ++} ++ ++/* ++ * This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was ++ * used to set up the operations on the top of ++ * netrx_pending_operations, which have since been done. Check that ++ * they didn't give any errors and advance over them. 
++ */ ++static int netbk_check_gop(int nr_meta_slots, domid_t domid, ++ struct netrx_pending_operations *npo) ++{ ++ struct gnttab_copy *copy_op; ++ int status = NETIF_RSP_OKAY; ++ int i; ++ ++ for (i = 0; i < nr_meta_slots; i++) { ++ copy_op = npo->copy + npo->copy_cons++; ++ if (copy_op->status != GNTST_okay) { ++ pr_debug("Bad status %d from copy to DOM%d.\n", ++ copy_op->status, domid); ++ status = NETIF_RSP_ERROR; ++ } ++ } ++ ++ return status; ++} ++ ++static void netbk_add_frag_responses(struct xen_netif *netif, int status, ++ struct netbk_rx_meta *meta, ++ int nr_meta_slots) ++{ ++ int i; ++ unsigned long offset; ++ ++ /* No fragments used */ ++ if (nr_meta_slots <= 1) ++ return; ++ ++ nr_meta_slots--; ++ ++ for (i = 0; i < nr_meta_slots; i++) { ++ int flags; ++ if (i == nr_meta_slots - 1) ++ flags = 0; ++ else ++ flags = NETRXF_more_data; ++ ++ offset = 0; ++ make_rx_response(netif, meta[i].id, status, offset, ++ meta[i].size, flags); ++ } ++} ++ ++struct skb_cb_overlay { ++ int meta_slots_used; ++}; ++ ++static void net_rx_action(unsigned long data) ++{ ++ struct xen_netif *netif = NULL; ++ struct xen_netbk *netbk = (struct xen_netbk *)data; ++ s8 status; ++ u16 irq, flags; ++ struct xen_netif_rx_response *resp; ++ struct sk_buff_head rxq; ++ struct sk_buff *skb; ++ int notify_nr = 0; ++ int ret; ++ int nr_frags; ++ int count; ++ unsigned long offset; ++ struct skb_cb_overlay *sco; ++ ++ struct netrx_pending_operations npo = { ++ .copy = netbk->grant_copy_op, ++ .meta = netbk->meta, ++ }; ++ ++ skb_queue_head_init(&rxq); ++ ++ count = 0; ++ ++ while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) { ++ netif = netdev_priv(skb->dev); ++ nr_frags = skb_shinfo(skb)->nr_frags; ++ ++ sco = (struct skb_cb_overlay *)skb->cb; ++ sco->meta_slots_used = netbk_gop_skb(skb, &npo); ++ ++ count += nr_frags + 1; ++ ++ __skb_queue_tail(&rxq, skb); ++ ++ /* Filled the batch queue? */ ++ if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE) ++ break; ++ } ++ ++ BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta)); ++ ++ if (!npo.copy_prod) ++ return; ++ ++ BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op)); ++ ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op, ++ npo.copy_prod); ++ BUG_ON(ret != 0); ++ ++ while ((skb = __skb_dequeue(&rxq)) != NULL) { ++ sco = (struct skb_cb_overlay *)skb->cb; ++ ++ netif = netdev_priv(skb->dev); ++ ++ if (netbk->meta[npo.meta_cons].gso_size && netif->gso_prefix) { ++ resp = RING_GET_RESPONSE(&netif->rx, ++ netif->rx.rsp_prod_pvt++); ++ ++ resp->flags = NETRXF_gso_prefix | NETRXF_more_data; ++ ++ resp->offset = netbk->meta[npo.meta_cons].gso_size; ++ resp->id = netbk->meta[npo.meta_cons].id; ++ resp->status = sco->meta_slots_used; ++ ++ npo.meta_cons++; ++ sco->meta_slots_used--; ++ } ++ ++ ++ netif->stats.tx_bytes += skb->len; ++ netif->stats.tx_packets++; ++ ++ status = netbk_check_gop(sco->meta_slots_used, ++ netif->domid, &npo); ++ ++ if (sco->meta_slots_used == 1) ++ flags = 0; ++ else ++ flags = NETRXF_more_data; ++ ++ if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */ ++ flags |= NETRXF_csum_blank | NETRXF_data_validated; ++ else if (skb->ip_summed == CHECKSUM_UNNECESSARY) ++ /* remote but checksummed. 
*/ ++ flags |= NETRXF_data_validated; ++ ++ offset = 0; ++ resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id, ++ status, offset, ++ netbk->meta[npo.meta_cons].size, ++ flags); ++ ++ if (netbk->meta[npo.meta_cons].gso_size && !netif->gso_prefix) { ++ struct xen_netif_extra_info *gso = ++ (struct xen_netif_extra_info *) ++ RING_GET_RESPONSE(&netif->rx, ++ netif->rx.rsp_prod_pvt++); ++ ++ resp->flags |= NETRXF_extra_info; ++ ++ gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size; ++ gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; ++ gso->u.gso.pad = 0; ++ gso->u.gso.features = 0; ++ ++ gso->type = XEN_NETIF_EXTRA_TYPE_GSO; ++ gso->flags = 0; ++ } ++ ++ netbk_add_frag_responses(netif, status, ++ netbk->meta + npo.meta_cons + 1, ++ sco->meta_slots_used); ++ ++ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret); ++ irq = netif->irq; ++ if (ret && !netbk->rx_notify[irq]) { ++ netbk->rx_notify[irq] = 1; ++ netbk->notify_list[notify_nr++] = irq; ++ } ++ ++ if (netif_queue_stopped(netif->dev) && ++ netif_schedulable(netif) && ++ !netbk_queue_full(netif)) ++ netif_wake_queue(netif->dev); ++ ++ netif_put(netif); ++ npo.meta_cons += sco->meta_slots_used; ++ dev_kfree_skb(skb); ++ } ++ ++ while (notify_nr != 0) { ++ irq = netbk->notify_list[--notify_nr]; ++ netbk->rx_notify[irq] = 0; ++ notify_remote_via_irq(irq); ++ } ++ ++ /* More work to do? */ ++ if (!skb_queue_empty(&netbk->rx_queue) && ++ !timer_pending(&netbk->net_timer)) ++ xen_netbk_bh_handler(netbk, 1); ++} ++ ++static void net_alarm(unsigned long data) ++{ ++ struct xen_netbk *netbk = (struct xen_netbk *)data; ++ xen_netbk_bh_handler(netbk, 1); ++} ++ ++static void netbk_tx_pending_timeout(unsigned long data) ++{ ++ struct xen_netbk *netbk = (struct xen_netbk *)data; ++ xen_netbk_bh_handler(netbk, 0); ++} ++ ++struct net_device_stats *netif_be_get_stats(struct net_device *dev) ++{ ++ struct xen_netif *netif = netdev_priv(dev); ++ return &netif->stats; ++} ++ ++static int __on_net_schedule_list(struct xen_netif *netif) ++{ ++ return !list_empty(&netif->list); ++} ++ ++/* Must be called with net_schedule_list_lock held */ ++static void remove_from_net_schedule_list(struct xen_netif *netif) ++{ ++ if (likely(__on_net_schedule_list(netif))) { ++ list_del_init(&netif->list); ++ netif_put(netif); ++ } ++} ++ ++static struct xen_netif *poll_net_schedule_list(struct xen_netbk *netbk) ++{ ++ struct xen_netif *netif = NULL; ++ ++ spin_lock_irq(&netbk->net_schedule_list_lock); ++ if (list_empty(&netbk->net_schedule_list)) ++ goto out; ++ ++ netif = list_first_entry(&netbk->net_schedule_list, ++ struct xen_netif, list); ++ if (!netif) ++ goto out; ++ ++ netif_get(netif); ++ ++ remove_from_net_schedule_list(netif); ++out: ++ spin_unlock_irq(&netbk->net_schedule_list_lock); ++ return netif; ++} ++ ++static void add_to_net_schedule_list_tail(struct xen_netif *netif) ++{ ++ unsigned long flags; ++ ++ struct xen_netbk *netbk = &xen_netbk[netif->group]; ++ if (__on_net_schedule_list(netif)) ++ return; ++ ++ spin_lock_irqsave(&netbk->net_schedule_list_lock, flags); ++ if (!__on_net_schedule_list(netif) && ++ likely(netif_schedulable(netif))) { ++ list_add_tail(&netif->list, &netbk->net_schedule_list); ++ netif_get(netif); ++ } ++ spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags); ++} ++ ++void netif_schedule_work(struct xen_netif *netif) ++{ ++ struct xen_netbk *netbk = &xen_netbk[netif->group]; ++ int more_to_do; ++ ++ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); ++ ++ if (more_to_do) { ++ add_to_net_schedule_list_tail(netif); 
++ maybe_schedule_tx_action(netbk); ++ } ++} ++ ++void netif_deschedule_work(struct xen_netif *netif) ++{ ++ struct xen_netbk *netbk = &xen_netbk[netif->group]; ++ spin_lock_irq(&netbk->net_schedule_list_lock); ++ remove_from_net_schedule_list(netif); ++ spin_unlock_irq(&netbk->net_schedule_list_lock); ++} ++ ++ ++static void tx_add_credit(struct xen_netif *netif) ++{ ++ unsigned long max_burst, max_credit; ++ ++ /* ++ * Allow a burst big enough to transmit a jumbo packet of up to 128kB. ++ * Otherwise the interface can seize up due to insufficient credit. ++ */ ++ max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size; ++ max_burst = min(max_burst, 131072UL); ++ max_burst = max(max_burst, netif->credit_bytes); ++ ++ /* Take care that adding a new chunk of credit doesn't wrap to zero. */ ++ max_credit = netif->remaining_credit + netif->credit_bytes; ++ if (max_credit < netif->remaining_credit) ++ max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */ ++ ++ netif->remaining_credit = min(max_credit, max_burst); ++} ++ ++static void tx_credit_callback(unsigned long data) ++{ ++ struct xen_netif *netif = (struct xen_netif *)data; ++ tx_add_credit(netif); ++ netif_schedule_work(netif); ++} ++ ++static inline int copy_pending_req(struct xen_netbk *netbk, ++ pending_ring_idx_t pending_idx) ++{ ++ return gnttab_copy_grant_page( ++ netbk->grant_tx_handle[pending_idx], ++ &netbk->mmap_pages[pending_idx]); ++} ++ ++static inline void net_tx_action_dealloc(struct xen_netbk *netbk) ++{ ++ struct netbk_tx_pending_inuse *inuse, *n; ++ struct gnttab_unmap_grant_ref *gop; ++ u16 pending_idx; ++ pending_ring_idx_t dc, dp; ++ struct xen_netif *netif; ++ int ret; ++ LIST_HEAD(list); ++ ++ dc = netbk->dealloc_cons; ++ gop = netbk->tx_unmap_ops; ++ ++ /* Free up any grants we have finished using. */ ++ do { ++ dp = netbk->dealloc_prod; ++ ++ /* Ensure we see all indices enqueued by netif_idx_release(). */ ++ smp_rmb(); ++ ++ while (dc != dp) { ++ unsigned long pfn; ++ struct netbk_tx_pending_inuse *pending_inuse = ++ netbk->pending_inuse; ++ ++ pending_idx = netbk->dealloc_ring[pending_index(dc++)]; ++ list_move_tail(&pending_inuse[pending_idx].list, &list); ++ ++ pfn = idx_to_pfn(netbk, pending_idx); ++ /* Already unmapped? 
*/ ++ if (!phys_to_machine_mapping_valid(pfn)) ++ continue; ++ ++ gnttab_set_unmap_op(gop, ++ idx_to_kaddr(netbk, pending_idx), ++ GNTMAP_host_map, ++ netbk->grant_tx_handle[pending_idx]); ++ gop++; ++ } ++ ++ } while (dp != netbk->dealloc_prod); ++ ++ netbk->dealloc_cons = dc; ++ ++ ret = HYPERVISOR_grant_table_op( ++ GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops, ++ gop - netbk->tx_unmap_ops); ++ BUG_ON(ret); ++ ++ /* ++ * Copy any entries that have been pending for too long ++ */ ++ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB && ++ !list_empty(&netbk->pending_inuse_head)) { ++ list_for_each_entry_safe(inuse, n, ++ &netbk->pending_inuse_head, list) { ++ struct pending_tx_info *pending_tx_info; ++ pending_tx_info = netbk->pending_tx_info; ++ ++ if (time_after(inuse->alloc_time + HZ / 2, jiffies)) ++ break; ++ ++ pending_idx = inuse - netbk->pending_inuse; ++ ++ pending_tx_info[pending_idx].netif->nr_copied_skbs++; ++ ++ switch (copy_pending_req(netbk, pending_idx)) { ++ case 0: ++ list_move_tail(&inuse->list, &list); ++ continue; ++ case -EBUSY: ++ list_del_init(&inuse->list); ++ continue; ++ case -ENOENT: ++ continue; ++ } ++ ++ break; ++ } ++ } ++ ++ list_for_each_entry_safe(inuse, n, &list, list) { ++ struct pending_tx_info *pending_tx_info; ++ pending_ring_idx_t index; ++ ++ pending_tx_info = netbk->pending_tx_info; ++ pending_idx = inuse - netbk->pending_inuse; ++ ++ netif = pending_tx_info[pending_idx].netif; ++ ++ make_tx_response(netif, &pending_tx_info[pending_idx].req, ++ NETIF_RSP_OKAY); ++ ++ /* Ready for next use. */ ++ gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]); ++ ++ index = pending_index(netbk->pending_prod++); ++ netbk->pending_ring[index] = pending_idx; ++ ++ netif_put(netif); ++ ++ list_del_init(&inuse->list); ++ } ++} ++ ++static void netbk_tx_err(struct xen_netif *netif, ++ struct xen_netif_tx_request *txp, RING_IDX end) ++{ ++ RING_IDX cons = netif->tx.req_cons; ++ ++ do { ++ make_tx_response(netif, txp, NETIF_RSP_ERROR); ++ if (cons >= end) ++ break; ++ txp = RING_GET_REQUEST(&netif->tx, cons++); ++ } while (1); ++ netif->tx.req_cons = cons; ++ netif_schedule_work(netif); ++ netif_put(netif); ++} ++ ++static int netbk_count_requests(struct xen_netif *netif, ++ struct xen_netif_tx_request *first, ++ struct xen_netif_tx_request *txp, ++ int work_to_do) ++{ ++ RING_IDX cons = netif->tx.req_cons; ++ int frags = 0; ++ ++ if (!(first->flags & NETTXF_more_data)) ++ return 0; ++ ++ do { ++ if (frags >= work_to_do) { ++ DPRINTK("Need more frags\n"); ++ return -frags; ++ } ++ ++ if (unlikely(frags >= MAX_SKB_FRAGS)) { ++ DPRINTK("Too many frags\n"); ++ return -frags; ++ } ++ ++ memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags), ++ sizeof(*txp)); ++ if (txp->size > first->size) { ++ DPRINTK("Frags galore\n"); ++ return -frags; ++ } ++ ++ first->size -= txp->size; ++ frags++; ++ ++ if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) { ++ DPRINTK("txp->offset: %x, size: %u\n", ++ txp->offset, txp->size); ++ return -frags; ++ } ++ } while ((txp++)->flags & NETTXF_more_data); ++ ++ return frags; ++} ++ ++static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk, ++ struct xen_netif *netif, ++ struct sk_buff *skb, ++ struct xen_netif_tx_request *txp, ++ struct gnttab_map_grant_ref *mop) ++{ ++ struct skb_shared_info *shinfo = skb_shinfo(skb); ++ skb_frag_t *frags = shinfo->frags; ++ unsigned long pending_idx = *((u16 *)skb->data); ++ int i, start; ++ ++ /* Skip first skb fragment if it is on same page as header fragment. 
*/ ++ start = ((unsigned long)shinfo->frags[0].page == pending_idx); ++ ++ for (i = start; i < shinfo->nr_frags; i++, txp++) { ++ pending_ring_idx_t index; ++ struct pending_tx_info *pending_tx_info = ++ netbk->pending_tx_info; ++ ++ index = pending_index(netbk->pending_cons++); ++ pending_idx = netbk->pending_ring[index]; ++ ++ gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx), ++ GNTMAP_host_map | GNTMAP_readonly, ++ txp->gref, netif->domid); ++ ++ memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp)); ++ netif_get(netif); ++ pending_tx_info[pending_idx].netif = netif; ++ frags[i].page = (void *)pending_idx; ++ } ++ ++ return mop; ++} ++ ++static int netbk_tx_check_mop(struct xen_netbk *netbk, ++ struct sk_buff *skb, ++ struct gnttab_map_grant_ref **mopp) ++{ ++ struct gnttab_map_grant_ref *mop = *mopp; ++ int pending_idx = *((u16 *)skb->data); ++ struct pending_tx_info *pending_tx_info = netbk->pending_tx_info; ++ struct xen_netif *netif = pending_tx_info[pending_idx].netif; ++ struct xen_netif_tx_request *txp; ++ struct skb_shared_info *shinfo = skb_shinfo(skb); ++ int nr_frags = shinfo->nr_frags; ++ int i, err, start; ++ ++ /* Check status of header. */ ++ err = mop->status; ++ if (unlikely(err)) { ++ pending_ring_idx_t index; ++ index = pending_index(netbk->pending_prod++); ++ txp = &pending_tx_info[pending_idx].req; ++ make_tx_response(netif, txp, NETIF_RSP_ERROR); ++ netbk->pending_ring[index] = pending_idx; ++ netif_put(netif); ++ } else { ++ set_phys_to_machine( ++ __pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT, ++ FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT)); ++ netbk->grant_tx_handle[pending_idx] = mop->handle; ++ } ++ ++ /* Skip first skb fragment if it is on same page as header fragment. */ ++ start = ((unsigned long)shinfo->frags[0].page == pending_idx); ++ ++ for (i = start; i < nr_frags; i++) { ++ int j, newerr; ++ pending_ring_idx_t index; ++ ++ pending_idx = (unsigned long)shinfo->frags[i].page; ++ ++ /* Check error status: if okay then remember grant handle. */ ++ newerr = (++mop)->status; ++ if (likely(!newerr)) { ++ unsigned long addr; ++ addr = idx_to_kaddr(netbk, pending_idx); ++ set_phys_to_machine( ++ __pa(addr)>>PAGE_SHIFT, ++ FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT)); ++ netbk->grant_tx_handle[pending_idx] = mop->handle; ++ /* Had a previous error? Invalidate this fragment. */ ++ if (unlikely(err)) ++ netif_idx_release(netbk, pending_idx); ++ continue; ++ } ++ ++ /* Error on this fragment: respond to client with an error. */ ++ txp = &netbk->pending_tx_info[pending_idx].req; ++ make_tx_response(netif, txp, NETIF_RSP_ERROR); ++ index = pending_index(netbk->pending_prod++); ++ netbk->pending_ring[index] = pending_idx; ++ netif_put(netif); ++ ++ /* Not the first error? Preceding frags already invalidated. */ ++ if (err) ++ continue; ++ ++ /* First error: invalidate header and preceding fragments. */ ++ pending_idx = *((u16 *)skb->data); ++ netif_idx_release(netbk, pending_idx); ++ for (j = start; j < i; j++) { ++ pending_idx = (unsigned long)shinfo->frags[i].page; ++ netif_idx_release(netbk, pending_idx); ++ } ++ ++ /* Remember the error: invalidate all subsequent fragments. 
*/ ++ err = newerr; ++ } ++ ++ *mopp = mop + 1; ++ return err; ++} ++ ++static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb) ++{ ++ struct skb_shared_info *shinfo = skb_shinfo(skb); ++ int nr_frags = shinfo->nr_frags; ++ int i; ++ ++ for (i = 0; i < nr_frags; i++) { ++ skb_frag_t *frag = shinfo->frags + i; ++ struct xen_netif_tx_request *txp; ++ unsigned long pending_idx; ++ ++ pending_idx = (unsigned long)frag->page; ++ ++ netbk->pending_inuse[pending_idx].alloc_time = jiffies; ++ list_add_tail(&netbk->pending_inuse[pending_idx].list, ++ &netbk->pending_inuse_head); ++ ++ txp = &netbk->pending_tx_info[pending_idx].req; ++ frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx)); ++ frag->size = txp->size; ++ frag->page_offset = txp->offset; ++ ++ skb->len += txp->size; ++ skb->data_len += txp->size; ++ skb->truesize += txp->size; ++ } ++} ++ ++int netbk_get_extras(struct xen_netif *netif, ++ struct xen_netif_extra_info *extras, ++ int work_to_do) ++{ ++ struct xen_netif_extra_info extra; ++ RING_IDX cons = netif->tx.req_cons; ++ ++ do { ++ if (unlikely(work_to_do-- <= 0)) { ++ pr_debug("Missing extra info\n"); ++ return -EBADR; ++ } ++ ++ memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons), ++ sizeof(extra)); ++ if (unlikely(!extra.type || ++ extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) { ++ netif->tx.req_cons = ++cons; ++ pr_debug("Invalid extra type: %d\n", extra.type); ++ return -EINVAL; ++ } ++ ++ memcpy(&extras[extra.type - 1], &extra, sizeof(extra)); ++ netif->tx.req_cons = ++cons; ++ } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE); ++ ++ return work_to_do; ++} ++ ++static int netbk_set_skb_gso(struct sk_buff *skb, ++ struct xen_netif_extra_info *gso) ++{ ++ if (!gso->u.gso.size) { ++ pr_debug("GSO size must not be zero.\n"); ++ return -EINVAL; ++ } ++ ++ /* Currently only TCPv4 S.O. is supported. */ ++ if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { ++ pr_debug("Bad GSO type %d.\n", gso->u.gso.type); ++ return -EINVAL; ++ } ++ ++ skb_shinfo(skb)->gso_size = gso->u.gso.size; ++ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; ++ ++ /* Header must be checked, and gso_segs computed. */ ++ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; ++ skb_shinfo(skb)->gso_segs = 0; ++ ++ return 0; ++} ++ ++static int skb_checksum_setup(struct sk_buff *skb) ++{ ++ struct iphdr *iph; ++ unsigned char *th; ++ int err = -EPROTO; ++ ++ if (skb->protocol != htons(ETH_P_IP)) ++ goto out; ++ ++ iph = (void *)skb->data; ++ th = skb->data + 4 * iph->ihl; ++ if (th >= skb_tail_pointer(skb)) ++ goto out; ++ ++ skb->csum_start = th - skb->head; ++ switch (iph->protocol) { ++ case IPPROTO_TCP: ++ skb->csum_offset = offsetof(struct tcphdr, check); ++ break; ++ case IPPROTO_UDP: ++ skb->csum_offset = offsetof(struct udphdr, check); ++ break; ++ default: ++ if (net_ratelimit()) ++ printk(KERN_ERR "Attempting to checksum a non-" ++ "TCP/UDP packet, dropping a protocol" ++ " %d packet", iph->protocol); ++ goto out; ++ } ++ ++ if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb)) ++ goto out; ++ ++ err = 0; ++ ++out: ++ return err; ++} ++ ++static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size) ++{ ++ unsigned long now = jiffies; ++ unsigned long next_credit = ++ netif->credit_timeout.expires + ++ msecs_to_jiffies(netif->credit_usec / 1000); ++ ++ /* Timer could already be pending in rare cases. */ ++ if (timer_pending(&netif->credit_timeout)) ++ return true; ++ ++ /* Passed the point where we can replenish credit? 
*/ ++ if (time_after_eq(now, next_credit)) { ++ netif->credit_timeout.expires = now; ++ tx_add_credit(netif); ++ } ++ ++ /* Still too big to send right now? Set a callback. */ ++ if (size > netif->remaining_credit) { ++ netif->credit_timeout.data = ++ (unsigned long)netif; ++ netif->credit_timeout.function = ++ tx_credit_callback; ++ mod_timer(&netif->credit_timeout, ++ next_credit); ++ ++ return true; ++ } ++ ++ return false; ++} ++ ++static unsigned net_tx_build_mops(struct xen_netbk *netbk) ++{ ++ struct gnttab_map_grant_ref *mop; ++ struct sk_buff *skb; ++ int ret; ++ ++ mop = netbk->tx_map_ops; ++ while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && ++ !list_empty(&netbk->net_schedule_list)) { ++ struct xen_netif *netif; ++ struct xen_netif_tx_request txreq; ++ struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS]; ++ struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1]; ++ u16 pending_idx; ++ RING_IDX idx; ++ int work_to_do; ++ unsigned int data_len; ++ pending_ring_idx_t index; ++ ++ /* Get a netif from the list with work to do. */ ++ netif = poll_net_schedule_list(netbk); ++ if (!netif) ++ continue; ++ ++ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do); ++ if (!work_to_do) { ++ netif_put(netif); ++ continue; ++ } ++ ++ idx = netif->tx.req_cons; ++ rmb(); /* Ensure that we see the request before we copy it. */ ++ memcpy(&txreq, RING_GET_REQUEST(&netif->tx, idx), sizeof(txreq)); ++ ++ /* Credit-based scheduling. */ ++ if (txreq.size > netif->remaining_credit && ++ tx_credit_exceeded(netif, txreq.size)) { ++ netif_put(netif); ++ continue; ++ } ++ ++ netif->remaining_credit -= txreq.size; ++ ++ work_to_do--; ++ netif->tx.req_cons = ++idx; ++ ++ memset(extras, 0, sizeof(extras)); ++ if (txreq.flags & NETTXF_extra_info) { ++ work_to_do = netbk_get_extras(netif, extras, ++ work_to_do); ++ idx = netif->tx.req_cons; ++ if (unlikely(work_to_do < 0)) { ++ netbk_tx_err(netif, &txreq, idx); ++ continue; ++ } ++ } ++ ++ ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do); ++ if (unlikely(ret < 0)) { ++ netbk_tx_err(netif, &txreq, idx - ret); ++ continue; ++ } ++ idx += ret; ++ ++ if (unlikely(txreq.size < ETH_HLEN)) { ++ pr_debug("Bad packet size: %d\n", txreq.size); ++ netbk_tx_err(netif, &txreq, idx); ++ continue; ++ } ++ ++ /* No crossing a page as the payload mustn't fragment. */ ++ if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) { ++ pr_debug("txreq.offset: %x, size: %u, end: %lu\n", ++ txreq.offset, txreq.size, ++ (txreq.offset&~PAGE_MASK) + txreq.size); ++ netbk_tx_err(netif, &txreq, idx); ++ continue; ++ } ++ ++ index = pending_index(netbk->pending_cons); ++ pending_idx = netbk->pending_ring[index]; ++ ++ data_len = (txreq.size > PKT_PROT_LEN && ++ ret < MAX_SKB_FRAGS) ? ++ PKT_PROT_LEN : txreq.size; ++ ++ skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN, ++ GFP_ATOMIC | __GFP_NOWARN); ++ if (unlikely(skb == NULL)) { ++ pr_debug("Can't allocate a skb in start_xmit.\n"); ++ netbk_tx_err(netif, &txreq, idx); ++ break; ++ } ++ ++ /* Packets passed to netif_rx() must have some headroom. 
*/ ++ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); ++ ++ if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { ++ struct xen_netif_extra_info *gso; ++ gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; ++ ++ if (netbk_set_skb_gso(skb, gso)) { ++ kfree_skb(skb); ++ netbk_tx_err(netif, &txreq, idx); ++ continue; ++ } ++ } ++ ++ gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx), ++ GNTMAP_host_map | GNTMAP_readonly, ++ txreq.gref, netif->domid); ++ mop++; ++ ++ memcpy(&netbk->pending_tx_info[pending_idx].req, ++ &txreq, sizeof(txreq)); ++ netbk->pending_tx_info[pending_idx].netif = netif; ++ *((u16 *)skb->data) = pending_idx; ++ ++ __skb_put(skb, data_len); ++ ++ skb_shinfo(skb)->nr_frags = ret; ++ if (data_len < txreq.size) { ++ skb_shinfo(skb)->nr_frags++; ++ skb_shinfo(skb)->frags[0].page = ++ (void *)(unsigned long)pending_idx; ++ } else { ++ /* Discriminate from any valid pending_idx value. */ ++ skb_shinfo(skb)->frags[0].page = (void *)~0UL; ++ } ++ ++ __skb_queue_tail(&netbk->tx_queue, skb); ++ ++ netbk->pending_cons++; ++ ++ mop = netbk_get_requests(netbk, netif, skb, txfrags, mop); ++ ++ netif->tx.req_cons = idx; ++ netif_schedule_work(netif); ++ ++ if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops)) ++ break; ++ } ++ ++ return mop - netbk->tx_map_ops; ++} ++ ++static void net_tx_submit(struct xen_netbk *netbk) ++{ ++ struct gnttab_map_grant_ref *mop; ++ struct sk_buff *skb; ++ ++ mop = netbk->tx_map_ops; ++ while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) { ++ struct xen_netif_tx_request *txp; ++ struct xen_netif *netif; ++ u16 pending_idx; ++ unsigned data_len; ++ ++ pending_idx = *((u16 *)skb->data); ++ netif = netbk->pending_tx_info[pending_idx].netif; ++ txp = &netbk->pending_tx_info[pending_idx].req; ++ ++ /* Check the remap error code. */ ++ if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) { ++ pr_debug("netback grant failed.\n"); ++ skb_shinfo(skb)->nr_frags = 0; ++ kfree_skb(skb); ++ continue; ++ } ++ ++ data_len = skb->len; ++ memcpy(skb->data, ++ (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset), ++ data_len); ++ if (data_len < txp->size) { ++ /* Append the packet payload as a fragment. */ ++ txp->offset += data_len; ++ txp->size -= data_len; ++ } else { ++ /* Schedule a response immediately. */ ++ netif_idx_release(netbk, pending_idx); ++ } ++ ++ if (txp->flags & NETTXF_csum_blank) ++ skb->ip_summed = CHECKSUM_PARTIAL; ++ else if (txp->flags & NETTXF_data_validated) ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ ++ netbk_fill_frags(netbk, skb); ++ ++ /* ++ * If the initial fragment was < PKT_PROT_LEN then ++ * pull through some bytes from the other fragments to ++ * increase the linear region to PKT_PROT_LEN bytes. 
++ */ ++ if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) { ++ int target = min_t(int, skb->len, PKT_PROT_LEN); ++ __pskb_pull_tail(skb, target - skb_headlen(skb)); ++ } ++ ++ skb->dev = netif->dev; ++ skb->protocol = eth_type_trans(skb, skb->dev); ++ ++ if (skb->ip_summed == CHECKSUM_PARTIAL) { ++ if (skb_checksum_setup(skb)) { ++ pr_debug("skb_checksum_setup failed\n"); ++ kfree_skb(skb); ++ continue; ++ } ++ } else if (skb_is_gso(skb)) { ++ pr_debug("GSO SKB checksum is not partial\n"); ++ kfree_skb(skb); ++ continue; ++ } ++ ++ if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) && ++ unlikely(skb_linearize(skb))) { ++ DPRINTK("Can't linearize skb in net_tx_action.\n"); ++ kfree_skb(skb); ++ continue; ++ } ++ ++ netif->stats.rx_bytes += skb->len; ++ netif->stats.rx_packets++; ++ ++ netif_rx_ni(skb); ++ netif->dev->last_rx = jiffies; ++ } ++} ++ ++/* Called after netfront has transmitted */ ++static void net_tx_action(unsigned long data) ++{ ++ struct xen_netbk *netbk = (struct xen_netbk *)data; ++ unsigned nr_mops; ++ int ret; ++ ++ net_tx_action_dealloc(netbk); ++ ++ nr_mops = net_tx_build_mops(netbk); ++ ++ if (nr_mops == 0) ++ goto out; ++ ++ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, ++ netbk->tx_map_ops, nr_mops); ++ BUG_ON(ret); ++ ++ net_tx_submit(netbk); ++out: ++ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB && ++ !list_empty(&netbk->pending_inuse_head)) { ++ struct netbk_tx_pending_inuse *oldest; ++ ++ oldest = list_entry(netbk->pending_inuse_head.next, ++ struct netbk_tx_pending_inuse, list); ++ mod_timer(&netbk->netbk_tx_pending_timer, ++ oldest->alloc_time + HZ); ++ } ++} ++ ++static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx) ++{ ++ static DEFINE_SPINLOCK(_lock); ++ unsigned long flags; ++ pending_ring_idx_t index; ++ ++ spin_lock_irqsave(&_lock, flags); ++ index = pending_index(netbk->dealloc_prod); ++ netbk->dealloc_ring[index] = pending_idx; ++ /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. 
*/ ++ smp_wmb(); ++ netbk->dealloc_prod++; ++ spin_unlock_irqrestore(&_lock, flags); ++ ++ xen_netbk_bh_handler(netbk, 0); ++} ++ ++static void netif_page_release(struct page *page, unsigned int order) ++{ ++ unsigned int group, idx; ++ int foreign = netif_get_page_ext(page, &group, &idx); ++ ++ BUG_ON(!foreign); ++ BUG_ON(order); ++ ++ netif_idx_release(&xen_netbk[group], idx); ++} ++ ++irqreturn_t netif_be_int(int irq, void *dev_id) ++{ ++ struct xen_netif *netif = dev_id; ++ struct xen_netbk *netbk; ++ ++ if (netif->group == -1) ++ return IRQ_NONE; ++ ++ netbk = &xen_netbk[netif->group]; ++ ++ add_to_net_schedule_list_tail(netif); ++ maybe_schedule_tx_action(netbk); ++ ++ if (netif_schedulable(netif) && !netbk_queue_full(netif)) ++ netif_wake_queue(netif->dev); ++ ++ return IRQ_HANDLED; ++} ++ ++static void make_tx_response(struct xen_netif *netif, ++ struct xen_netif_tx_request *txp, ++ s8 st) ++{ ++ RING_IDX i = netif->tx.rsp_prod_pvt; ++ struct xen_netif_tx_response *resp; ++ int notify; ++ ++ resp = RING_GET_RESPONSE(&netif->tx, i); ++ resp->id = txp->id; ++ resp->status = st; ++ ++ if (txp->flags & NETTXF_extra_info) ++ RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL; ++ ++ netif->tx.rsp_prod_pvt = ++i; ++ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify); ++ if (notify) ++ notify_remote_via_irq(netif->irq); ++} ++ ++static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif, ++ u16 id, ++ s8 st, ++ u16 offset, ++ u16 size, ++ u16 flags) ++{ ++ RING_IDX i = netif->rx.rsp_prod_pvt; ++ struct xen_netif_rx_response *resp; ++ ++ resp = RING_GET_RESPONSE(&netif->rx, i); ++ resp->offset = offset; ++ resp->flags = flags; ++ resp->id = id; ++ resp->status = (s16)size; ++ if (st < 0) ++ resp->status = (s16)st; ++ ++ netif->rx.rsp_prod_pvt = ++i; ++ ++ return resp; ++} ++ ++#ifdef NETBE_DEBUG_INTERRUPT ++static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs) ++{ ++ struct list_head *ent; ++ struct xen_netif *netif; ++ int i = 0; ++ int group = 0; ++ ++ printk(KERN_ALERT "netif_schedule_list:\n"); ++ ++ for (group = 0; group < xen_netbk_group_nr; group++) { ++ struct xen_netbk *netbk = &xen_netbk[group]; ++ spin_lock_irq(&netbk->net_schedule_list_lock); ++ printk(KERN_ALERT "xen_netback group number: %d\n", group); ++ list_for_each(ent, &netbk->net_schedule_list) { ++ netif = list_entry(ent, struct xen_netif, list); ++ printk(KERN_ALERT " %d: private(rx_req_cons=%08x " ++ "rx_resp_prod=%08x\n", ++ i, netif->rx.req_cons, netif->rx.rsp_prod_pvt); ++ printk(KERN_ALERT ++ " tx_req_cons=%08x, tx_resp_prod=%08x)\n", ++ netif->tx.req_cons, netif->tx.rsp_prod_pvt); ++ printk(KERN_ALERT ++ " shared(rx_req_prod=%08x " ++ "rx_resp_prod=%08x\n", ++ netif->rx.sring->req_prod, ++ netif->rx.sring->rsp_prod); ++ printk(KERN_ALERT ++ " rx_event=%08x, tx_req_prod=%08x\n", ++ netif->rx.sring->rsp_event, ++ netif->tx.sring->req_prod); ++ printk(KERN_ALERT ++ " tx_resp_prod=%08x, tx_event=%08x)\n", ++ netif->tx.sring->rsp_prod, ++ netif->tx.sring->rsp_event); ++ i++; ++ } ++ spin_unlock_irq(&netbk->net_schedule_list_lock); ++ } ++ ++ printk(KERN_ALERT " ** End of netif_schedule_list **\n"); ++ ++ return IRQ_HANDLED; ++} ++#endif ++ ++static inline int rx_work_todo(struct xen_netbk *netbk) ++{ ++ return !skb_queue_empty(&netbk->rx_queue); ++} ++ ++static inline int tx_work_todo(struct xen_netbk *netbk) ++{ ++ if (netbk->dealloc_cons != netbk->dealloc_prod) ++ return 1; ++ ++ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB && ++ 
!list_empty(&netbk->pending_inuse_head)) ++ return 1; ++ ++ if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && ++ !list_empty(&netbk->net_schedule_list)) ++ return 1; ++ ++ return 0; ++} ++ ++static int netbk_action_thread(void *data) ++{ ++ struct xen_netbk *netbk = (struct xen_netbk *)data; ++ while (!kthread_should_stop()) { ++ wait_event_interruptible(netbk->kthread.netbk_action_wq, ++ rx_work_todo(netbk) ++ || tx_work_todo(netbk) ++ || kthread_should_stop()); ++ cond_resched(); ++ ++ if (kthread_should_stop()) ++ break; ++ ++ if (rx_work_todo(netbk)) ++ net_rx_action((unsigned long)netbk); ++ ++ if (tx_work_todo(netbk)) ++ net_tx_action((unsigned long)netbk); ++ } ++ ++ return 0; ++} ++ ++static int __init netback_init(void) ++{ ++ int i; ++ struct page *page; ++ int rc = 0; ++ int group; ++ ++ if (!xen_pv_domain()) ++ return -ENODEV; ++ ++ xen_netbk_group_nr = num_online_cpus(); ++ xen_netbk = vmalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr); ++ if (!xen_netbk) { ++ printk(KERN_ALERT "%s: out of memory\n", __func__); ++ return -ENOMEM; ++ } ++ memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr); ++ ++ for (group = 0; group < xen_netbk_group_nr; group++) { ++ struct xen_netbk *netbk = &xen_netbk[group]; ++ skb_queue_head_init(&netbk->rx_queue); ++ skb_queue_head_init(&netbk->tx_queue); ++ ++ init_timer(&netbk->net_timer); ++ netbk->net_timer.data = (unsigned long)netbk; ++ netbk->net_timer.function = net_alarm; ++ ++ init_timer(&netbk->netbk_tx_pending_timer); ++ netbk->netbk_tx_pending_timer.data = (unsigned long)netbk; ++ netbk->netbk_tx_pending_timer.function = ++ netbk_tx_pending_timeout; ++ ++ netbk->mmap_pages = ++ alloc_empty_pages_and_pagevec(MAX_PENDING_REQS); ++ if (!netbk->mmap_pages) { ++ printk(KERN_ALERT "%s: out of memory\n", __func__); ++ del_timer(&netbk->netbk_tx_pending_timer); ++ del_timer(&netbk->net_timer); ++ rc = -ENOMEM; ++ goto failed_init; ++ } ++ ++ for (i = 0; i < MAX_PENDING_REQS; i++) { ++ page = netbk->mmap_pages[i]; ++ SetPageForeign(page, netif_page_release); ++ netif_set_page_ext(page, group, i); ++ INIT_LIST_HEAD(&netbk->pending_inuse[i].list); ++ } ++ ++ netbk->pending_cons = 0; ++ netbk->pending_prod = MAX_PENDING_REQS; ++ for (i = 0; i < MAX_PENDING_REQS; i++) ++ netbk->pending_ring[i] = i; ++ ++ if (MODPARM_netback_kthread) { ++ init_waitqueue_head(&netbk->kthread.netbk_action_wq); ++ netbk->kthread.task = ++ kthread_create(netbk_action_thread, ++ (void *)netbk, ++ "netback/%u", group); ++ ++ if (!IS_ERR(netbk->kthread.task)) { ++ kthread_bind(netbk->kthread.task, group); ++ } else { ++ printk(KERN_ALERT ++ "kthread_run() fails at netback\n"); ++ free_empty_pages_and_pagevec(netbk->mmap_pages, ++ MAX_PENDING_REQS); ++ del_timer(&netbk->netbk_tx_pending_timer); ++ del_timer(&netbk->net_timer); ++ rc = PTR_ERR(netbk->kthread.task); ++ goto failed_init; ++ } ++ } else { ++ tasklet_init(&netbk->tasklet.net_tx_tasklet, ++ net_tx_action, ++ (unsigned long)netbk); ++ tasklet_init(&netbk->tasklet.net_rx_tasklet, ++ net_rx_action, ++ (unsigned long)netbk); ++ } ++ ++ INIT_LIST_HEAD(&netbk->pending_inuse_head); ++ INIT_LIST_HEAD(&netbk->net_schedule_list); ++ ++ spin_lock_init(&netbk->net_schedule_list_lock); ++ ++ atomic_set(&netbk->netfront_count, 0); ++ ++ if (MODPARM_netback_kthread) ++ wake_up_process(netbk->kthread.task); ++ } ++ ++ netbk_copy_skb_mode = NETBK_DONT_COPY_SKB; ++ if (MODPARM_copy_skb) { ++ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace, ++ NULL, 0)) ++ netbk_copy_skb_mode = 
NETBK_ALWAYS_COPY_SKB; ++ else ++ netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB; ++ } ++ ++ rc = netif_xenbus_init(); ++ if (rc) ++ goto failed_init; ++ ++#ifdef NETBE_DEBUG_INTERRUPT ++ (void)bind_virq_to_irqhandler(VIRQ_DEBUG, ++ 0, ++ netif_be_dbg, ++ IRQF_SHARED, ++ "net-be-dbg", ++ &netif_be_dbg); ++#endif ++ ++ return 0; ++ ++failed_init: ++ for (i = 0; i < group; i++) { ++ struct xen_netbk *netbk = &xen_netbk[i]; ++ free_empty_pages_and_pagevec(netbk->mmap_pages, ++ MAX_PENDING_REQS); ++ del_timer(&netbk->netbk_tx_pending_timer); ++ del_timer(&netbk->net_timer); ++ if (MODPARM_netback_kthread) ++ kthread_stop(netbk->kthread.task); ++ } ++ vfree(xen_netbk); ++ return rc; ++ ++} ++ ++module_init(netback_init); ++ ++MODULE_LICENSE("Dual BSD/GPL"); +diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c +new file mode 100644 +index 0000000..640c696 +--- /dev/null ++++ b/drivers/net/xen-netback/xenbus.c +@@ -0,0 +1,487 @@ ++/* Xenbus code for netif backend ++ * Copyright (C) 2005 Rusty Russell ++ * Copyright (C) 2005 XenSource Ltd ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++*/ ++ ++#include "common.h" ++ ++static int connect_rings(struct backend_info *); ++static void connect(struct backend_info *); ++static void backend_create_netif(struct backend_info *be); ++static void unregister_hotplug_status_watch(struct backend_info *be); ++ ++static int netback_remove(struct xenbus_device *dev) ++{ ++ struct backend_info *be = dev_get_drvdata(&dev->dev); ++ ++ unregister_hotplug_status_watch(be); ++ if (be->netif) { ++ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); ++ xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status"); ++ netif_disconnect(be->netif); ++ be->netif = NULL; ++ } ++ kfree(be); ++ dev_set_drvdata(&dev->dev, NULL); ++ return 0; ++} ++ ++ ++/** ++ * Entry point to this code when a new device is created. Allocate the basic ++ * structures and switch to InitWait. 
++ */ ++static int netback_probe(struct xenbus_device *dev, ++ const struct xenbus_device_id *id) ++{ ++ const char *message; ++ struct xenbus_transaction xbt; ++ int err; ++ int sg; ++ struct backend_info *be = kzalloc(sizeof(struct backend_info), ++ GFP_KERNEL); ++ if (!be) { ++ xenbus_dev_fatal(dev, -ENOMEM, ++ "allocating backend structure"); ++ return -ENOMEM; ++ } ++ ++ be->dev = dev; ++ dev_set_drvdata(&dev->dev, be); ++ ++ sg = 1; ++ if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) ++ sg = 0; ++ ++ do { ++ err = xenbus_transaction_start(&xbt); ++ if (err) { ++ xenbus_dev_fatal(dev, err, "starting transaction"); ++ goto fail; ++ } ++ ++ err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg); ++ if (err) { ++ message = "writing feature-sg"; ++ goto abort_transaction; ++ } ++ ++ err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", ++ "%d", sg); ++ if (err) { ++ message = "writing feature-gso-tcpv4"; ++ goto abort_transaction; ++ } ++ ++ /* We support rx-copy path. */ ++ err = xenbus_printf(xbt, dev->nodename, ++ "feature-rx-copy", "%d", 1); ++ if (err) { ++ message = "writing feature-rx-copy"; ++ goto abort_transaction; ++ } ++ ++ /* ++ * We don't support rx-flip path (except old guests who don't ++ * grok this feature flag). ++ */ ++ err = xenbus_printf(xbt, dev->nodename, ++ "feature-rx-flip", "%d", 0); ++ if (err) { ++ message = "writing feature-rx-flip"; ++ goto abort_transaction; ++ } ++ ++ err = xenbus_transaction_end(xbt, 0); ++ } while (err == -EAGAIN); ++ ++ if (err) { ++ xenbus_dev_fatal(dev, err, "completing transaction"); ++ goto fail; ++ } ++ ++ err = xenbus_switch_state(dev, XenbusStateInitWait); ++ if (err) ++ goto fail; ++ ++ /* This kicks hotplug scripts, so do it immediately. */ ++ backend_create_netif(be); ++ ++ return 0; ++ ++abort_transaction: ++ xenbus_transaction_end(xbt, 1); ++ xenbus_dev_fatal(dev, err, "%s", message); ++fail: ++ pr_debug("failed"); ++ netback_remove(dev); ++ return err; ++} ++ ++ ++/* ++ * Handle the creation of the hotplug script environment. We add the script ++ * and vif variables to the environment, for the benefit of the vif-* hotplug ++ * scripts. 
++ */ ++static int netback_uevent(struct xenbus_device *xdev, ++ struct kobj_uevent_env *env) ++{ ++ struct backend_info *be = dev_get_drvdata(&xdev->dev); ++ char *val; ++ ++ val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL); ++ if (IS_ERR(val)) { ++ int err = PTR_ERR(val); ++ xenbus_dev_fatal(xdev, err, "reading script"); ++ return err; ++ } else { ++ if (add_uevent_var(env, "script=%s", val)) { ++ kfree(val); ++ return -ENOMEM; ++ } ++ kfree(val); ++ } ++ ++ if (!be || !be->netif) ++ return 0; ++ ++ return add_uevent_var(env, "vif=%s", be->netif->dev->name); ++} ++ ++ ++static void backend_create_netif(struct backend_info *be) ++{ ++ int err; ++ long handle; ++ struct xenbus_device *dev = be->dev; ++ ++ if (be->netif != NULL) ++ return; ++ ++ err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle); ++ if (err != 1) { ++ xenbus_dev_fatal(dev, err, "reading handle"); ++ return; ++ } ++ ++ be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle); ++ if (IS_ERR(be->netif)) { ++ err = PTR_ERR(be->netif); ++ be->netif = NULL; ++ xenbus_dev_fatal(dev, err, "creating interface"); ++ return; ++ } ++ ++ kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE); ++} ++ ++ ++static void disconnect_backend(struct xenbus_device *dev) ++{ ++ struct backend_info *be = dev_get_drvdata(&dev->dev); ++ ++ if (be->netif) { ++ xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status"); ++ netif_disconnect(be->netif); ++ be->netif = NULL; ++ } ++} ++ ++/** ++ * Callback received when the frontend's state changes. ++ */ ++static void frontend_changed(struct xenbus_device *dev, ++ enum xenbus_state frontend_state) ++{ ++ struct backend_info *be = dev_get_drvdata(&dev->dev); ++ ++ pr_debug("frontend state %s", xenbus_strstate(frontend_state)); ++ ++ be->frontend_state = frontend_state; ++ ++ switch (frontend_state) { ++ case XenbusStateInitialising: ++ if (dev->state == XenbusStateClosed) { ++ printk(KERN_INFO "%s: %s: prepare for reconnect\n", ++ __func__, dev->nodename); ++ xenbus_switch_state(dev, XenbusStateInitWait); ++ } ++ break; ++ ++ case XenbusStateInitialised: ++ break; ++ ++ case XenbusStateConnected: ++ if (dev->state == XenbusStateConnected) ++ break; ++ backend_create_netif(be); ++ if (be->netif) ++ connect(be); ++ break; ++ ++ case XenbusStateClosing: ++ if (be->netif) ++ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); ++ disconnect_backend(dev); ++ xenbus_switch_state(dev, XenbusStateClosing); ++ break; ++ ++ case XenbusStateClosed: ++ xenbus_switch_state(dev, XenbusStateClosed); ++ if (xenbus_dev_is_online(dev)) ++ break; ++ /* fall through if not online */ ++ case XenbusStateUnknown: ++ device_unregister(&dev->dev); ++ break; ++ ++ default: ++ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", ++ frontend_state); ++ break; ++ } ++} ++ ++ ++static void xen_net_read_rate(struct xenbus_device *dev, ++ unsigned long *bytes, unsigned long *usec) ++{ ++ char *s, *e; ++ unsigned long b, u; ++ char *ratestr; ++ ++ /* Default to unlimited bandwidth. */ ++ *bytes = ~0UL; ++ *usec = 0; ++ ++ ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL); ++ if (IS_ERR(ratestr)) ++ return; ++ ++ s = ratestr; ++ b = simple_strtoul(s, &e, 10); ++ if ((s == e) || (*e != ',')) ++ goto fail; ++ ++ s = e + 1; ++ u = simple_strtoul(s, &e, 10); ++ if ((s == e) || (*e != '\0')) ++ goto fail; ++ ++ *bytes = b; ++ *usec = u; ++ ++ kfree(ratestr); ++ return; ++ ++ fail: ++ pr_warn("Failed to parse network rate limit. 
Traffic unlimited.\n"); ++ kfree(ratestr); ++} ++ ++static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) ++{ ++ char *s, *e, *macstr; ++ int i; ++ ++ macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL); ++ if (IS_ERR(macstr)) ++ return PTR_ERR(macstr); ++ ++ for (i = 0; i < ETH_ALEN; i++) { ++ mac[i] = simple_strtoul(s, &e, 16); ++ if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) { ++ kfree(macstr); ++ return -ENOENT; ++ } ++ s = e+1; ++ } ++ ++ kfree(macstr); ++ return 0; ++} ++ ++static void unregister_hotplug_status_watch(struct backend_info *be) ++{ ++ if (be->have_hotplug_status_watch) { ++ unregister_xenbus_watch(&be->hotplug_status_watch); ++ kfree(be->hotplug_status_watch.node); ++ } ++ be->have_hotplug_status_watch = 0; ++} ++ ++static void hotplug_status_changed(struct xenbus_watch *watch, ++ const char **vec, ++ unsigned int vec_size) ++{ ++ struct backend_info *be = container_of(watch, ++ struct backend_info, ++ hotplug_status_watch); ++ char *str; ++ unsigned int len; ++ ++ str = xenbus_read(XBT_NIL, be->dev->nodename, "hotplug-status", &len); ++ if (IS_ERR(str)) ++ return; ++ if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) { ++ xenbus_switch_state(be->dev, XenbusStateConnected); ++ /* Not interested in this watch anymore. */ ++ unregister_hotplug_status_watch(be); ++ } ++ kfree(str); ++} ++ ++static void connect(struct backend_info *be) ++{ ++ int err; ++ struct xenbus_device *dev = be->dev; ++ ++ err = connect_rings(be); ++ if (err) ++ return; ++ ++ err = xen_net_read_mac(dev, be->netif->fe_dev_addr); ++ if (err) { ++ xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename); ++ return; ++ } ++ ++ xen_net_read_rate(dev, &be->netif->credit_bytes, ++ &be->netif->credit_usec); ++ be->netif->remaining_credit = be->netif->credit_bytes; ++ ++ unregister_hotplug_status_watch(be); ++ err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, ++ hotplug_status_changed, ++ "%s/%s", dev->nodename, "hotplug-status"); ++ if (err) { ++ /* Switch now, since we can't do a watch. */ ++ xenbus_switch_state(dev, XenbusStateConnected); ++ } else { ++ be->have_hotplug_status_watch = 1; ++ } ++ ++ netif_wake_queue(be->netif->dev); ++} ++ ++ ++static int connect_rings(struct backend_info *be) ++{ ++ struct xen_netif *netif = be->netif; ++ struct xenbus_device *dev = be->dev; ++ unsigned long tx_ring_ref, rx_ring_ref; ++ unsigned int evtchn, rx_copy; ++ int err; ++ int val; ++ ++ err = xenbus_gather(XBT_NIL, dev->otherend, ++ "tx-ring-ref", "%lu", &tx_ring_ref, ++ "rx-ring-ref", "%lu", &rx_ring_ref, ++ "event-channel", "%u", &evtchn, NULL); ++ if (err) { ++ xenbus_dev_fatal(dev, err, ++ "reading %s/ring-ref and event-channel", ++ dev->otherend); ++ return err; ++ } ++ ++ err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u", ++ &rx_copy); ++ if (err == -ENOENT) { ++ err = 0; ++ rx_copy = 0; ++ } ++ if (err < 0) { ++ xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy", ++ dev->otherend); ++ return err; ++ } ++ if (!rx_copy) ++ return -EOPNOTSUPP; ++ ++ if (netif->dev->tx_queue_len != 0) { ++ if (xenbus_scanf(XBT_NIL, dev->otherend, ++ "feature-rx-notify", "%d", &val) < 0) ++ val = 0; ++ if (val) ++ netif->can_queue = 1; ++ else ++ /* Must be non-zero for pfifo_fast to work. 
*/ ++ netif->dev->tx_queue_len = 1; ++ } ++ ++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", ++ "%d", &val) < 0) ++ val = 0; ++ netif->can_sg = !!val; ++ ++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", ++ "%d", &val) < 0) ++ val = 0; ++ netif->gso = !!val; ++ ++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix", ++ "%d", &val) < 0) ++ val = 0; ++ netif->gso_prefix = !!val; ++ ++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload", ++ "%d", &val) < 0) ++ val = 0; ++ netif->csum = !val; ++ ++ /* Set dev->features */ ++ netif_set_features(netif); ++ ++ /* Map the shared frame, irq etc. */ ++ err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn); ++ if (err) { ++ xenbus_dev_fatal(dev, err, ++ "mapping shared-frames %lu/%lu port %u", ++ tx_ring_ref, rx_ring_ref, evtchn); ++ return err; ++ } ++ return 0; ++} ++ ++ ++/* ** Driver Registration ** */ ++ ++ ++static const struct xenbus_device_id netback_ids[] = { ++ { "vif" }, ++ { "" } ++}; ++ ++ ++static struct xenbus_driver netback = { ++ .name = "vif", ++ .owner = THIS_MODULE, ++ .ids = netback_ids, ++ .probe = netback_probe, ++ .remove = netback_remove, ++ .uevent = netback_uevent, ++ .otherend_changed = frontend_changed, ++}; ++ ++ ++int netif_xenbus_init(void) ++{ ++ printk(KERN_CRIT "registering netback\n"); ++ return xenbus_register_backend(&netback); ++} +diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig +index 30290a8..5a48ce9 100644 +--- a/drivers/xen/Kconfig ++++ b/drivers/xen/Kconfig +@@ -37,13 +37,6 @@ config XEN_BACKEND + depends on XEN_PCIDEV_BACKEND + + +-config XEN_NETDEV_BACKEND +- tristate "Xen backend network device" +- depends on XEN_BACKEND && NET +- help +- Implement the network backend driver, which passes packets +- from the guest domain's frontend drivers to the network. 
+- + config XENFS + tristate "Xen filesystem" + default y +diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile +index c0e0509..533a199 100644 +--- a/drivers/xen/Makefile ++++ b/drivers/xen/Makefile +@@ -9,7 +9,6 @@ obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o + obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o + obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o + obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/ +-obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/ + obj-$(CONFIG_XENFS) += xenfs/ + obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o + obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o +diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile +deleted file mode 100644 +index e346e81..0000000 +--- a/drivers/xen/netback/Makefile ++++ /dev/null +@@ -1,3 +0,0 @@ +-obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o +- +-xen-netback-y := netback.o xenbus.o interface.o +diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h +deleted file mode 100644 +index 079e1de..0000000 +--- a/drivers/xen/netback/common.h ++++ /dev/null +@@ -1,275 +0,0 @@ +-/****************************************************************************** +- * arch/xen/drivers/netif/backend/common.h +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public License version 2 +- * as published by the Free Software Foundation; or, when distributed +- * separately from the Linux kernel or incorporated into other +- * software packages, subject to the following license: +- * +- * Permission is hereby granted, free of charge, to any person obtaining a copy +- * of this source file (the "Software"), to deal in the Software without +- * restriction, including without limitation the rights to use, copy, modify, +- * merge, publish, distribute, sublicense, and/or sell copies of the Software, +- * and to permit persons to whom the Software is furnished to do so, subject to +- * the following conditions: +- * +- * The above copyright notice and this permission notice shall be included in +- * all copies or substantial portions of the Software. +- * +- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +- * IN THE SOFTWARE. +- */ +- +-#ifndef __NETIF__BACKEND__COMMON_H__ +-#define __NETIF__BACKEND__COMMON_H__ +- +-#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +-#include +-#include +- +-struct xen_netif { +- /* Unique identifier for this interface. */ +- domid_t domid; +- int group; +- unsigned int handle; +- +- u8 fe_dev_addr[6]; +- +- /* Physical parameters of the comms window. */ +- grant_handle_t tx_shmem_handle; +- grant_ref_t tx_shmem_ref; +- grant_handle_t rx_shmem_handle; +- grant_ref_t rx_shmem_ref; +- unsigned int irq; +- +- /* The shared rings and indexes. 
*/ +- struct xen_netif_tx_back_ring tx; +- struct xen_netif_rx_back_ring rx; +- struct vm_struct *tx_comms_area; +- struct vm_struct *rx_comms_area; +- +- /* Flags that must not be set in dev->features */ +- int features_disabled; +- +- /* Frontend feature information. */ +- u8 can_sg:1; +- u8 gso:1; +- u8 gso_prefix:1; +- u8 csum:1; +- +- /* Internal feature information. */ +- u8 can_queue:1; /* can queue packets for receiver? */ +- +- /* Allow netif_be_start_xmit() to peek ahead in the rx request +- * ring. This is a prediction of what rx_req_cons will be once +- * all queued skbs are put on the ring. */ +- RING_IDX rx_req_cons_peek; +- +- /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */ +- unsigned long credit_bytes; +- unsigned long credit_usec; +- unsigned long remaining_credit; +- struct timer_list credit_timeout; +- +- /* Statistics */ +- int nr_copied_skbs; +- +- /* Miscellaneous private stuff. */ +- struct list_head list; /* scheduling list */ +- atomic_t refcnt; +- struct net_device *dev; +- struct net_device_stats stats; +- +- unsigned int carrier; +- +- wait_queue_head_t waiting_to_free; +-}; +- +-/* +- * Implement our own carrier flag: the network stack's version causes delays +- * when the carrier is re-enabled (in particular, dev_activate() may not +- * immediately be called, which can cause packet loss; also the etherbridge +- * can be rather lazy in activating its port). +- */ +-#define netback_carrier_on(netif) ((netif)->carrier = 1) +-#define netback_carrier_off(netif) ((netif)->carrier = 0) +-#define netback_carrier_ok(netif) ((netif)->carrier) +- +-enum { +- NETBK_DONT_COPY_SKB, +- NETBK_DELAYED_COPY_SKB, +- NETBK_ALWAYS_COPY_SKB, +-}; +- +-extern int netbk_copy_skb_mode; +- +-struct backend_info { +- struct xenbus_device *dev; +- struct xen_netif *netif; +- enum xenbus_state frontend_state; +- struct xenbus_watch hotplug_status_watch; +- int have_hotplug_status_watch:1; +-}; +- +-#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE) +-#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE) +- +-void netif_disconnect(struct xen_netif *netif); +- +-void netif_set_features(struct xen_netif *netif); +-struct xen_netif *netif_alloc(struct device *parent, domid_t domid, +- unsigned int handle); +-int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref, +- unsigned long rx_ring_ref, unsigned int evtchn); +- +-static inline void netif_get(struct xen_netif *netif) +-{ +- atomic_inc(&netif->refcnt); +-} +- +-static inline void netif_put(struct xen_netif *netif) +-{ +- if (atomic_dec_and_test(&netif->refcnt)) +- wake_up(&netif->waiting_to_free); +-} +- +-int netif_xenbus_init(void); +- +-#define netif_schedulable(netif) \ +- (netif_running((netif)->dev) && netback_carrier_ok(netif)) +- +-void netif_schedule_work(struct xen_netif *netif); +-void netif_deschedule_work(struct xen_netif *netif); +- +-int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev); +-struct net_device_stats *netif_be_get_stats(struct net_device *dev); +-irqreturn_t netif_be_int(int irq, void *dev_id); +- +-static inline int netbk_can_queue(struct net_device *dev) +-{ +- struct xen_netif *netif = netdev_priv(dev); +- return netif->can_queue; +-} +- +-static inline int netbk_can_sg(struct net_device *dev) +-{ +- struct xen_netif *netif = netdev_priv(dev); +- return netif->can_sg; +-} +- +-struct pending_tx_info { +- struct xen_netif_tx_request req; +- struct xen_netif *netif; +-}; +-typedef unsigned int pending_ring_idx_t; 
+- +-struct netbk_rx_meta { +- int id; +- int size; +- int gso_size; +-}; +- +-struct netbk_tx_pending_inuse { +- struct list_head list; +- unsigned long alloc_time; +-}; +- +-#define MAX_PENDING_REQS 256 +- +-#define MAX_BUFFER_OFFSET PAGE_SIZE +- +-/* extra field used in struct page */ +-union page_ext { +- struct { +-#if BITS_PER_LONG < 64 +-#define IDX_WIDTH 8 +-#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH) +- unsigned int group:GROUP_WIDTH; +- unsigned int idx:IDX_WIDTH; +-#else +- unsigned int group, idx; +-#endif +- } e; +- void *mapping; +-}; +- +-struct xen_netbk { +- union { +- struct { +- struct tasklet_struct net_tx_tasklet; +- struct tasklet_struct net_rx_tasklet; +- } tasklet; +- +- struct { +- wait_queue_head_t netbk_action_wq; +- struct task_struct *task; +- } kthread; +- }; +- +- struct sk_buff_head rx_queue; +- struct sk_buff_head tx_queue; +- +- struct timer_list net_timer; +- struct timer_list netbk_tx_pending_timer; +- +- struct page **mmap_pages; +- +- pending_ring_idx_t pending_prod; +- pending_ring_idx_t pending_cons; +- pending_ring_idx_t dealloc_prod; +- pending_ring_idx_t dealloc_cons; +- +- struct list_head pending_inuse_head; +- struct list_head net_schedule_list; +- +- /* Protect the net_schedule_list in netif. */ +- spinlock_t net_schedule_list_lock; +- +- atomic_t netfront_count; +- +- struct pending_tx_info pending_tx_info[MAX_PENDING_REQS]; +- struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS]; +- struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS]; +- struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS]; +- +- grant_handle_t grant_tx_handle[MAX_PENDING_REQS]; +- u16 pending_ring[MAX_PENDING_REQS]; +- u16 dealloc_ring[MAX_PENDING_REQS]; +- +- /* +- * Each head or fragment can be up to 4096 bytes. Given +- * MAX_BUFFER_OFFSET of 4096 the worst case is that each +- * head/fragment uses 2 copy operation. +- */ +- struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE]; +- unsigned char rx_notify[NR_IRQS]; +- u16 notify_list[NET_RX_RING_SIZE]; +- struct netbk_rx_meta meta[2*NET_RX_RING_SIZE]; +-}; +- +-extern struct xen_netbk *xen_netbk; +-extern int xen_netbk_group_nr; +- +-#endif /* __NETIF__BACKEND__COMMON_H__ */ +diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c +deleted file mode 100644 +index c36db26..0000000 +--- a/drivers/xen/netback/interface.c ++++ /dev/null +@@ -1,465 +0,0 @@ +-/****************************************************************************** +- * arch/xen/drivers/netif/backend/interface.c +- * +- * Network-device interface management. +- * +- * Copyright (c) 2004-2005, Keir Fraser +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public License version 2 +- * as published by the Free Software Foundation; or, when distributed +- * separately from the Linux kernel or incorporated into other +- * software packages, subject to the following license: +- * +- * Permission is hereby granted, free of charge, to any person obtaining a copy +- * of this source file (the "Software"), to deal in the Software without +- * restriction, including without limitation the rights to use, copy, modify, +- * merge, publish, distribute, sublicense, and/or sell copies of the Software, +- * and to permit persons to whom the Software is furnished to do so, subject to +- * the following conditions: +- * +- * The above copyright notice and this permission notice shall be included in +- * all copies or substantial portions of the Software. 
+- * +- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +- * IN THE SOFTWARE. +- */ +- +-#include "common.h" +- +-#include +-#include +- +-#include +-#include +- +-/* +- * Module parameter 'queue_length': +- * +- * Enables queuing in the network stack when a client has run out of receive +- * descriptors. +- */ +-static unsigned long netbk_queue_length = 32; +-module_param_named(queue_length, netbk_queue_length, ulong, 0644); +- +-static void netbk_add_netif(struct xen_netbk *netbk, int group_nr, +- struct xen_netif *netif) +-{ +- int i; +- int min_netfront_count; +- int min_group = 0; +- min_netfront_count = atomic_read(&netbk[0].netfront_count); +- for (i = 0; i < group_nr; i++) { +- int netfront_count = atomic_read(&netbk[i].netfront_count); +- if (netfront_count < min_netfront_count) { +- min_group = i; +- min_netfront_count = netfront_count; +- } +- } +- +- netif->group = min_group; +- atomic_inc(&netbk[netif->group].netfront_count); +-} +- +-static void netbk_remove_netif(struct xen_netbk *netbk, struct xen_netif *netif) +-{ +- atomic_dec(&netbk[netif->group].netfront_count); +-} +- +-static void __netif_up(struct xen_netif *netif) +-{ +- netbk_add_netif(xen_netbk, xen_netbk_group_nr, netif); +- enable_irq(netif->irq); +- netif_schedule_work(netif); +-} +- +-static void __netif_down(struct xen_netif *netif) +-{ +- disable_irq(netif->irq); +- netif_deschedule_work(netif); +- netbk_remove_netif(xen_netbk, netif); +-} +- +-static int net_open(struct net_device *dev) +-{ +- struct xen_netif *netif = netdev_priv(dev); +- if (netback_carrier_ok(netif)) { +- __netif_up(netif); +- netif_start_queue(dev); +- } +- return 0; +-} +- +-static int net_close(struct net_device *dev) +-{ +- struct xen_netif *netif = netdev_priv(dev); +- if (netback_carrier_ok(netif)) +- __netif_down(netif); +- netif_stop_queue(dev); +- return 0; +-} +- +-static int netbk_change_mtu(struct net_device *dev, int mtu) +-{ +- int max = netbk_can_sg(dev) ? 
65535 - ETH_HLEN : ETH_DATA_LEN; +- +- if (mtu > max) +- return -EINVAL; +- dev->mtu = mtu; +- return 0; +-} +- +-void netif_set_features(struct xen_netif *netif) +-{ +- struct net_device *dev = netif->dev; +- int features = dev->features; +- +- if (netif->can_sg) +- features |= NETIF_F_SG; +- if (netif->gso || netif->gso_prefix) +- features |= NETIF_F_TSO; +- if (netif->csum) +- features |= NETIF_F_IP_CSUM; +- +- features &= ~(netif->features_disabled); +- +- if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) +- dev->mtu = ETH_DATA_LEN; +- +- dev->features = features; +-} +- +-static int netbk_set_tx_csum(struct net_device *dev, u32 data) +-{ +- struct xen_netif *netif = netdev_priv(dev); +- if (data) { +- if (!netif->csum) +- return -ENOSYS; +- netif->features_disabled &= ~NETIF_F_IP_CSUM; +- } else { +- netif->features_disabled |= NETIF_F_IP_CSUM; +- } +- +- netif_set_features(netif); +- return 0; +-} +- +-static int netbk_set_sg(struct net_device *dev, u32 data) +-{ +- struct xen_netif *netif = netdev_priv(dev); +- if (data) { +- if (!netif->can_sg) +- return -ENOSYS; +- netif->features_disabled &= ~NETIF_F_SG; +- } else { +- netif->features_disabled |= NETIF_F_SG; +- } +- +- netif_set_features(netif); +- return 0; +-} +- +-static int netbk_set_tso(struct net_device *dev, u32 data) +-{ +- struct xen_netif *netif = netdev_priv(dev); +- if (data) { +- if (!netif->gso && !netif->gso_prefix) +- return -ENOSYS; +- netif->features_disabled &= ~NETIF_F_TSO; +- } else { +- netif->features_disabled |= NETIF_F_TSO; +- } +- +- netif_set_features(netif); +- return 0; +-} +- +-static void netbk_get_drvinfo(struct net_device *dev, +- struct ethtool_drvinfo *info) +-{ +- strcpy(info->driver, "netbk"); +- strcpy(info->bus_info, dev_name(dev->dev.parent)); +-} +- +-static const struct netif_stat { +- char name[ETH_GSTRING_LEN]; +- u16 offset; +-} netbk_stats[] = { +- { "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) }, +-}; +- +-static int netbk_get_sset_count(struct net_device *dev, int string_set) +-{ +- switch (string_set) { +- case ETH_SS_STATS: +- return ARRAY_SIZE(netbk_stats); +- default: +- return -EINVAL; +- } +-} +- +-static void netbk_get_ethtool_stats(struct net_device *dev, +- struct ethtool_stats *stats, u64 * data) +-{ +- void *netif = netdev_priv(dev); +- int i; +- +- for (i = 0; i < ARRAY_SIZE(netbk_stats); i++) +- data[i] = *(int *)(netif + netbk_stats[i].offset); +-} +- +-static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data) +-{ +- int i; +- +- switch (stringset) { +- case ETH_SS_STATS: +- for (i = 0; i < ARRAY_SIZE(netbk_stats); i++) +- memcpy(data + i * ETH_GSTRING_LEN, +- netbk_stats[i].name, ETH_GSTRING_LEN); +- break; +- } +-} +- +-static struct ethtool_ops network_ethtool_ops = { +- .get_drvinfo = netbk_get_drvinfo, +- +- .get_tx_csum = ethtool_op_get_tx_csum, +- .set_tx_csum = netbk_set_tx_csum, +- .get_sg = ethtool_op_get_sg, +- .set_sg = netbk_set_sg, +- .get_tso = ethtool_op_get_tso, +- .set_tso = netbk_set_tso, +- .get_link = ethtool_op_get_link, +- +- .get_sset_count = netbk_get_sset_count, +- .get_ethtool_stats = netbk_get_ethtool_stats, +- .get_strings = netbk_get_strings, +-}; +- +-static struct net_device_ops netback_ops = { +- .ndo_start_xmit = netif_be_start_xmit, +- .ndo_get_stats = netif_be_get_stats, +- .ndo_open = net_open, +- .ndo_stop = net_close, +- .ndo_change_mtu = netbk_change_mtu, +-}; +- +-struct xen_netif *netif_alloc(struct device *parent, domid_t domid, +- unsigned int handle) +-{ +- int err = 0; +- struct 
net_device *dev; +- struct xen_netif *netif; +- char name[IFNAMSIZ] = {}; +- +- snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle); +- dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup); +- if (dev == NULL) { +- pr_debug("Could not allocate netdev\n"); +- return ERR_PTR(-ENOMEM); +- } +- +- SET_NETDEV_DEV(dev, parent); +- +- netif = netdev_priv(dev); +- memset(netif, 0, sizeof(*netif)); +- netif->domid = domid; +- netif->group = -1; +- netif->handle = handle; +- netif->can_sg = 1; +- netif->csum = 1; +- atomic_set(&netif->refcnt, 1); +- init_waitqueue_head(&netif->waiting_to_free); +- netif->dev = dev; +- INIT_LIST_HEAD(&netif->list); +- +- netback_carrier_off(netif); +- +- netif->credit_bytes = netif->remaining_credit = ~0UL; +- netif->credit_usec = 0UL; +- init_timer(&netif->credit_timeout); +- /* Initialize 'expires' now: it's used to track the credit window. */ +- netif->credit_timeout.expires = jiffies; +- +- dev->netdev_ops = &netback_ops; +- netif_set_features(netif); +- SET_ETHTOOL_OPS(dev, &network_ethtool_ops); +- +- dev->tx_queue_len = netbk_queue_length; +- +- /* +- * Initialise a dummy MAC address. We choose the numerically +- * largest non-broadcast address to prevent the address getting +- * stolen by an Ethernet bridge for STP purposes. +- * (FE:FF:FF:FF:FF:FF) +- */ +- memset(dev->dev_addr, 0xFF, ETH_ALEN); +- dev->dev_addr[0] &= ~0x01; +- +- rtnl_lock(); +- err = register_netdevice(dev); +- rtnl_unlock(); +- if (err) { +- pr_debug("Could not register new net device %s: err=%d\n", +- dev->name, err); +- free_netdev(dev); +- return ERR_PTR(err); +- } +- +- pr_debug("Successfully created netif\n"); +- return netif; +-} +- +-static int map_frontend_pages(struct xen_netif *netif, +- grant_ref_t tx_ring_ref, +- grant_ref_t rx_ring_ref) +-{ +- struct gnttab_map_grant_ref op; +- +- gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr, +- GNTMAP_host_map, tx_ring_ref, netif->domid); +- +- if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) +- BUG(); +- +- if (op.status) { +- pr_debug("Gnttab failure mapping tx_ring_ref!\n"); +- return op.status; +- } +- +- netif->tx_shmem_ref = tx_ring_ref; +- netif->tx_shmem_handle = op.handle; +- +- gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr, +- GNTMAP_host_map, rx_ring_ref, netif->domid); +- +- if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) +- BUG(); +- +- if (op.status) { +- struct gnttab_unmap_grant_ref unop; +- +- gnttab_set_unmap_op(&unop, +- (unsigned long)netif->tx_comms_area->addr, +- GNTMAP_host_map, netif->tx_shmem_handle); +- HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1); +- pr_debug("Gnttab failure mapping rx_ring_ref!\n"); +- return op.status; +- } +- +- netif->rx_shmem_ref = rx_ring_ref; +- netif->rx_shmem_handle = op.handle; +- +- return 0; +-} +- +-static void unmap_frontend_pages(struct xen_netif *netif) +-{ +- struct gnttab_unmap_grant_ref op; +- +- gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr, +- GNTMAP_host_map, netif->tx_shmem_handle); +- +- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) +- BUG(); +- +- gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr, +- GNTMAP_host_map, netif->rx_shmem_handle); +- +- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) +- BUG(); +-} +- +-int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref, +- unsigned long rx_ring_ref, unsigned int evtchn) +-{ +- int err = -ENOMEM; +- struct xen_netif_tx_sring *txs; +- struct 
xen_netif_rx_sring *rxs; +- +- /* Already connected through? */ +- if (netif->irq) +- return 0; +- +- netif->tx_comms_area = alloc_vm_area(PAGE_SIZE); +- if (netif->tx_comms_area == NULL) +- return -ENOMEM; +- netif->rx_comms_area = alloc_vm_area(PAGE_SIZE); +- if (netif->rx_comms_area == NULL) +- goto err_rx; +- +- err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref); +- if (err) +- goto err_map; +- +- err = bind_interdomain_evtchn_to_irqhandler( +- netif->domid, evtchn, netif_be_int, 0, +- netif->dev->name, netif); +- if (err < 0) +- goto err_hypervisor; +- netif->irq = err; +- disable_irq(netif->irq); +- +- txs = (struct xen_netif_tx_sring *)netif->tx_comms_area->addr; +- BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE); +- +- rxs = (struct xen_netif_rx_sring *) +- ((char *)netif->rx_comms_area->addr); +- BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE); +- +- netif->rx_req_cons_peek = 0; +- +- netif_get(netif); +- +- rtnl_lock(); +- netback_carrier_on(netif); +- if (netif_running(netif->dev)) +- __netif_up(netif); +- rtnl_unlock(); +- +- return 0; +-err_hypervisor: +- unmap_frontend_pages(netif); +-err_map: +- free_vm_area(netif->rx_comms_area); +-err_rx: +- free_vm_area(netif->tx_comms_area); +- return err; +-} +- +-void netif_disconnect(struct xen_netif *netif) +-{ +- if (netback_carrier_ok(netif)) { +- rtnl_lock(); +- netback_carrier_off(netif); +- netif_carrier_off(netif->dev); /* discard queued packets */ +- if (netif_running(netif->dev)) +- __netif_down(netif); +- rtnl_unlock(); +- netif_put(netif); +- } +- +- atomic_dec(&netif->refcnt); +- wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0); +- +- del_timer_sync(&netif->credit_timeout); +- +- if (netif->irq) +- unbind_from_irqhandler(netif->irq, netif); +- +- unregister_netdev(netif->dev); +- +- if (netif->tx.sring) { +- unmap_frontend_pages(netif); +- free_vm_area(netif->tx_comms_area); +- free_vm_area(netif->rx_comms_area); +- } +- +- free_netdev(netif->dev); +-} +diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c +deleted file mode 100644 +index e0ca232..0000000 +--- a/drivers/xen/netback/netback.c ++++ /dev/null +@@ -1,1909 +0,0 @@ +-/* +- * Back-end of the driver for virtual network devices. This portion of the +- * driver exports a 'unified' network-device interface that can be accessed +- * by any operating system that implements a compatible front end. A +- * reference front-end implementation can be found in: +- * drivers/net/xen-netfront.c +- * +- * Copyright (c) 2002-2005, K A Fraser +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public License version 2 +- * as published by the Free Software Foundation; or, when distributed +- * separately from the Linux kernel or incorporated into other +- * software packages, subject to the following license: +- * +- * Permission is hereby granted, free of charge, to any person obtaining a copy +- * of this source file (the "Software"), to deal in the Software without +- * restriction, including without limitation the rights to use, copy, modify, +- * merge, publish, distribute, sublicense, and/or sell copies of the Software, +- * and to permit persons to whom the Software is furnished to do so, subject to +- * the following conditions: +- * +- * The above copyright notice and this permission notice shall be included in +- * all copies or substantial portions of the Software. 
+- * +- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +- * IN THE SOFTWARE. +- */ +- +-#include "common.h" +- +-#include +-#include +-#include +- +-#include +- +-#include +-#include +-#include +- +-#include +-#include +- +-/*define NETBE_DEBUG_INTERRUPT*/ +- +-struct xen_netbk *xen_netbk; +-int xen_netbk_group_nr; +- +-static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx); +-static void make_tx_response(struct xen_netif *netif, +- struct xen_netif_tx_request *txp, +- s8 st); +-static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif, +- u16 id, +- s8 st, +- u16 offset, +- u16 size, +- u16 flags); +- +-static void net_tx_action(unsigned long data); +- +-static void net_rx_action(unsigned long data); +- +-static inline unsigned long idx_to_pfn(struct xen_netbk *netbk, +- unsigned int idx) +-{ +- return page_to_pfn(netbk->mmap_pages[idx]); +-} +- +-static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk, +- unsigned int idx) +-{ +- return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx)); +-} +- +-/* extra field used in struct page */ +-static inline void netif_set_page_ext(struct page *pg, +- unsigned int group, unsigned int idx) +-{ +- union page_ext ext = { .e = { .group = group + 1, .idx = idx } }; +- +- BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping)); +- pg->mapping = ext.mapping; +-} +- +-static int netif_get_page_ext(struct page *pg, +- unsigned int *_group, unsigned int *_idx) +-{ +- union page_ext ext = { .mapping = pg->mapping }; +- struct xen_netbk *netbk; +- unsigned int group, idx; +- +- if (!PageForeign(pg)) +- return 0; +- +- group = ext.e.group - 1; +- +- if (group < 0 || group >= xen_netbk_group_nr) +- return 0; +- +- netbk = &xen_netbk[group]; +- +- if (netbk->mmap_pages == NULL) +- return 0; +- +- idx = ext.e.idx; +- +- if ((idx < 0) || (idx >= MAX_PENDING_REQS)) +- return 0; +- +- if (netbk->mmap_pages[idx] != pg) +- return 0; +- +- *_group = group; +- *_idx = idx; +- +- return 1; +-} +- +-/* +- * This is the amount of packet we copy rather than map, so that the +- * guest can't fiddle with the contents of the headers while we do +- * packet processing on them (netfilter, routing, etc). +- */ +-#define PKT_PROT_LEN (ETH_HLEN + \ +- VLAN_HLEN + \ +- sizeof(struct iphdr) + MAX_IPOPTLEN + \ +- sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE) +- +-static inline pending_ring_idx_t pending_index(unsigned i) +-{ +- return i & (MAX_PENDING_REQS-1); +-} +- +-static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk) +-{ +- return MAX_PENDING_REQS - +- netbk->pending_prod + netbk->pending_cons; +-} +- +-/* Setting this allows the safe use of this driver without netloop. */ +-static int MODPARM_copy_skb = 1; +-module_param_named(copy_skb, MODPARM_copy_skb, bool, 0); +-MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop"); +- +-int netbk_copy_skb_mode; +- +-static int MODPARM_netback_kthread; +-module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0); +-MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet"); +- +-/* +- * Netback bottom half handler. 
+- * dir indicates the data direction. +- * rx: 1, tx: 0. +- */ +-static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir) +-{ +- if (MODPARM_netback_kthread) +- wake_up(&netbk->kthread.netbk_action_wq); +- else if (dir) +- tasklet_schedule(&netbk->tasklet.net_rx_tasklet); +- else +- tasklet_schedule(&netbk->tasklet.net_tx_tasklet); +-} +- +-static inline void maybe_schedule_tx_action(struct xen_netbk *netbk) +-{ +- smp_mb(); +- if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) && +- !list_empty(&netbk->net_schedule_list)) +- xen_netbk_bh_handler(netbk, 0); +-} +- +-static struct sk_buff *netbk_copy_skb(struct sk_buff *skb) +-{ +- struct skb_shared_info *ninfo; +- struct sk_buff *nskb; +- unsigned long offset; +- int ret; +- int len; +- int headlen; +- +- BUG_ON(skb_shinfo(skb)->frag_list != NULL); +- +- nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN); +- if (unlikely(!nskb)) +- goto err; +- +- skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN); +- headlen = skb_end_pointer(nskb) - nskb->data; +- if (headlen > skb_headlen(skb)) +- headlen = skb_headlen(skb); +- ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen); +- BUG_ON(ret); +- +- ninfo = skb_shinfo(nskb); +- ninfo->gso_size = skb_shinfo(skb)->gso_size; +- ninfo->gso_type = skb_shinfo(skb)->gso_type; +- +- offset = headlen; +- len = skb->len - headlen; +- +- nskb->len = skb->len; +- nskb->data_len = len; +- nskb->truesize += len; +- +- while (len) { +- struct page *page; +- int copy; +- int zero; +- +- if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) { +- dump_stack(); +- goto err_free; +- } +- +- copy = len >= PAGE_SIZE ? PAGE_SIZE : len; +- zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO; +- +- page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero); +- if (unlikely(!page)) +- goto err_free; +- +- ret = skb_copy_bits(skb, offset, page_address(page), copy); +- BUG_ON(ret); +- +- ninfo->frags[ninfo->nr_frags].page = page; +- ninfo->frags[ninfo->nr_frags].page_offset = 0; +- ninfo->frags[ninfo->nr_frags].size = copy; +- ninfo->nr_frags++; +- +- offset += copy; +- len -= copy; +- } +- +-#ifdef NET_SKBUFF_DATA_USES_OFFSET +- offset = 0; +-#else +- offset = nskb->data - skb->data; +-#endif +- +- nskb->transport_header = skb->transport_header + offset; +- nskb->network_header = skb->network_header + offset; +- nskb->mac_header = skb->mac_header + offset; +- +- return nskb; +- +- err_free: +- kfree_skb(nskb); +- err: +- return NULL; +-} +- +-static inline int netbk_max_required_rx_slots(struct xen_netif *netif) +-{ +- if (netif->can_sg || netif->gso || netif->gso_prefix) +- return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */ +- return 1; /* all in one */ +-} +- +-static inline int netbk_queue_full(struct xen_netif *netif) +-{ +- RING_IDX peek = netif->rx_req_cons_peek; +- RING_IDX needed = netbk_max_required_rx_slots(netif); +- +- return ((netif->rx.sring->req_prod - peek) < needed) || +- ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed); +-} +- +-/* +- * Returns true if we should start a new receive buffer instead of +- * adding 'size' bytes to a buffer which currently contains 'offset' +- * bytes. +- */ +-static bool start_new_rx_buffer(int offset, unsigned long size, int head) +-{ +- /* simple case: we have completely filled the current buffer. 
*/ +- if (offset == MAX_BUFFER_OFFSET) +- return true; +- +- /* +- * complex case: start a fresh buffer if the current frag +- * would overflow the current buffer but only if: +- * (i) this frag would fit completely in the next buffer +- * and (ii) there is already some data in the current buffer +- * and (iii) this is not the head buffer. +- * +- * Where: +- * - (i) stops us splitting a frag into two copies +- * unless the frag is too large for a single buffer. +- * - (ii) stops us from leaving a buffer pointlessly empty. +- * - (iii) stops us leaving the first buffer +- * empty. Strictly speaking this is already covered +- * by (ii) but is explicitly checked because +- * netfront relies on the first buffer being +- * non-empty and can crash otherwise. +- * +- * This means we will effectively linearise small +- * frags but do not needlessly split large buffers +- * into multiple copies tend to give large frags their +- * own buffers as before. +- */ +- if ((offset + size > MAX_BUFFER_OFFSET) && +- (size <= MAX_BUFFER_OFFSET) && offset && !head) +- return true; +- +- return false; +-} +- +-/* +- * Figure out how many ring slots we're going to need to send @skb to +- * the guest. This function is essentially a dry run of +- * netbk_gop_frag_copy. +- */ +-static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif) +-{ +- unsigned int count = 1; +- int i, copy_off = 0; +- +- BUG_ON(offset_in_page(skb->data)+skb_headlen(skb) > MAX_BUFFER_OFFSET); +- +- copy_off = skb_headlen(skb); +- +- if (skb_shinfo(skb)->gso_size) +- count++; +- +- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { +- unsigned long size = skb_shinfo(skb)->frags[i].size; +- unsigned long bytes; +- while (size > 0) { +- BUG_ON(copy_off > MAX_BUFFER_OFFSET); +- +- if (start_new_rx_buffer(copy_off, size, 0)) { +- count++; +- copy_off = 0; +- } +- +- bytes = size; +- if (copy_off + bytes > MAX_BUFFER_OFFSET) +- bytes = MAX_BUFFER_OFFSET - copy_off; +- +- copy_off += bytes; +- size -= bytes; +- } +- } +- return count; +-} +- +-int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) +-{ +- struct xen_netif *netif = netdev_priv(dev); +- struct xen_netbk *netbk; +- +- BUG_ON(skb->dev != dev); +- +- if (netif->group == -1) +- goto drop; +- +- netbk = &xen_netbk[netif->group]; +- +- /* Drop the packet if the target domain has no receive buffers. */ +- if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif))) +- goto drop; +- +- /* +- * XXX For now we also copy skbuffs whose head crosses a page +- * boundary, because netbk_gop_skb can't handle them. +- */ +- if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) { +- struct sk_buff *nskb = netbk_copy_skb(skb); +- if (unlikely(nskb == NULL)) +- goto drop; +- /* Copy only the header fields we use in this driver. */ +- nskb->dev = skb->dev; +- nskb->ip_summed = skb->ip_summed; +- dev_kfree_skb(skb); +- skb = nskb; +- } +- +- /* Reserve ring slots for the worst-case number of fragments. 
*/ +- netif->rx_req_cons_peek += count_skb_slots(skb, netif); +- netif_get(netif); +- +- if (netbk_can_queue(dev) && netbk_queue_full(netif)) { +- netif->rx.sring->req_event = netif->rx_req_cons_peek + +- netbk_max_required_rx_slots(netif); +- mb(); /* request notification /then/ check & stop the queue */ +- if (netbk_queue_full(netif)) +- netif_stop_queue(dev); +- } +- skb_queue_tail(&netbk->rx_queue, skb); +- +- xen_netbk_bh_handler(netbk, 1); +- +- return 0; +- +- drop: +- netif->stats.tx_dropped++; +- dev_kfree_skb(skb); +- return 0; +-} +- +-struct netrx_pending_operations { +- unsigned copy_prod, copy_cons; +- unsigned meta_prod, meta_cons; +- struct gnttab_copy *copy; +- struct netbk_rx_meta *meta; +- int copy_off; +- grant_ref_t copy_gref; +-}; +- +-static struct netbk_rx_meta *get_next_rx_buffer(struct xen_netif *netif, +- struct netrx_pending_operations *npo) +-{ +- struct netbk_rx_meta *meta; +- struct xen_netif_rx_request *req; +- +- req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++); +- +- meta = npo->meta + npo->meta_prod++; +- meta->gso_size = 0; +- meta->size = 0; +- meta->id = req->id; +- +- npo->copy_off = 0; +- npo->copy_gref = req->gref; +- +- return meta; +-} +- +-/* +- * Set up the grant operations for this fragment. If it's a flipping +- * interface, we also set up the unmap request from here. +- */ +-static void netbk_gop_frag_copy(struct xen_netif *netif, +- struct netrx_pending_operations *npo, +- struct page *page, unsigned long size, +- unsigned long offset, int head) +-{ +- struct gnttab_copy *copy_gop; +- struct netbk_rx_meta *meta; +- /* +- * These variables a used iff netif_get_page_ext returns true, +- * in which case they are guaranteed to be initialized. +- */ +- unsigned int uninitialized_var(group), uninitialized_var(idx); +- int foreign = netif_get_page_ext(page, &group, &idx); +- unsigned long bytes; +- +- /* Data must not cross a page boundary. */ +- BUG_ON(size + offset > PAGE_SIZE); +- +- meta = npo->meta + npo->meta_prod - 1; +- +- while (size > 0) { +- BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET); +- +- if (start_new_rx_buffer(npo->copy_off, size, head)) { +- /* +- * Netfront requires there to be some data in +- * the head buffer. +- */ +- BUG_ON(head); +- +- meta = get_next_rx_buffer(netif, npo); +- } +- +- bytes = size; +- if (npo->copy_off + bytes > MAX_BUFFER_OFFSET) +- bytes = MAX_BUFFER_OFFSET - npo->copy_off; +- +- copy_gop = npo->copy + npo->copy_prod++; +- copy_gop->flags = GNTCOPY_dest_gref; +- if (foreign) { +- struct xen_netbk *netbk = &xen_netbk[group]; +- struct pending_tx_info *src_pend; +- +- src_pend = &netbk->pending_tx_info[idx]; +- +- copy_gop->source.domid = src_pend->netif->domid; +- copy_gop->source.u.ref = src_pend->req.gref; +- copy_gop->flags |= GNTCOPY_source_gref; +- } else { +- void *vaddr = page_address(page); +- copy_gop->source.domid = DOMID_SELF; +- copy_gop->source.u.gmfn = virt_to_mfn(vaddr); +- } +- copy_gop->source.offset = offset; +- copy_gop->dest.domid = netif->domid; +- +- copy_gop->dest.offset = npo->copy_off; +- copy_gop->dest.u.ref = npo->copy_gref; +- copy_gop->len = bytes; +- +- npo->copy_off += bytes; +- meta->size += bytes; +- +- offset += bytes; +- size -= bytes; +- head = 0; /* There must be something in this buffer now. */ +- } +-} +- +-/* +- * Prepare an SKB to be transmitted to the frontend. +- * +- * This function is responsible for allocating grant operations, meta +- * structures, etc. +- * +- * It returns the number of meta structures consumed. 
The number of +- * ring slots used is always equal to the number of meta slots used +- * plus the number of GSO descriptors used. Currently, we use either +- * zero GSO descriptors (for non-GSO packets) or one descriptor (for +- * frontend-side LRO). +- */ +-static int netbk_gop_skb(struct sk_buff *skb, +- struct netrx_pending_operations *npo) +-{ +- struct xen_netif *netif = netdev_priv(skb->dev); +- int nr_frags = skb_shinfo(skb)->nr_frags; +- int i; +- struct xen_netif_rx_request *req; +- struct netbk_rx_meta *meta; +- int old_meta_prod; +- +- old_meta_prod = npo->meta_prod; +- +- /* Set up a GSO prefix descriptor, if necessary */ +- if (skb_shinfo(skb)->gso_size && netif->gso_prefix) { +- req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++); +- meta = npo->meta + npo->meta_prod++; +- meta->gso_size = skb_shinfo(skb)->gso_size; +- meta->size = 0; +- meta->id = req->id; +- } +- +- req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++); +- meta = npo->meta + npo->meta_prod++; +- +- if (!netif->gso_prefix) +- meta->gso_size = skb_shinfo(skb)->gso_size; +- else +- meta->gso_size = 0; +- +- meta->size = 0; +- meta->id = req->id; +- npo->copy_off = 0; +- npo->copy_gref = req->gref; +- +- netbk_gop_frag_copy(netif, +- npo, virt_to_page(skb->data), +- skb_headlen(skb), +- offset_in_page(skb->data), 1); +- +- /* Leave a gap for the GSO descriptor. */ +- if (skb_shinfo(skb)->gso_size && !netif->gso_prefix) +- netif->rx.req_cons++; +- +- for (i = 0; i < nr_frags; i++) { +- netbk_gop_frag_copy(netif, npo, +- skb_shinfo(skb)->frags[i].page, +- skb_shinfo(skb)->frags[i].size, +- skb_shinfo(skb)->frags[i].page_offset, +- 0); +- } +- +- return npo->meta_prod - old_meta_prod; +-} +- +-/* +- * This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was +- * used to set up the operations on the top of +- * netrx_pending_operations, which have since been done. Check that +- * they didn't give any errors and advance over them. 
+- */ +-static int netbk_check_gop(int nr_meta_slots, domid_t domid, +- struct netrx_pending_operations *npo) +-{ +- struct gnttab_copy *copy_op; +- int status = NETIF_RSP_OKAY; +- int i; +- +- for (i = 0; i < nr_meta_slots; i++) { +- copy_op = npo->copy + npo->copy_cons++; +- if (copy_op->status != GNTST_okay) { +- pr_debug("Bad status %d from copy to DOM%d.\n", +- copy_op->status, domid); +- status = NETIF_RSP_ERROR; +- } +- } +- +- return status; +-} +- +-static void netbk_add_frag_responses(struct xen_netif *netif, int status, +- struct netbk_rx_meta *meta, +- int nr_meta_slots) +-{ +- int i; +- unsigned long offset; +- +- /* No fragments used */ +- if (nr_meta_slots <= 1) +- return; +- +- nr_meta_slots--; +- +- for (i = 0; i < nr_meta_slots; i++) { +- int flags; +- if (i == nr_meta_slots - 1) +- flags = 0; +- else +- flags = NETRXF_more_data; +- +- offset = 0; +- make_rx_response(netif, meta[i].id, status, offset, +- meta[i].size, flags); +- } +-} +- +-struct skb_cb_overlay { +- int meta_slots_used; +-}; +- +-static void net_rx_action(unsigned long data) +-{ +- struct xen_netif *netif = NULL; +- struct xen_netbk *netbk = (struct xen_netbk *)data; +- s8 status; +- u16 irq, flags; +- struct xen_netif_rx_response *resp; +- struct sk_buff_head rxq; +- struct sk_buff *skb; +- int notify_nr = 0; +- int ret; +- int nr_frags; +- int count; +- unsigned long offset; +- struct skb_cb_overlay *sco; +- +- struct netrx_pending_operations npo = { +- .copy = netbk->grant_copy_op, +- .meta = netbk->meta, +- }; +- +- skb_queue_head_init(&rxq); +- +- count = 0; +- +- while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) { +- netif = netdev_priv(skb->dev); +- nr_frags = skb_shinfo(skb)->nr_frags; +- +- sco = (struct skb_cb_overlay *)skb->cb; +- sco->meta_slots_used = netbk_gop_skb(skb, &npo); +- +- count += nr_frags + 1; +- +- __skb_queue_tail(&rxq, skb); +- +- /* Filled the batch queue? */ +- if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE) +- break; +- } +- +- BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta)); +- +- if (!npo.copy_prod) +- return; +- +- BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op)); +- ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op, +- npo.copy_prod); +- BUG_ON(ret != 0); +- +- while ((skb = __skb_dequeue(&rxq)) != NULL) { +- sco = (struct skb_cb_overlay *)skb->cb; +- +- netif = netdev_priv(skb->dev); +- +- if (netbk->meta[npo.meta_cons].gso_size && netif->gso_prefix) { +- resp = RING_GET_RESPONSE(&netif->rx, +- netif->rx.rsp_prod_pvt++); +- +- resp->flags = NETRXF_gso_prefix | NETRXF_more_data; +- +- resp->offset = netbk->meta[npo.meta_cons].gso_size; +- resp->id = netbk->meta[npo.meta_cons].id; +- resp->status = sco->meta_slots_used; +- +- npo.meta_cons++; +- sco->meta_slots_used--; +- } +- +- +- netif->stats.tx_bytes += skb->len; +- netif->stats.tx_packets++; +- +- status = netbk_check_gop(sco->meta_slots_used, +- netif->domid, &npo); +- +- if (sco->meta_slots_used == 1) +- flags = 0; +- else +- flags = NETRXF_more_data; +- +- if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */ +- flags |= NETRXF_csum_blank | NETRXF_data_validated; +- else if (skb->ip_summed == CHECKSUM_UNNECESSARY) +- /* remote but checksummed. 
*/ +- flags |= NETRXF_data_validated; +- +- offset = 0; +- resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id, +- status, offset, +- netbk->meta[npo.meta_cons].size, +- flags); +- +- if (netbk->meta[npo.meta_cons].gso_size && !netif->gso_prefix) { +- struct xen_netif_extra_info *gso = +- (struct xen_netif_extra_info *) +- RING_GET_RESPONSE(&netif->rx, +- netif->rx.rsp_prod_pvt++); +- +- resp->flags |= NETRXF_extra_info; +- +- gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size; +- gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; +- gso->u.gso.pad = 0; +- gso->u.gso.features = 0; +- +- gso->type = XEN_NETIF_EXTRA_TYPE_GSO; +- gso->flags = 0; +- } +- +- netbk_add_frag_responses(netif, status, +- netbk->meta + npo.meta_cons + 1, +- sco->meta_slots_used); +- +- RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret); +- irq = netif->irq; +- if (ret && !netbk->rx_notify[irq]) { +- netbk->rx_notify[irq] = 1; +- netbk->notify_list[notify_nr++] = irq; +- } +- +- if (netif_queue_stopped(netif->dev) && +- netif_schedulable(netif) && +- !netbk_queue_full(netif)) +- netif_wake_queue(netif->dev); +- +- netif_put(netif); +- npo.meta_cons += sco->meta_slots_used; +- dev_kfree_skb(skb); +- } +- +- while (notify_nr != 0) { +- irq = netbk->notify_list[--notify_nr]; +- netbk->rx_notify[irq] = 0; +- notify_remote_via_irq(irq); +- } +- +- /* More work to do? */ +- if (!skb_queue_empty(&netbk->rx_queue) && +- !timer_pending(&netbk->net_timer)) +- xen_netbk_bh_handler(netbk, 1); +-} +- +-static void net_alarm(unsigned long data) +-{ +- struct xen_netbk *netbk = (struct xen_netbk *)data; +- xen_netbk_bh_handler(netbk, 1); +-} +- +-static void netbk_tx_pending_timeout(unsigned long data) +-{ +- struct xen_netbk *netbk = (struct xen_netbk *)data; +- xen_netbk_bh_handler(netbk, 0); +-} +- +-struct net_device_stats *netif_be_get_stats(struct net_device *dev) +-{ +- struct xen_netif *netif = netdev_priv(dev); +- return &netif->stats; +-} +- +-static int __on_net_schedule_list(struct xen_netif *netif) +-{ +- return !list_empty(&netif->list); +-} +- +-/* Must be called with net_schedule_list_lock held */ +-static void remove_from_net_schedule_list(struct xen_netif *netif) +-{ +- if (likely(__on_net_schedule_list(netif))) { +- list_del_init(&netif->list); +- netif_put(netif); +- } +-} +- +-static struct xen_netif *poll_net_schedule_list(struct xen_netbk *netbk) +-{ +- struct xen_netif *netif = NULL; +- +- spin_lock_irq(&netbk->net_schedule_list_lock); +- if (list_empty(&netbk->net_schedule_list)) +- goto out; +- +- netif = list_first_entry(&netbk->net_schedule_list, +- struct xen_netif, list); +- if (!netif) +- goto out; +- +- netif_get(netif); +- +- remove_from_net_schedule_list(netif); +-out: +- spin_unlock_irq(&netbk->net_schedule_list_lock); +- return netif; +-} +- +-static void add_to_net_schedule_list_tail(struct xen_netif *netif) +-{ +- unsigned long flags; +- +- struct xen_netbk *netbk = &xen_netbk[netif->group]; +- if (__on_net_schedule_list(netif)) +- return; +- +- spin_lock_irqsave(&netbk->net_schedule_list_lock, flags); +- if (!__on_net_schedule_list(netif) && +- likely(netif_schedulable(netif))) { +- list_add_tail(&netif->list, &netbk->net_schedule_list); +- netif_get(netif); +- } +- spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags); +-} +- +-void netif_schedule_work(struct xen_netif *netif) +-{ +- struct xen_netbk *netbk = &xen_netbk[netif->group]; +- int more_to_do; +- +- RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); +- +- if (more_to_do) { +- add_to_net_schedule_list_tail(netif); 
+- maybe_schedule_tx_action(netbk); +- } +-} +- +-void netif_deschedule_work(struct xen_netif *netif) +-{ +- struct xen_netbk *netbk = &xen_netbk[netif->group]; +- spin_lock_irq(&netbk->net_schedule_list_lock); +- remove_from_net_schedule_list(netif); +- spin_unlock_irq(&netbk->net_schedule_list_lock); +-} +- +- +-static void tx_add_credit(struct xen_netif *netif) +-{ +- unsigned long max_burst, max_credit; +- +- /* +- * Allow a burst big enough to transmit a jumbo packet of up to 128kB. +- * Otherwise the interface can seize up due to insufficient credit. +- */ +- max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size; +- max_burst = min(max_burst, 131072UL); +- max_burst = max(max_burst, netif->credit_bytes); +- +- /* Take care that adding a new chunk of credit doesn't wrap to zero. */ +- max_credit = netif->remaining_credit + netif->credit_bytes; +- if (max_credit < netif->remaining_credit) +- max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */ +- +- netif->remaining_credit = min(max_credit, max_burst); +-} +- +-static void tx_credit_callback(unsigned long data) +-{ +- struct xen_netif *netif = (struct xen_netif *)data; +- tx_add_credit(netif); +- netif_schedule_work(netif); +-} +- +-static inline int copy_pending_req(struct xen_netbk *netbk, +- pending_ring_idx_t pending_idx) +-{ +- return gnttab_copy_grant_page( +- netbk->grant_tx_handle[pending_idx], +- &netbk->mmap_pages[pending_idx]); +-} +- +-static inline void net_tx_action_dealloc(struct xen_netbk *netbk) +-{ +- struct netbk_tx_pending_inuse *inuse, *n; +- struct gnttab_unmap_grant_ref *gop; +- u16 pending_idx; +- pending_ring_idx_t dc, dp; +- struct xen_netif *netif; +- int ret; +- LIST_HEAD(list); +- +- dc = netbk->dealloc_cons; +- gop = netbk->tx_unmap_ops; +- +- /* Free up any grants we have finished using. */ +- do { +- dp = netbk->dealloc_prod; +- +- /* Ensure we see all indices enqueued by netif_idx_release(). */ +- smp_rmb(); +- +- while (dc != dp) { +- unsigned long pfn; +- struct netbk_tx_pending_inuse *pending_inuse = +- netbk->pending_inuse; +- +- pending_idx = netbk->dealloc_ring[pending_index(dc++)]; +- list_move_tail(&pending_inuse[pending_idx].list, &list); +- +- pfn = idx_to_pfn(netbk, pending_idx); +- /* Already unmapped? 
*/ +- if (!phys_to_machine_mapping_valid(pfn)) +- continue; +- +- gnttab_set_unmap_op(gop, +- idx_to_kaddr(netbk, pending_idx), +- GNTMAP_host_map, +- netbk->grant_tx_handle[pending_idx]); +- gop++; +- } +- +- } while (dp != netbk->dealloc_prod); +- +- netbk->dealloc_cons = dc; +- +- ret = HYPERVISOR_grant_table_op( +- GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops, +- gop - netbk->tx_unmap_ops); +- BUG_ON(ret); +- +- /* +- * Copy any entries that have been pending for too long +- */ +- if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB && +- !list_empty(&netbk->pending_inuse_head)) { +- list_for_each_entry_safe(inuse, n, +- &netbk->pending_inuse_head, list) { +- struct pending_tx_info *pending_tx_info; +- pending_tx_info = netbk->pending_tx_info; +- +- if (time_after(inuse->alloc_time + HZ / 2, jiffies)) +- break; +- +- pending_idx = inuse - netbk->pending_inuse; +- +- pending_tx_info[pending_idx].netif->nr_copied_skbs++; +- +- switch (copy_pending_req(netbk, pending_idx)) { +- case 0: +- list_move_tail(&inuse->list, &list); +- continue; +- case -EBUSY: +- list_del_init(&inuse->list); +- continue; +- case -ENOENT: +- continue; +- } +- +- break; +- } +- } +- +- list_for_each_entry_safe(inuse, n, &list, list) { +- struct pending_tx_info *pending_tx_info; +- pending_ring_idx_t index; +- +- pending_tx_info = netbk->pending_tx_info; +- pending_idx = inuse - netbk->pending_inuse; +- +- netif = pending_tx_info[pending_idx].netif; +- +- make_tx_response(netif, &pending_tx_info[pending_idx].req, +- NETIF_RSP_OKAY); +- +- /* Ready for next use. */ +- gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]); +- +- index = pending_index(netbk->pending_prod++); +- netbk->pending_ring[index] = pending_idx; +- +- netif_put(netif); +- +- list_del_init(&inuse->list); +- } +-} +- +-static void netbk_tx_err(struct xen_netif *netif, +- struct xen_netif_tx_request *txp, RING_IDX end) +-{ +- RING_IDX cons = netif->tx.req_cons; +- +- do { +- make_tx_response(netif, txp, NETIF_RSP_ERROR); +- if (cons >= end) +- break; +- txp = RING_GET_REQUEST(&netif->tx, cons++); +- } while (1); +- netif->tx.req_cons = cons; +- netif_schedule_work(netif); +- netif_put(netif); +-} +- +-static int netbk_count_requests(struct xen_netif *netif, +- struct xen_netif_tx_request *first, +- struct xen_netif_tx_request *txp, +- int work_to_do) +-{ +- RING_IDX cons = netif->tx.req_cons; +- int frags = 0; +- +- if (!(first->flags & NETTXF_more_data)) +- return 0; +- +- do { +- if (frags >= work_to_do) { +- DPRINTK("Need more frags\n"); +- return -frags; +- } +- +- if (unlikely(frags >= MAX_SKB_FRAGS)) { +- DPRINTK("Too many frags\n"); +- return -frags; +- } +- +- memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags), +- sizeof(*txp)); +- if (txp->size > first->size) { +- DPRINTK("Frags galore\n"); +- return -frags; +- } +- +- first->size -= txp->size; +- frags++; +- +- if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) { +- DPRINTK("txp->offset: %x, size: %u\n", +- txp->offset, txp->size); +- return -frags; +- } +- } while ((txp++)->flags & NETTXF_more_data); +- +- return frags; +-} +- +-static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk, +- struct xen_netif *netif, +- struct sk_buff *skb, +- struct xen_netif_tx_request *txp, +- struct gnttab_map_grant_ref *mop) +-{ +- struct skb_shared_info *shinfo = skb_shinfo(skb); +- skb_frag_t *frags = shinfo->frags; +- unsigned long pending_idx = *((u16 *)skb->data); +- int i, start; +- +- /* Skip first skb fragment if it is on same page as header fragment. 
*/ +- start = ((unsigned long)shinfo->frags[0].page == pending_idx); +- +- for (i = start; i < shinfo->nr_frags; i++, txp++) { +- pending_ring_idx_t index; +- struct pending_tx_info *pending_tx_info = +- netbk->pending_tx_info; +- +- index = pending_index(netbk->pending_cons++); +- pending_idx = netbk->pending_ring[index]; +- +- gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx), +- GNTMAP_host_map | GNTMAP_readonly, +- txp->gref, netif->domid); +- +- memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp)); +- netif_get(netif); +- pending_tx_info[pending_idx].netif = netif; +- frags[i].page = (void *)pending_idx; +- } +- +- return mop; +-} +- +-static int netbk_tx_check_mop(struct xen_netbk *netbk, +- struct sk_buff *skb, +- struct gnttab_map_grant_ref **mopp) +-{ +- struct gnttab_map_grant_ref *mop = *mopp; +- int pending_idx = *((u16 *)skb->data); +- struct pending_tx_info *pending_tx_info = netbk->pending_tx_info; +- struct xen_netif *netif = pending_tx_info[pending_idx].netif; +- struct xen_netif_tx_request *txp; +- struct skb_shared_info *shinfo = skb_shinfo(skb); +- int nr_frags = shinfo->nr_frags; +- int i, err, start; +- +- /* Check status of header. */ +- err = mop->status; +- if (unlikely(err)) { +- pending_ring_idx_t index; +- index = pending_index(netbk->pending_prod++); +- txp = &pending_tx_info[pending_idx].req; +- make_tx_response(netif, txp, NETIF_RSP_ERROR); +- netbk->pending_ring[index] = pending_idx; +- netif_put(netif); +- } else { +- set_phys_to_machine( +- __pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT, +- FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT)); +- netbk->grant_tx_handle[pending_idx] = mop->handle; +- } +- +- /* Skip first skb fragment if it is on same page as header fragment. */ +- start = ((unsigned long)shinfo->frags[0].page == pending_idx); +- +- for (i = start; i < nr_frags; i++) { +- int j, newerr; +- pending_ring_idx_t index; +- +- pending_idx = (unsigned long)shinfo->frags[i].page; +- +- /* Check error status: if okay then remember grant handle. */ +- newerr = (++mop)->status; +- if (likely(!newerr)) { +- unsigned long addr; +- addr = idx_to_kaddr(netbk, pending_idx); +- set_phys_to_machine( +- __pa(addr)>>PAGE_SHIFT, +- FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT)); +- netbk->grant_tx_handle[pending_idx] = mop->handle; +- /* Had a previous error? Invalidate this fragment. */ +- if (unlikely(err)) +- netif_idx_release(netbk, pending_idx); +- continue; +- } +- +- /* Error on this fragment: respond to client with an error. */ +- txp = &netbk->pending_tx_info[pending_idx].req; +- make_tx_response(netif, txp, NETIF_RSP_ERROR); +- index = pending_index(netbk->pending_prod++); +- netbk->pending_ring[index] = pending_idx; +- netif_put(netif); +- +- /* Not the first error? Preceding frags already invalidated. */ +- if (err) +- continue; +- +- /* First error: invalidate header and preceding fragments. */ +- pending_idx = *((u16 *)skb->data); +- netif_idx_release(netbk, pending_idx); +- for (j = start; j < i; j++) { +- pending_idx = (unsigned long)shinfo->frags[i].page; +- netif_idx_release(netbk, pending_idx); +- } +- +- /* Remember the error: invalidate all subsequent fragments. 
*/ +- err = newerr; +- } +- +- *mopp = mop + 1; +- return err; +-} +- +-static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb) +-{ +- struct skb_shared_info *shinfo = skb_shinfo(skb); +- int nr_frags = shinfo->nr_frags; +- int i; +- +- for (i = 0; i < nr_frags; i++) { +- skb_frag_t *frag = shinfo->frags + i; +- struct xen_netif_tx_request *txp; +- unsigned long pending_idx; +- +- pending_idx = (unsigned long)frag->page; +- +- netbk->pending_inuse[pending_idx].alloc_time = jiffies; +- list_add_tail(&netbk->pending_inuse[pending_idx].list, +- &netbk->pending_inuse_head); +- +- txp = &netbk->pending_tx_info[pending_idx].req; +- frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx)); +- frag->size = txp->size; +- frag->page_offset = txp->offset; +- +- skb->len += txp->size; +- skb->data_len += txp->size; +- skb->truesize += txp->size; +- } +-} +- +-int netbk_get_extras(struct xen_netif *netif, +- struct xen_netif_extra_info *extras, +- int work_to_do) +-{ +- struct xen_netif_extra_info extra; +- RING_IDX cons = netif->tx.req_cons; +- +- do { +- if (unlikely(work_to_do-- <= 0)) { +- pr_debug("Missing extra info\n"); +- return -EBADR; +- } +- +- memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons), +- sizeof(extra)); +- if (unlikely(!extra.type || +- extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) { +- netif->tx.req_cons = ++cons; +- pr_debug("Invalid extra type: %d\n", extra.type); +- return -EINVAL; +- } +- +- memcpy(&extras[extra.type - 1], &extra, sizeof(extra)); +- netif->tx.req_cons = ++cons; +- } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE); +- +- return work_to_do; +-} +- +-static int netbk_set_skb_gso(struct sk_buff *skb, +- struct xen_netif_extra_info *gso) +-{ +- if (!gso->u.gso.size) { +- pr_debug("GSO size must not be zero.\n"); +- return -EINVAL; +- } +- +- /* Currently only TCPv4 S.O. is supported. */ +- if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { +- pr_debug("Bad GSO type %d.\n", gso->u.gso.type); +- return -EINVAL; +- } +- +- skb_shinfo(skb)->gso_size = gso->u.gso.size; +- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; +- +- /* Header must be checked, and gso_segs computed. */ +- skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; +- skb_shinfo(skb)->gso_segs = 0; +- +- return 0; +-} +- +-static int skb_checksum_setup(struct sk_buff *skb) +-{ +- struct iphdr *iph; +- unsigned char *th; +- int err = -EPROTO; +- +- if (skb->protocol != htons(ETH_P_IP)) +- goto out; +- +- iph = (void *)skb->data; +- th = skb->data + 4 * iph->ihl; +- if (th >= skb_tail_pointer(skb)) +- goto out; +- +- skb->csum_start = th - skb->head; +- switch (iph->protocol) { +- case IPPROTO_TCP: +- skb->csum_offset = offsetof(struct tcphdr, check); +- break; +- case IPPROTO_UDP: +- skb->csum_offset = offsetof(struct udphdr, check); +- break; +- default: +- if (net_ratelimit()) +- printk(KERN_ERR "Attempting to checksum a non-" +- "TCP/UDP packet, dropping a protocol" +- " %d packet", iph->protocol); +- goto out; +- } +- +- if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb)) +- goto out; +- +- err = 0; +- +-out: +- return err; +-} +- +-static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size) +-{ +- unsigned long now = jiffies; +- unsigned long next_credit = +- netif->credit_timeout.expires + +- msecs_to_jiffies(netif->credit_usec / 1000); +- +- /* Timer could already be pending in rare cases. */ +- if (timer_pending(&netif->credit_timeout)) +- return true; +- +- /* Passed the point where we can replenish credit? 
*/ +- if (time_after_eq(now, next_credit)) { +- netif->credit_timeout.expires = now; +- tx_add_credit(netif); +- } +- +- /* Still too big to send right now? Set a callback. */ +- if (size > netif->remaining_credit) { +- netif->credit_timeout.data = +- (unsigned long)netif; +- netif->credit_timeout.function = +- tx_credit_callback; +- mod_timer(&netif->credit_timeout, +- next_credit); +- +- return true; +- } +- +- return false; +-} +- +-static unsigned net_tx_build_mops(struct xen_netbk *netbk) +-{ +- struct gnttab_map_grant_ref *mop; +- struct sk_buff *skb; +- int ret; +- +- mop = netbk->tx_map_ops; +- while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && +- !list_empty(&netbk->net_schedule_list)) { +- struct xen_netif *netif; +- struct xen_netif_tx_request txreq; +- struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS]; +- struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1]; +- u16 pending_idx; +- RING_IDX idx; +- int work_to_do; +- unsigned int data_len; +- pending_ring_idx_t index; +- +- /* Get a netif from the list with work to do. */ +- netif = poll_net_schedule_list(netbk); +- if (!netif) +- continue; +- +- RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do); +- if (!work_to_do) { +- netif_put(netif); +- continue; +- } +- +- idx = netif->tx.req_cons; +- rmb(); /* Ensure that we see the request before we copy it. */ +- memcpy(&txreq, RING_GET_REQUEST(&netif->tx, idx), sizeof(txreq)); +- +- /* Credit-based scheduling. */ +- if (txreq.size > netif->remaining_credit && +- tx_credit_exceeded(netif, txreq.size)) { +- netif_put(netif); +- continue; +- } +- +- netif->remaining_credit -= txreq.size; +- +- work_to_do--; +- netif->tx.req_cons = ++idx; +- +- memset(extras, 0, sizeof(extras)); +- if (txreq.flags & NETTXF_extra_info) { +- work_to_do = netbk_get_extras(netif, extras, +- work_to_do); +- idx = netif->tx.req_cons; +- if (unlikely(work_to_do < 0)) { +- netbk_tx_err(netif, &txreq, idx); +- continue; +- } +- } +- +- ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do); +- if (unlikely(ret < 0)) { +- netbk_tx_err(netif, &txreq, idx - ret); +- continue; +- } +- idx += ret; +- +- if (unlikely(txreq.size < ETH_HLEN)) { +- pr_debug("Bad packet size: %d\n", txreq.size); +- netbk_tx_err(netif, &txreq, idx); +- continue; +- } +- +- /* No crossing a page as the payload mustn't fragment. */ +- if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) { +- pr_debug("txreq.offset: %x, size: %u, end: %lu\n", +- txreq.offset, txreq.size, +- (txreq.offset&~PAGE_MASK) + txreq.size); +- netbk_tx_err(netif, &txreq, idx); +- continue; +- } +- +- index = pending_index(netbk->pending_cons); +- pending_idx = netbk->pending_ring[index]; +- +- data_len = (txreq.size > PKT_PROT_LEN && +- ret < MAX_SKB_FRAGS) ? +- PKT_PROT_LEN : txreq.size; +- +- skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN, +- GFP_ATOMIC | __GFP_NOWARN); +- if (unlikely(skb == NULL)) { +- pr_debug("Can't allocate a skb in start_xmit.\n"); +- netbk_tx_err(netif, &txreq, idx); +- break; +- } +- +- /* Packets passed to netif_rx() must have some headroom. 
*/ +- skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); +- +- if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { +- struct xen_netif_extra_info *gso; +- gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; +- +- if (netbk_set_skb_gso(skb, gso)) { +- kfree_skb(skb); +- netbk_tx_err(netif, &txreq, idx); +- continue; +- } +- } +- +- gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx), +- GNTMAP_host_map | GNTMAP_readonly, +- txreq.gref, netif->domid); +- mop++; +- +- memcpy(&netbk->pending_tx_info[pending_idx].req, +- &txreq, sizeof(txreq)); +- netbk->pending_tx_info[pending_idx].netif = netif; +- *((u16 *)skb->data) = pending_idx; +- +- __skb_put(skb, data_len); +- +- skb_shinfo(skb)->nr_frags = ret; +- if (data_len < txreq.size) { +- skb_shinfo(skb)->nr_frags++; +- skb_shinfo(skb)->frags[0].page = +- (void *)(unsigned long)pending_idx; +- } else { +- /* Discriminate from any valid pending_idx value. */ +- skb_shinfo(skb)->frags[0].page = (void *)~0UL; +- } +- +- __skb_queue_tail(&netbk->tx_queue, skb); +- +- netbk->pending_cons++; +- +- mop = netbk_get_requests(netbk, netif, skb, txfrags, mop); +- +- netif->tx.req_cons = idx; +- netif_schedule_work(netif); +- +- if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops)) +- break; +- } +- +- return mop - netbk->tx_map_ops; +-} +- +-static void net_tx_submit(struct xen_netbk *netbk) +-{ +- struct gnttab_map_grant_ref *mop; +- struct sk_buff *skb; +- +- mop = netbk->tx_map_ops; +- while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) { +- struct xen_netif_tx_request *txp; +- struct xen_netif *netif; +- u16 pending_idx; +- unsigned data_len; +- +- pending_idx = *((u16 *)skb->data); +- netif = netbk->pending_tx_info[pending_idx].netif; +- txp = &netbk->pending_tx_info[pending_idx].req; +- +- /* Check the remap error code. */ +- if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) { +- pr_debug("netback grant failed.\n"); +- skb_shinfo(skb)->nr_frags = 0; +- kfree_skb(skb); +- continue; +- } +- +- data_len = skb->len; +- memcpy(skb->data, +- (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset), +- data_len); +- if (data_len < txp->size) { +- /* Append the packet payload as a fragment. */ +- txp->offset += data_len; +- txp->size -= data_len; +- } else { +- /* Schedule a response immediately. */ +- netif_idx_release(netbk, pending_idx); +- } +- +- if (txp->flags & NETTXF_csum_blank) +- skb->ip_summed = CHECKSUM_PARTIAL; +- else if (txp->flags & NETTXF_data_validated) +- skb->ip_summed = CHECKSUM_UNNECESSARY; +- +- netbk_fill_frags(netbk, skb); +- +- /* +- * If the initial fragment was < PKT_PROT_LEN then +- * pull through some bytes from the other fragments to +- * increase the linear region to PKT_PROT_LEN bytes. 
+- */ +- if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) { +- int target = min_t(int, skb->len, PKT_PROT_LEN); +- __pskb_pull_tail(skb, target - skb_headlen(skb)); +- } +- +- skb->dev = netif->dev; +- skb->protocol = eth_type_trans(skb, skb->dev); +- +- if (skb->ip_summed == CHECKSUM_PARTIAL) { +- if (skb_checksum_setup(skb)) { +- pr_debug("skb_checksum_setup failed\n"); +- kfree_skb(skb); +- continue; +- } +- } else if (skb_is_gso(skb)) { +- pr_debug("GSO SKB checksum is not partial\n"); +- kfree_skb(skb); +- continue; +- } +- +- if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) && +- unlikely(skb_linearize(skb))) { +- DPRINTK("Can't linearize skb in net_tx_action.\n"); +- kfree_skb(skb); +- continue; +- } +- +- netif->stats.rx_bytes += skb->len; +- netif->stats.rx_packets++; +- +- netif_rx_ni(skb); +- netif->dev->last_rx = jiffies; +- } +-} +- +-/* Called after netfront has transmitted */ +-static void net_tx_action(unsigned long data) +-{ +- struct xen_netbk *netbk = (struct xen_netbk *)data; +- unsigned nr_mops; +- int ret; +- +- net_tx_action_dealloc(netbk); +- +- nr_mops = net_tx_build_mops(netbk); +- +- if (nr_mops == 0) +- goto out; +- +- ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, +- netbk->tx_map_ops, nr_mops); +- BUG_ON(ret); +- +- net_tx_submit(netbk); +-out: +- if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB && +- !list_empty(&netbk->pending_inuse_head)) { +- struct netbk_tx_pending_inuse *oldest; +- +- oldest = list_entry(netbk->pending_inuse_head.next, +- struct netbk_tx_pending_inuse, list); +- mod_timer(&netbk->netbk_tx_pending_timer, +- oldest->alloc_time + HZ); +- } +-} +- +-static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx) +-{ +- static DEFINE_SPINLOCK(_lock); +- unsigned long flags; +- pending_ring_idx_t index; +- +- spin_lock_irqsave(&_lock, flags); +- index = pending_index(netbk->dealloc_prod); +- netbk->dealloc_ring[index] = pending_idx; +- /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. 
*/ +- smp_wmb(); +- netbk->dealloc_prod++; +- spin_unlock_irqrestore(&_lock, flags); +- +- xen_netbk_bh_handler(netbk, 0); +-} +- +-static void netif_page_release(struct page *page, unsigned int order) +-{ +- unsigned int group, idx; +- int foreign = netif_get_page_ext(page, &group, &idx); +- +- BUG_ON(!foreign); +- BUG_ON(order); +- +- netif_idx_release(&xen_netbk[group], idx); +-} +- +-irqreturn_t netif_be_int(int irq, void *dev_id) +-{ +- struct xen_netif *netif = dev_id; +- struct xen_netbk *netbk; +- +- if (netif->group == -1) +- return IRQ_NONE; +- +- netbk = &xen_netbk[netif->group]; +- +- add_to_net_schedule_list_tail(netif); +- maybe_schedule_tx_action(netbk); +- +- if (netif_schedulable(netif) && !netbk_queue_full(netif)) +- netif_wake_queue(netif->dev); +- +- return IRQ_HANDLED; +-} +- +-static void make_tx_response(struct xen_netif *netif, +- struct xen_netif_tx_request *txp, +- s8 st) +-{ +- RING_IDX i = netif->tx.rsp_prod_pvt; +- struct xen_netif_tx_response *resp; +- int notify; +- +- resp = RING_GET_RESPONSE(&netif->tx, i); +- resp->id = txp->id; +- resp->status = st; +- +- if (txp->flags & NETTXF_extra_info) +- RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL; +- +- netif->tx.rsp_prod_pvt = ++i; +- RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify); +- if (notify) +- notify_remote_via_irq(netif->irq); +-} +- +-static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif, +- u16 id, +- s8 st, +- u16 offset, +- u16 size, +- u16 flags) +-{ +- RING_IDX i = netif->rx.rsp_prod_pvt; +- struct xen_netif_rx_response *resp; +- +- resp = RING_GET_RESPONSE(&netif->rx, i); +- resp->offset = offset; +- resp->flags = flags; +- resp->id = id; +- resp->status = (s16)size; +- if (st < 0) +- resp->status = (s16)st; +- +- netif->rx.rsp_prod_pvt = ++i; +- +- return resp; +-} +- +-#ifdef NETBE_DEBUG_INTERRUPT +-static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs) +-{ +- struct list_head *ent; +- struct xen_netif *netif; +- int i = 0; +- int group = 0; +- +- printk(KERN_ALERT "netif_schedule_list:\n"); +- +- for (group = 0; group < xen_netbk_group_nr; group++) { +- struct xen_netbk *netbk = &xen_netbk[group]; +- spin_lock_irq(&netbk->net_schedule_list_lock); +- printk(KERN_ALERT "xen_netback group number: %d\n", group); +- list_for_each(ent, &netbk->net_schedule_list) { +- netif = list_entry(ent, struct xen_netif, list); +- printk(KERN_ALERT " %d: private(rx_req_cons=%08x " +- "rx_resp_prod=%08x\n", +- i, netif->rx.req_cons, netif->rx.rsp_prod_pvt); +- printk(KERN_ALERT +- " tx_req_cons=%08x, tx_resp_prod=%08x)\n", +- netif->tx.req_cons, netif->tx.rsp_prod_pvt); +- printk(KERN_ALERT +- " shared(rx_req_prod=%08x " +- "rx_resp_prod=%08x\n", +- netif->rx.sring->req_prod, +- netif->rx.sring->rsp_prod); +- printk(KERN_ALERT +- " rx_event=%08x, tx_req_prod=%08x\n", +- netif->rx.sring->rsp_event, +- netif->tx.sring->req_prod); +- printk(KERN_ALERT +- " tx_resp_prod=%08x, tx_event=%08x)\n", +- netif->tx.sring->rsp_prod, +- netif->tx.sring->rsp_event); +- i++; +- } +- spin_unlock_irq(&netbk->net_schedule_list_lock); +- } +- +- printk(KERN_ALERT " ** End of netif_schedule_list **\n"); +- +- return IRQ_HANDLED; +-} +-#endif +- +-static inline int rx_work_todo(struct xen_netbk *netbk) +-{ +- return !skb_queue_empty(&netbk->rx_queue); +-} +- +-static inline int tx_work_todo(struct xen_netbk *netbk) +-{ +- if (netbk->dealloc_cons != netbk->dealloc_prod) +- return 1; +- +- if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB && +- 
!list_empty(&netbk->pending_inuse_head)) +- return 1; +- +- if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && +- !list_empty(&netbk->net_schedule_list)) +- return 1; +- +- return 0; +-} +- +-static int netbk_action_thread(void *data) +-{ +- struct xen_netbk *netbk = (struct xen_netbk *)data; +- while (!kthread_should_stop()) { +- wait_event_interruptible(netbk->kthread.netbk_action_wq, +- rx_work_todo(netbk) +- || tx_work_todo(netbk) +- || kthread_should_stop()); +- cond_resched(); +- +- if (kthread_should_stop()) +- break; +- +- if (rx_work_todo(netbk)) +- net_rx_action((unsigned long)netbk); +- +- if (tx_work_todo(netbk)) +- net_tx_action((unsigned long)netbk); +- } +- +- return 0; +-} +- +-static int __init netback_init(void) +-{ +- int i; +- struct page *page; +- int rc = 0; +- int group; +- +- if (!xen_pv_domain()) +- return -ENODEV; +- +- xen_netbk_group_nr = num_online_cpus(); +- xen_netbk = vmalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr); +- if (!xen_netbk) { +- printk(KERN_ALERT "%s: out of memory\n", __func__); +- return -ENOMEM; +- } +- memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr); +- +- for (group = 0; group < xen_netbk_group_nr; group++) { +- struct xen_netbk *netbk = &xen_netbk[group]; +- skb_queue_head_init(&netbk->rx_queue); +- skb_queue_head_init(&netbk->tx_queue); +- +- init_timer(&netbk->net_timer); +- netbk->net_timer.data = (unsigned long)netbk; +- netbk->net_timer.function = net_alarm; +- +- init_timer(&netbk->netbk_tx_pending_timer); +- netbk->netbk_tx_pending_timer.data = (unsigned long)netbk; +- netbk->netbk_tx_pending_timer.function = +- netbk_tx_pending_timeout; +- +- netbk->mmap_pages = +- alloc_empty_pages_and_pagevec(MAX_PENDING_REQS); +- if (!netbk->mmap_pages) { +- printk(KERN_ALERT "%s: out of memory\n", __func__); +- del_timer(&netbk->netbk_tx_pending_timer); +- del_timer(&netbk->net_timer); +- rc = -ENOMEM; +- goto failed_init; +- } +- +- for (i = 0; i < MAX_PENDING_REQS; i++) { +- page = netbk->mmap_pages[i]; +- SetPageForeign(page, netif_page_release); +- netif_set_page_ext(page, group, i); +- INIT_LIST_HEAD(&netbk->pending_inuse[i].list); +- } +- +- netbk->pending_cons = 0; +- netbk->pending_prod = MAX_PENDING_REQS; +- for (i = 0; i < MAX_PENDING_REQS; i++) +- netbk->pending_ring[i] = i; +- +- if (MODPARM_netback_kthread) { +- init_waitqueue_head(&netbk->kthread.netbk_action_wq); +- netbk->kthread.task = +- kthread_create(netbk_action_thread, +- (void *)netbk, +- "netback/%u", group); +- +- if (!IS_ERR(netbk->kthread.task)) { +- kthread_bind(netbk->kthread.task, group); +- } else { +- printk(KERN_ALERT +- "kthread_run() fails at netback\n"); +- free_empty_pages_and_pagevec(netbk->mmap_pages, +- MAX_PENDING_REQS); +- del_timer(&netbk->netbk_tx_pending_timer); +- del_timer(&netbk->net_timer); +- rc = PTR_ERR(netbk->kthread.task); +- goto failed_init; +- } +- } else { +- tasklet_init(&netbk->tasklet.net_tx_tasklet, +- net_tx_action, +- (unsigned long)netbk); +- tasklet_init(&netbk->tasklet.net_rx_tasklet, +- net_rx_action, +- (unsigned long)netbk); +- } +- +- INIT_LIST_HEAD(&netbk->pending_inuse_head); +- INIT_LIST_HEAD(&netbk->net_schedule_list); +- +- spin_lock_init(&netbk->net_schedule_list_lock); +- +- atomic_set(&netbk->netfront_count, 0); +- +- if (MODPARM_netback_kthread) +- wake_up_process(netbk->kthread.task); +- } +- +- netbk_copy_skb_mode = NETBK_DONT_COPY_SKB; +- if (MODPARM_copy_skb) { +- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace, +- NULL, 0)) +- netbk_copy_skb_mode = 
NETBK_ALWAYS_COPY_SKB; +- else +- netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB; +- } +- +- rc = netif_xenbus_init(); +- if (rc) +- goto failed_init; +- +-#ifdef NETBE_DEBUG_INTERRUPT +- (void)bind_virq_to_irqhandler(VIRQ_DEBUG, +- 0, +- netif_be_dbg, +- IRQF_SHARED, +- "net-be-dbg", +- &netif_be_dbg); +-#endif +- +- return 0; +- +-failed_init: +- for (i = 0; i < group; i++) { +- struct xen_netbk *netbk = &xen_netbk[i]; +- free_empty_pages_and_pagevec(netbk->mmap_pages, +- MAX_PENDING_REQS); +- del_timer(&netbk->netbk_tx_pending_timer); +- del_timer(&netbk->net_timer); +- if (MODPARM_netback_kthread) +- kthread_stop(netbk->kthread.task); +- } +- vfree(xen_netbk); +- return rc; +- +-} +- +-module_init(netback_init); +- +-MODULE_LICENSE("Dual BSD/GPL"); +diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c +deleted file mode 100644 +index 640c696..0000000 +--- a/drivers/xen/netback/xenbus.c ++++ /dev/null +@@ -1,487 +0,0 @@ +-/* Xenbus code for netif backend +- * Copyright (C) 2005 Rusty Russell +- * Copyright (C) 2005 XenSource Ltd +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License as published by +- * the Free Software Foundation; either version 2 of the License, or +- * (at your option) any later version. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, write to the Free Software +- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-*/ +- +-#include "common.h" +- +-static int connect_rings(struct backend_info *); +-static void connect(struct backend_info *); +-static void backend_create_netif(struct backend_info *be); +-static void unregister_hotplug_status_watch(struct backend_info *be); +- +-static int netback_remove(struct xenbus_device *dev) +-{ +- struct backend_info *be = dev_get_drvdata(&dev->dev); +- +- unregister_hotplug_status_watch(be); +- if (be->netif) { +- kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); +- xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status"); +- netif_disconnect(be->netif); +- be->netif = NULL; +- } +- kfree(be); +- dev_set_drvdata(&dev->dev, NULL); +- return 0; +-} +- +- +-/** +- * Entry point to this code when a new device is created. Allocate the basic +- * structures and switch to InitWait. 
+- */ +-static int netback_probe(struct xenbus_device *dev, +- const struct xenbus_device_id *id) +-{ +- const char *message; +- struct xenbus_transaction xbt; +- int err; +- int sg; +- struct backend_info *be = kzalloc(sizeof(struct backend_info), +- GFP_KERNEL); +- if (!be) { +- xenbus_dev_fatal(dev, -ENOMEM, +- "allocating backend structure"); +- return -ENOMEM; +- } +- +- be->dev = dev; +- dev_set_drvdata(&dev->dev, be); +- +- sg = 1; +- if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) +- sg = 0; +- +- do { +- err = xenbus_transaction_start(&xbt); +- if (err) { +- xenbus_dev_fatal(dev, err, "starting transaction"); +- goto fail; +- } +- +- err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg); +- if (err) { +- message = "writing feature-sg"; +- goto abort_transaction; +- } +- +- err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", +- "%d", sg); +- if (err) { +- message = "writing feature-gso-tcpv4"; +- goto abort_transaction; +- } +- +- /* We support rx-copy path. */ +- err = xenbus_printf(xbt, dev->nodename, +- "feature-rx-copy", "%d", 1); +- if (err) { +- message = "writing feature-rx-copy"; +- goto abort_transaction; +- } +- +- /* +- * We don't support rx-flip path (except old guests who don't +- * grok this feature flag). +- */ +- err = xenbus_printf(xbt, dev->nodename, +- "feature-rx-flip", "%d", 0); +- if (err) { +- message = "writing feature-rx-flip"; +- goto abort_transaction; +- } +- +- err = xenbus_transaction_end(xbt, 0); +- } while (err == -EAGAIN); +- +- if (err) { +- xenbus_dev_fatal(dev, err, "completing transaction"); +- goto fail; +- } +- +- err = xenbus_switch_state(dev, XenbusStateInitWait); +- if (err) +- goto fail; +- +- /* This kicks hotplug scripts, so do it immediately. */ +- backend_create_netif(be); +- +- return 0; +- +-abort_transaction: +- xenbus_transaction_end(xbt, 1); +- xenbus_dev_fatal(dev, err, "%s", message); +-fail: +- pr_debug("failed"); +- netback_remove(dev); +- return err; +-} +- +- +-/* +- * Handle the creation of the hotplug script environment. We add the script +- * and vif variables to the environment, for the benefit of the vif-* hotplug +- * scripts. 
+- */ +-static int netback_uevent(struct xenbus_device *xdev, +- struct kobj_uevent_env *env) +-{ +- struct backend_info *be = dev_get_drvdata(&xdev->dev); +- char *val; +- +- val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL); +- if (IS_ERR(val)) { +- int err = PTR_ERR(val); +- xenbus_dev_fatal(xdev, err, "reading script"); +- return err; +- } else { +- if (add_uevent_var(env, "script=%s", val)) { +- kfree(val); +- return -ENOMEM; +- } +- kfree(val); +- } +- +- if (!be || !be->netif) +- return 0; +- +- return add_uevent_var(env, "vif=%s", be->netif->dev->name); +-} +- +- +-static void backend_create_netif(struct backend_info *be) +-{ +- int err; +- long handle; +- struct xenbus_device *dev = be->dev; +- +- if (be->netif != NULL) +- return; +- +- err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle); +- if (err != 1) { +- xenbus_dev_fatal(dev, err, "reading handle"); +- return; +- } +- +- be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle); +- if (IS_ERR(be->netif)) { +- err = PTR_ERR(be->netif); +- be->netif = NULL; +- xenbus_dev_fatal(dev, err, "creating interface"); +- return; +- } +- +- kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE); +-} +- +- +-static void disconnect_backend(struct xenbus_device *dev) +-{ +- struct backend_info *be = dev_get_drvdata(&dev->dev); +- +- if (be->netif) { +- xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status"); +- netif_disconnect(be->netif); +- be->netif = NULL; +- } +-} +- +-/** +- * Callback received when the frontend's state changes. +- */ +-static void frontend_changed(struct xenbus_device *dev, +- enum xenbus_state frontend_state) +-{ +- struct backend_info *be = dev_get_drvdata(&dev->dev); +- +- pr_debug("frontend state %s", xenbus_strstate(frontend_state)); +- +- be->frontend_state = frontend_state; +- +- switch (frontend_state) { +- case XenbusStateInitialising: +- if (dev->state == XenbusStateClosed) { +- printk(KERN_INFO "%s: %s: prepare for reconnect\n", +- __func__, dev->nodename); +- xenbus_switch_state(dev, XenbusStateInitWait); +- } +- break; +- +- case XenbusStateInitialised: +- break; +- +- case XenbusStateConnected: +- if (dev->state == XenbusStateConnected) +- break; +- backend_create_netif(be); +- if (be->netif) +- connect(be); +- break; +- +- case XenbusStateClosing: +- if (be->netif) +- kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); +- disconnect_backend(dev); +- xenbus_switch_state(dev, XenbusStateClosing); +- break; +- +- case XenbusStateClosed: +- xenbus_switch_state(dev, XenbusStateClosed); +- if (xenbus_dev_is_online(dev)) +- break; +- /* fall through if not online */ +- case XenbusStateUnknown: +- device_unregister(&dev->dev); +- break; +- +- default: +- xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", +- frontend_state); +- break; +- } +-} +- +- +-static void xen_net_read_rate(struct xenbus_device *dev, +- unsigned long *bytes, unsigned long *usec) +-{ +- char *s, *e; +- unsigned long b, u; +- char *ratestr; +- +- /* Default to unlimited bandwidth. */ +- *bytes = ~0UL; +- *usec = 0; +- +- ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL); +- if (IS_ERR(ratestr)) +- return; +- +- s = ratestr; +- b = simple_strtoul(s, &e, 10); +- if ((s == e) || (*e != ',')) +- goto fail; +- +- s = e + 1; +- u = simple_strtoul(s, &e, 10); +- if ((s == e) || (*e != '\0')) +- goto fail; +- +- *bytes = b; +- *usec = u; +- +- kfree(ratestr); +- return; +- +- fail: +- pr_warn("Failed to parse network rate limit. 
Traffic unlimited.\n"); +- kfree(ratestr); +-} +- +-static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) +-{ +- char *s, *e, *macstr; +- int i; +- +- macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL); +- if (IS_ERR(macstr)) +- return PTR_ERR(macstr); +- +- for (i = 0; i < ETH_ALEN; i++) { +- mac[i] = simple_strtoul(s, &e, 16); +- if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) { +- kfree(macstr); +- return -ENOENT; +- } +- s = e+1; +- } +- +- kfree(macstr); +- return 0; +-} +- +-static void unregister_hotplug_status_watch(struct backend_info *be) +-{ +- if (be->have_hotplug_status_watch) { +- unregister_xenbus_watch(&be->hotplug_status_watch); +- kfree(be->hotplug_status_watch.node); +- } +- be->have_hotplug_status_watch = 0; +-} +- +-static void hotplug_status_changed(struct xenbus_watch *watch, +- const char **vec, +- unsigned int vec_size) +-{ +- struct backend_info *be = container_of(watch, +- struct backend_info, +- hotplug_status_watch); +- char *str; +- unsigned int len; +- +- str = xenbus_read(XBT_NIL, be->dev->nodename, "hotplug-status", &len); +- if (IS_ERR(str)) +- return; +- if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) { +- xenbus_switch_state(be->dev, XenbusStateConnected); +- /* Not interested in this watch anymore. */ +- unregister_hotplug_status_watch(be); +- } +- kfree(str); +-} +- +-static void connect(struct backend_info *be) +-{ +- int err; +- struct xenbus_device *dev = be->dev; +- +- err = connect_rings(be); +- if (err) +- return; +- +- err = xen_net_read_mac(dev, be->netif->fe_dev_addr); +- if (err) { +- xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename); +- return; +- } +- +- xen_net_read_rate(dev, &be->netif->credit_bytes, +- &be->netif->credit_usec); +- be->netif->remaining_credit = be->netif->credit_bytes; +- +- unregister_hotplug_status_watch(be); +- err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, +- hotplug_status_changed, +- "%s/%s", dev->nodename, "hotplug-status"); +- if (err) { +- /* Switch now, since we can't do a watch. */ +- xenbus_switch_state(dev, XenbusStateConnected); +- } else { +- be->have_hotplug_status_watch = 1; +- } +- +- netif_wake_queue(be->netif->dev); +-} +- +- +-static int connect_rings(struct backend_info *be) +-{ +- struct xen_netif *netif = be->netif; +- struct xenbus_device *dev = be->dev; +- unsigned long tx_ring_ref, rx_ring_ref; +- unsigned int evtchn, rx_copy; +- int err; +- int val; +- +- err = xenbus_gather(XBT_NIL, dev->otherend, +- "tx-ring-ref", "%lu", &tx_ring_ref, +- "rx-ring-ref", "%lu", &rx_ring_ref, +- "event-channel", "%u", &evtchn, NULL); +- if (err) { +- xenbus_dev_fatal(dev, err, +- "reading %s/ring-ref and event-channel", +- dev->otherend); +- return err; +- } +- +- err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u", +- &rx_copy); +- if (err == -ENOENT) { +- err = 0; +- rx_copy = 0; +- } +- if (err < 0) { +- xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy", +- dev->otherend); +- return err; +- } +- if (!rx_copy) +- return -EOPNOTSUPP; +- +- if (netif->dev->tx_queue_len != 0) { +- if (xenbus_scanf(XBT_NIL, dev->otherend, +- "feature-rx-notify", "%d", &val) < 0) +- val = 0; +- if (val) +- netif->can_queue = 1; +- else +- /* Must be non-zero for pfifo_fast to work. 
*/ +- netif->dev->tx_queue_len = 1; +- } +- +- if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", +- "%d", &val) < 0) +- val = 0; +- netif->can_sg = !!val; +- +- if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", +- "%d", &val) < 0) +- val = 0; +- netif->gso = !!val; +- +- if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix", +- "%d", &val) < 0) +- val = 0; +- netif->gso_prefix = !!val; +- +- if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload", +- "%d", &val) < 0) +- val = 0; +- netif->csum = !val; +- +- /* Set dev->features */ +- netif_set_features(netif); +- +- /* Map the shared frame, irq etc. */ +- err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn); +- if (err) { +- xenbus_dev_fatal(dev, err, +- "mapping shared-frames %lu/%lu port %u", +- tx_ring_ref, rx_ring_ref, evtchn); +- return err; +- } +- return 0; +-} +- +- +-/* ** Driver Registration ** */ +- +- +-static const struct xenbus_device_id netback_ids[] = { +- { "vif" }, +- { "" } +-}; +- +- +-static struct xenbus_driver netback = { +- .name = "vif", +- .owner = THIS_MODULE, +- .ids = netback_ids, +- .probe = netback_probe, +- .remove = netback_remove, +- .uevent = netback_uevent, +- .otherend_changed = frontend_changed, +-}; +- +- +-int netif_xenbus_init(void) +-{ +- printk(KERN_CRIT "registering netback\n"); +- return xenbus_register_backend(&netback); +-} +-- +1.7.3.4 + + +From 01d93054b9f5223c8ed9a3c11ea5a89ce7db442c Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Sat, 11 Dec 2010 10:15:50 +0000 +Subject: [PATCH 123/139] xen: netback: Make dependency on PageForeign conditional + +When PageForeign is not available we fallback to a copying TX mode. + +All uses of PageForeign are now gated with HAVE_XEN_PAGE_FOREIGN, this should +allow for easier removal of the dependency for upstream, e.g. using unifdef. + +Signed-off-by: Ian Campbell +--- + drivers/net/xen-netback/common.h | 21 +++ + drivers/net/xen-netback/interface.c | 4 + + drivers/net/xen-netback/netback.c | 272 +++++++++++++++++++++++++++++----- + drivers/net/xen-netback/xenbus.c | 2 + + 4 files changed, 259 insertions(+), 40 deletions(-) + +diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h +index 079e1de..f45bac8 100644 +--- a/drivers/net/xen-netback/common.h ++++ b/drivers/net/xen-netback/common.h +@@ -93,8 +93,10 @@ struct xen_netif { + unsigned long remaining_credit; + struct timer_list credit_timeout; + ++#ifdef HAVE_XEN_PAGE_FOREIGN + /* Statistics */ + int nr_copied_skbs; ++#endif + + /* Miscellaneous private stuff. 
*/ + struct list_head list; /* scheduling list */ +@@ -117,6 +119,7 @@ struct xen_netif { + #define netback_carrier_off(netif) ((netif)->carrier = 0) + #define netback_carrier_ok(netif) ((netif)->carrier) + ++#ifdef HAVE_XEN_PAGE_FOREIGN + enum { + NETBK_DONT_COPY_SKB, + NETBK_DELAYED_COPY_SKB, +@@ -124,6 +127,7 @@ enum { + }; + + extern int netbk_copy_skb_mode; ++#endif + + struct backend_info { + struct xenbus_device *dev; +@@ -191,10 +195,12 @@ struct netbk_rx_meta { + int gso_size; + }; + ++#ifdef HAVE_XEN_PAGE_FOREIGN + struct netbk_tx_pending_inuse { + struct list_head list; + unsigned long alloc_time; + }; ++#endif + + #define MAX_PENDING_REQS 256 + +@@ -232,16 +238,24 @@ struct xen_netbk { + struct sk_buff_head tx_queue; + + struct timer_list net_timer; ++#ifdef HAVE_XEN_PAGE_FOREIGN + struct timer_list netbk_tx_pending_timer; ++#endif + ++#ifdef HAVE_XEN_PAGE_FOREIGN + struct page **mmap_pages; ++#else ++ struct page *mmap_pages[MAX_PENDING_REQS]; ++#endif + + pending_ring_idx_t pending_prod; + pending_ring_idx_t pending_cons; ++#ifdef HAVE_XEN_PAGE_FOREIGN + pending_ring_idx_t dealloc_prod; + pending_ring_idx_t dealloc_cons; + + struct list_head pending_inuse_head; ++#endif + struct list_head net_schedule_list; + + /* Protect the net_schedule_list in netif. */ +@@ -250,13 +264,20 @@ struct xen_netbk { + atomic_t netfront_count; + + struct pending_tx_info pending_tx_info[MAX_PENDING_REQS]; ++#ifdef HAVE_XEN_PAGE_FOREIGN + struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS]; + struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS]; + struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS]; + + grant_handle_t grant_tx_handle[MAX_PENDING_REQS]; ++#else ++ struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS]; ++#endif ++ + u16 pending_ring[MAX_PENDING_REQS]; ++#ifdef HAVE_XEN_PAGE_FOREIGN + u16 dealloc_ring[MAX_PENDING_REQS]; ++#endif + + /* + * Each head or fragment can be up to 4096 bytes. 
Given +diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c +index c36db26..3ff3aff 100644 +--- a/drivers/net/xen-netback/interface.c ++++ b/drivers/net/xen-netback/interface.c +@@ -186,6 +186,7 @@ static void netbk_get_drvinfo(struct net_device *dev, + strcpy(info->bus_info, dev_name(dev->dev.parent)); + } + ++#ifdef HAVE_XEN_PAGE_FOREIGN + static const struct netif_stat { + char name[ETH_GSTRING_LEN]; + u16 offset; +@@ -225,6 +226,7 @@ static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data) + break; + } + } ++#endif + + static struct ethtool_ops network_ethtool_ops = { + .get_drvinfo = netbk_get_drvinfo, +@@ -237,9 +239,11 @@ static struct ethtool_ops network_ethtool_ops = { + .set_tso = netbk_set_tso, + .get_link = ethtool_op_get_link, + ++#ifdef HAVE_XEN_PAGE_FOREIGN + .get_sset_count = netbk_get_sset_count, + .get_ethtool_stats = netbk_get_ethtool_stats, + .get_strings = netbk_get_strings, ++#endif + }; + + static struct net_device_ops netback_ops = { +diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c +index e0ca232..6a1aa5c 100644 +--- a/drivers/net/xen-netback/netback.c ++++ b/drivers/net/xen-netback/netback.c +@@ -40,7 +40,9 @@ + + #include + ++#ifdef HAVE_XEN_PAGE_FOREIGN + #include ++#endif + #include + #include + +@@ -80,9 +82,10 @@ static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk, + } + + /* extra field used in struct page */ +-static inline void netif_set_page_ext(struct page *pg, +- unsigned int group, unsigned int idx) ++static inline void netif_set_page_ext(struct page *pg, struct xen_netbk *netbk, ++ unsigned int idx) + { ++ unsigned int group = netbk - xen_netbk; + union page_ext ext = { .e = { .group = group + 1, .idx = idx } }; + + BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping)); +@@ -96,8 +99,10 @@ static int netif_get_page_ext(struct page *pg, + struct xen_netbk *netbk; + unsigned int group, idx; + ++#ifdef HAVE_XEN_PAGE_FOREIGN + if (!PageForeign(pg)) + return 0; ++#endif + + group = ext.e.group - 1; + +@@ -106,8 +111,10 @@ static int netif_get_page_ext(struct page *pg, + + netbk = &xen_netbk[group]; + ++#ifdef HAVE_XEN_PAGE_FOREIGN + if (netbk->mmap_pages == NULL) + return 0; ++#endif + + idx = ext.e.idx; + +@@ -144,12 +151,14 @@ static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk) + netbk->pending_prod + netbk->pending_cons; + } + ++#ifdef HAVE_XEN_PAGE_FOREIGN + /* Setting this allows the safe use of this driver without netloop. 
*/ + static int MODPARM_copy_skb = 1; + module_param_named(copy_skb, MODPARM_copy_skb, bool, 0); + MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop"); + + int netbk_copy_skb_mode; ++#endif + + static int MODPARM_netback_kthread; + module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0); +@@ -779,11 +788,13 @@ static void net_alarm(unsigned long data) + xen_netbk_bh_handler(netbk, 1); + } + ++#ifdef HAVE_XEN_PAGE_FOREIGN + static void netbk_tx_pending_timeout(unsigned long data) + { + struct xen_netbk *netbk = (struct xen_netbk *)data; + xen_netbk_bh_handler(netbk, 0); + } ++#endif + + struct net_device_stats *netif_be_get_stats(struct net_device *dev) + { +@@ -892,6 +903,7 @@ static void tx_credit_callback(unsigned long data) + netif_schedule_work(netif); + } + ++#ifdef HAVE_XEN_PAGE_FOREIGN + static inline int copy_pending_req(struct xen_netbk *netbk, + pending_ring_idx_t pending_idx) + { +@@ -899,7 +911,9 @@ static inline int copy_pending_req(struct xen_netbk *netbk, + netbk->grant_tx_handle[pending_idx], + &netbk->mmap_pages[pending_idx]); + } ++#endif + ++#ifdef HAVE_XEN_PAGE_FOREIGN + static inline void net_tx_action_dealloc(struct xen_netbk *netbk) + { + struct netbk_tx_pending_inuse *inuse, *n; +@@ -1004,6 +1018,7 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk) + list_del_init(&inuse->list); + } + } ++#endif + + static void netbk_tx_err(struct xen_netif *netif, + struct xen_netif_tx_request *txp, RING_IDX end) +@@ -1034,19 +1049,19 @@ static int netbk_count_requests(struct xen_netif *netif, + + do { + if (frags >= work_to_do) { +- DPRINTK("Need more frags\n"); ++ printk(KERN_CRIT "Need more frags\n"); + return -frags; + } + + if (unlikely(frags >= MAX_SKB_FRAGS)) { +- DPRINTK("Too many frags\n"); ++ printk(KERN_CRIT "Too many frags\n"); + return -frags; + } + + memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags), + sizeof(*txp)); + if (txp->size > first->size) { +- DPRINTK("Frags galore\n"); ++ printk(KERN_CRIT "Frags galore\n"); + return -frags; + } + +@@ -1054,20 +1069,42 @@ static int netbk_count_requests(struct xen_netif *netif, + frags++; + + if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) { +- DPRINTK("txp->offset: %x, size: %u\n", ++ printk(KERN_CRIT "txp->offset: %x, size: %u\n", + txp->offset, txp->size); + return -frags; + } + } while ((txp++)->flags & NETTXF_more_data); +- + return frags; + } + ++#ifndef HAVE_XEN_PAGE_FOREIGN ++static struct page *netif_alloc_page(struct xen_netbk *netbk, ++ struct sk_buff *skb, ++ unsigned long pending_idx) ++{ ++ struct page *page; ++ page = alloc_page(GFP_KERNEL|__GFP_COLD); ++ if (!page) ++ return NULL; ++ netif_set_page_ext(page, netbk, pending_idx); ++ netbk->mmap_pages[pending_idx] = page; ++ return page; ++} ++#endif ++ ++#ifdef HAVE_XEN_PAGE_FOREIGN + static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk, + struct xen_netif *netif, + struct sk_buff *skb, + struct xen_netif_tx_request *txp, +- struct gnttab_map_grant_ref *mop) ++ struct gnttab_map_grant_ref *gop) ++#else ++static struct gnttab_copy *netbk_get_requests(struct xen_netbk *netbk, ++ struct xen_netif *netif, ++ struct sk_buff *skb, ++ struct xen_netif_tx_request *txp, ++ struct gnttab_copy *gop) ++#endif + { + struct skb_shared_info *shinfo = skb_shinfo(skb); + skb_frag_t *frags = shinfo->frags; +@@ -1078,16 +1115,39 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk, + start = ((unsigned long)shinfo->frags[0].page == pending_idx); + + for (i = 
start; i < shinfo->nr_frags; i++, txp++) { ++#ifndef HAVE_XEN_PAGE_FOREIGN ++ struct page *page; ++#endif + pending_ring_idx_t index; + struct pending_tx_info *pending_tx_info = + netbk->pending_tx_info; + + index = pending_index(netbk->pending_cons++); + pending_idx = netbk->pending_ring[index]; +- +- gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx), ++#ifdef HAVE_XEN_PAGE_FOREIGN ++ gnttab_set_map_op(gop++, idx_to_kaddr(netbk, pending_idx), + GNTMAP_host_map | GNTMAP_readonly, + txp->gref, netif->domid); ++#else ++ page = netif_alloc_page(netbk, skb, pending_idx); ++ if (!page) ++ return NULL; ++ ++ netbk->mmap_pages[pending_idx] = page; ++ ++ gop->source.u.ref = txp->gref; ++ gop->source.domid = netif->domid; ++ gop->source.offset = txp->offset; ++ ++ gop->dest.u.gmfn = virt_to_mfn(page_address(page)); ++ gop->dest.domid = DOMID_SELF; ++ gop->dest.offset = txp->offset; ++ ++ gop->len = txp->size; ++ gop->flags = GNTCOPY_source_gref; ++ ++ gop++; ++#endif + + memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp)); + netif_get(netif); +@@ -1095,14 +1155,24 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk, + frags[i].page = (void *)pending_idx; + } + +- return mop; ++ return gop; + } + +-static int netbk_tx_check_mop(struct xen_netbk *netbk, ++#ifdef HAVE_XEN_PAGE_FOREIGN ++static int netbk_tx_check_gop(struct xen_netbk *netbk, + struct sk_buff *skb, +- struct gnttab_map_grant_ref **mopp) ++ struct gnttab_map_grant_ref **gopp) ++#else ++static int netbk_tx_check_gop(struct xen_netbk *netbk, ++ struct sk_buff *skb, ++ struct gnttab_copy **gopp) ++#endif + { +- struct gnttab_map_grant_ref *mop = *mopp; ++#ifdef HAVE_XEN_PAGE_FOREIGN ++ struct gnttab_map_grant_ref *gop = *gopp; ++#else ++ struct gnttab_copy *gop = *gopp; ++#endif + int pending_idx = *((u16 *)skb->data); + struct pending_tx_info *pending_tx_info = netbk->pending_tx_info; + struct xen_netif *netif = pending_tx_info[pending_idx].netif; +@@ -1112,7 +1182,7 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk, + int i, err, start; + + /* Check status of header. */ +- err = mop->status; ++ err = gop->status; + if (unlikely(err)) { + pending_ring_idx_t index; + index = pending_index(netbk->pending_prod++); +@@ -1120,11 +1190,13 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk, + make_tx_response(netif, txp, NETIF_RSP_ERROR); + netbk->pending_ring[index] = pending_idx; + netif_put(netif); ++#ifdef HAVE_XEN_PAGE_FOREIGN + } else { + set_phys_to_machine( + __pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT, +- FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT)); +- netbk->grant_tx_handle[pending_idx] = mop->handle; ++ FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT)); ++ netbk->grant_tx_handle[pending_idx] = gop->handle; ++#endif + } + + /* Skip first skb fragment if it is on same page as header fragment. */ +@@ -1137,14 +1209,16 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk, + pending_idx = (unsigned long)shinfo->frags[i].page; + + /* Check error status: if okay then remember grant handle. */ +- newerr = (++mop)->status; ++ newerr = (++gop)->status; + if (likely(!newerr)) { ++#ifdef HAVE_XEN_PAGE_FOREIGN + unsigned long addr; + addr = idx_to_kaddr(netbk, pending_idx); + set_phys_to_machine( + __pa(addr)>>PAGE_SHIFT, +- FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT)); +- netbk->grant_tx_handle[pending_idx] = mop->handle; ++ FOREIGN_FRAME(gop->dev_bus_addr>>PAGE_SHIFT)); ++ netbk->grant_tx_handle[pending_idx] = gop->handle; ++#endif + /* Had a previous error? 
Invalidate this fragment. */ + if (unlikely(err)) + netif_idx_release(netbk, pending_idx); +@@ -1174,7 +1248,7 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk, + err = newerr; + } + +- *mopp = mop + 1; ++ *gopp = gop + 1; + return err; + } + +@@ -1190,10 +1264,11 @@ static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb) + unsigned long pending_idx; + + pending_idx = (unsigned long)frag->page; +- ++#ifdef HAVE_XEN_PAGE_FOREIGN + netbk->pending_inuse[pending_idx].alloc_time = jiffies; + list_add_tail(&netbk->pending_inuse[pending_idx].list, + &netbk->pending_inuse_head); ++#endif + + txp = &netbk->pending_tx_info[pending_idx].req; + frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx)); +@@ -1203,6 +1278,10 @@ static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb) + skb->len += txp->size; + skb->data_len += txp->size; + skb->truesize += txp->size; ++ ++ /* Take an extra reference to offset netif_idx_release */ ++ get_page(netbk->mmap_pages[pending_idx]); ++ netif_idx_release(netbk, pending_idx); + } + } + +@@ -1330,18 +1409,24 @@ static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size) + return false; + } + +-static unsigned net_tx_build_mops(struct xen_netbk *netbk) ++static unsigned net_tx_build_gops(struct xen_netbk *netbk) + { +- struct gnttab_map_grant_ref *mop; ++#ifdef HAVE_XEN_PAGE_FOREIGN ++ struct gnttab_map_grant_ref *gop = netbk->tx_map_ops, *request_gop; ++#else ++ struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop; ++#endif + struct sk_buff *skb; + int ret; + +- mop = netbk->tx_map_ops; + while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && + !list_empty(&netbk->net_schedule_list)) { + struct xen_netif *netif; + struct xen_netif_tx_request txreq; + struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS]; ++#ifndef HAVE_XEN_PAGE_FOREIGN ++ struct page *page; ++#endif + struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1]; + u16 pending_idx; + RING_IDX idx; +@@ -1438,10 +1523,35 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk) + } + } + +- gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx), ++#ifdef HAVE_XEN_PAGE_FOREIGN ++ gnttab_set_map_op(gop, idx_to_kaddr(netbk, pending_idx), + GNTMAP_host_map | GNTMAP_readonly, + txreq.gref, netif->domid); +- mop++; ++ gop++; ++#else ++ /* XXX could copy straight to head */ ++ page = netif_alloc_page(netbk, skb, pending_idx); ++ if (!page) { ++ kfree_skb(skb); ++ netbk_tx_err(netif, &txreq, idx); ++ continue; ++ } ++ ++ netbk->mmap_pages[pending_idx] = page; ++ ++ gop->source.u.ref = txreq.gref; ++ gop->source.domid = netif->domid; ++ gop->source.offset = txreq.offset; ++ ++ gop->dest.u.gmfn = virt_to_mfn(page_address(page)); ++ gop->dest.domid = DOMID_SELF; ++ gop->dest.offset = txreq.offset; ++ ++ gop->len = txreq.size; ++ gop->flags = GNTCOPY_source_gref; ++ ++ gop++; ++#endif + + memcpy(&netbk->pending_tx_info[pending_idx].req, + &txreq, sizeof(txreq)); +@@ -1464,24 +1574,43 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk) + + netbk->pending_cons++; + +- mop = netbk_get_requests(netbk, netif, skb, txfrags, mop); ++ request_gop = netbk_get_requests(netbk, netif, ++ skb, txfrags, gop); ++ if (request_gop == NULL) { ++ kfree_skb(skb); ++ netbk_tx_err(netif, &txreq, idx); ++ continue; ++ } ++ gop = request_gop; + + netif->tx.req_cons = idx; + netif_schedule_work(netif); + +- if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops)) ++#ifdef HAVE_XEN_PAGE_FOREIGN ++ if ((gop - netbk->tx_map_ops) >= 
ARRAY_SIZE(netbk->tx_map_ops)) + break; ++#else ++ if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops)) ++ break; ++#endif + } + +- return mop - netbk->tx_map_ops; ++#ifdef HAVE_XEN_PAGE_FOREIGN ++ return gop - netbk->tx_map_ops; ++#else ++ return gop - netbk->tx_copy_ops; ++#endif + } + + static void net_tx_submit(struct xen_netbk *netbk) + { +- struct gnttab_map_grant_ref *mop; ++#ifdef HAVE_XEN_PAGE_FOREIGN ++ struct gnttab_map_grant_ref *gop = netbk->tx_map_ops; ++#else ++ struct gnttab_copy *gop = netbk->tx_copy_ops; ++#endif + struct sk_buff *skb; + +- mop = netbk->tx_map_ops; + while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) { + struct xen_netif_tx_request *txp; + struct xen_netif *netif; +@@ -1493,7 +1622,7 @@ static void net_tx_submit(struct xen_netbk *netbk) + txp = &netbk->pending_tx_info[pending_idx].req; + + /* Check the remap error code. */ +- if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) { ++ if (unlikely(netbk_tx_check_gop(netbk, skb, &gop))) { + pr_debug("netback grant failed.\n"); + skb_shinfo(skb)->nr_frags = 0; + kfree_skb(skb); +@@ -1545,12 +1674,14 @@ static void net_tx_submit(struct xen_netbk *netbk) + continue; + } + ++#ifdef HAVE_XEN_PAGE_FOREIGN + if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) && + unlikely(skb_linearize(skb))) { + DPRINTK("Can't linearize skb in net_tx_action.\n"); + kfree_skb(skb); + continue; + } ++#endif + + netif->stats.rx_bytes += skb->len; + netif->stats.rx_packets++; +@@ -1564,21 +1695,31 @@ static void net_tx_submit(struct xen_netbk *netbk) + static void net_tx_action(unsigned long data) + { + struct xen_netbk *netbk = (struct xen_netbk *)data; +- unsigned nr_mops; ++ unsigned nr_gops; + int ret; + ++#ifdef HAVE_XEN_PAGE_FOREIGN + net_tx_action_dealloc(netbk); ++#endif + +- nr_mops = net_tx_build_mops(netbk); ++ nr_gops = net_tx_build_gops(netbk); + +- if (nr_mops == 0) ++#ifdef HAVE_XEN_PAGE_FOREIGN ++ if (nr_gops == 0) + goto out; +- + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, +- netbk->tx_map_ops, nr_mops); ++ netbk->tx_map_ops, nr_gops); ++#else ++ if (nr_gops == 0) ++ return; ++ ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, ++ netbk->tx_copy_ops, nr_gops); ++#endif + BUG_ON(ret); + + net_tx_submit(netbk); ++ ++#ifdef HAVE_XEN_PAGE_FOREIGN + out: + if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB && + !list_empty(&netbk->pending_inuse_head)) { +@@ -1589,8 +1730,10 @@ out: + mod_timer(&netbk->netbk_tx_pending_timer, + oldest->alloc_time + HZ); + } ++#endif + } + ++#ifdef HAVE_XEN_PAGE_FOREIGN + static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx) + { + static DEFINE_SPINLOCK(_lock); +@@ -1618,6 +1761,34 @@ static void netif_page_release(struct page *page, unsigned int order) + + netif_idx_release(&xen_netbk[group], idx); + } ++#else ++static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx) ++{ ++ struct xen_netif *netif; ++ struct pending_tx_info *pending_tx_info; ++ pending_ring_idx_t index; ++ ++ /* Already complete? 
*/ ++ if (netbk->mmap_pages[pending_idx] == NULL) ++ return; ++ ++ pending_tx_info = &netbk->pending_tx_info[pending_idx]; ++ ++ netif = pending_tx_info->netif; ++ ++ make_tx_response(netif, &pending_tx_info->req, ++ NETIF_RSP_OKAY); ++ ++ index = pending_index(netbk->pending_prod++); ++ netbk->pending_ring[index] = pending_idx; ++ ++ netif_put(netif); ++ ++ netbk->mmap_pages[pending_idx]->mapping = 0; ++ put_page(netbk->mmap_pages[pending_idx]); ++ netbk->mmap_pages[pending_idx] = NULL; ++} ++#endif + + irqreturn_t netif_be_int(int irq, void *dev_id) + { +@@ -1735,12 +1906,14 @@ static inline int rx_work_todo(struct xen_netbk *netbk) + + static inline int tx_work_todo(struct xen_netbk *netbk) + { ++#ifdef HAVE_XEN_PAGE_FOREIGN + if (netbk->dealloc_cons != netbk->dealloc_prod) + return 1; + + if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB && + !list_empty(&netbk->pending_inuse_head)) + return 1; ++#endif + + if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && + !list_empty(&netbk->net_schedule_list)) +@@ -1775,7 +1948,9 @@ static int netbk_action_thread(void *data) + static int __init netback_init(void) + { + int i; ++#ifdef HAVE_XEN_PAGE_FOREIGN + struct page *page; ++#endif + int rc = 0; + int group; + +@@ -1799,11 +1974,14 @@ static int __init netback_init(void) + netbk->net_timer.data = (unsigned long)netbk; + netbk->net_timer.function = net_alarm; + ++#ifdef HAVE_XEN_PAGE_FOREIGN + init_timer(&netbk->netbk_tx_pending_timer); + netbk->netbk_tx_pending_timer.data = (unsigned long)netbk; + netbk->netbk_tx_pending_timer.function = + netbk_tx_pending_timeout; ++#endif + ++#ifdef HAVE_XEN_PAGE_FOREIGN + netbk->mmap_pages = + alloc_empty_pages_and_pagevec(MAX_PENDING_REQS); + if (!netbk->mmap_pages) { +@@ -1817,9 +1995,10 @@ static int __init netback_init(void) + for (i = 0; i < MAX_PENDING_REQS; i++) { + page = netbk->mmap_pages[i]; + SetPageForeign(page, netif_page_release); +- netif_set_page_ext(page, group, i); ++ netif_set_page_ext(page, netbk, i); + INIT_LIST_HEAD(&netbk->pending_inuse[i].list); + } ++#endif + + netbk->pending_cons = 0; + netbk->pending_prod = MAX_PENDING_REQS; +@@ -1838,9 +2017,11 @@ static int __init netback_init(void) + } else { + printk(KERN_ALERT + "kthread_run() fails at netback\n"); ++#ifdef HAVE_XEN_PAGE_FOREIGN + free_empty_pages_and_pagevec(netbk->mmap_pages, + MAX_PENDING_REQS); + del_timer(&netbk->netbk_tx_pending_timer); ++#endif + del_timer(&netbk->net_timer); + rc = PTR_ERR(netbk->kthread.task); + goto failed_init; +@@ -1854,17 +2035,19 @@ static int __init netback_init(void) + (unsigned long)netbk); + } + ++#ifdef HAVE_XEN_PAGE_FOREIGN + INIT_LIST_HEAD(&netbk->pending_inuse_head); ++#endif + INIT_LIST_HEAD(&netbk->net_schedule_list); + + spin_lock_init(&netbk->net_schedule_list_lock); + + atomic_set(&netbk->netfront_count, 0); +- + if (MODPARM_netback_kthread) + wake_up_process(netbk->kthread.task); + } + ++#ifdef HAVE_XEN_PAGE_FOREIGN + netbk_copy_skb_mode = NETBK_DONT_COPY_SKB; + if (MODPARM_copy_skb) { + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace, +@@ -1873,6 +2056,7 @@ static int __init netback_init(void) + else + netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB; + } ++#endif + + rc = netif_xenbus_init(); + if (rc) +@@ -1892,9 +2076,17 @@ static int __init netback_init(void) + failed_init: + for (i = 0; i < group; i++) { + struct xen_netbk *netbk = &xen_netbk[i]; ++#ifdef HAVE_XEN_PAGE_FOREIGN + free_empty_pages_and_pagevec(netbk->mmap_pages, + MAX_PENDING_REQS); + del_timer(&netbk->netbk_tx_pending_timer); ++#else ++ 
int j; ++ for (j = 0; j < MAX_PENDING_REQS; j++) { ++ if (netbk->mmap_pages[i]) ++ __free_page(netbk->mmap_pages[i]); ++ } ++#endif + del_timer(&netbk->net_timer); + if (MODPARM_netback_kthread) + kthread_stop(netbk->kthread.task); +diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c +index 640c696..f6bf50e 100644 +--- a/drivers/net/xen-netback/xenbus.c ++++ b/drivers/net/xen-netback/xenbus.c +@@ -64,8 +64,10 @@ static int netback_probe(struct xenbus_device *dev, + dev_set_drvdata(&dev->dev, be); + + sg = 1; ++#ifdef HAVE_XEN_PAGE_FOREIGN + if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) + sg = 0; ++#endif + + do { + err = xenbus_transaction_start(&xbt); +-- +1.7.3.4 + + +From 03ff29cd511480cae999d204ec068ee72075edcc Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Wed, 19 Jan 2011 09:43:44 +0000 +Subject: [PATCH 124/139] xen: netback: completely drop foreign page support + +for i in drivers/net/xen-netback/*.[ch] ; do + echo $i + ./scripts/unifdef -B -UHAVE_XEN_PAGE_FOREIGN $i > $i.unifdef + mv $i.unifdef $i +done + +Signed-off-by: Ian Campbell +--- + drivers/net/xen-netback/common.h | 46 ----- + drivers/net/xen-netback/interface.c | 47 ----- + drivers/net/xen-netback/netback.c | 345 ----------------------------------- + drivers/net/xen-netback/xenbus.c | 4 - + 4 files changed, 0 insertions(+), 442 deletions(-) + +diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h +index f45bac8..2d55ed6 100644 +--- a/drivers/net/xen-netback/common.h ++++ b/drivers/net/xen-netback/common.h +@@ -93,11 +93,6 @@ struct xen_netif { + unsigned long remaining_credit; + struct timer_list credit_timeout; + +-#ifdef HAVE_XEN_PAGE_FOREIGN +- /* Statistics */ +- int nr_copied_skbs; +-#endif +- + /* Miscellaneous private stuff. */ + struct list_head list; /* scheduling list */ + atomic_t refcnt; +@@ -119,16 +114,6 @@ struct xen_netif { + #define netback_carrier_off(netif) ((netif)->carrier = 0) + #define netback_carrier_ok(netif) ((netif)->carrier) + +-#ifdef HAVE_XEN_PAGE_FOREIGN +-enum { +- NETBK_DONT_COPY_SKB, +- NETBK_DELAYED_COPY_SKB, +- NETBK_ALWAYS_COPY_SKB, +-}; +- +-extern int netbk_copy_skb_mode; +-#endif +- + struct backend_info { + struct xenbus_device *dev; + struct xen_netif *netif; +@@ -195,13 +180,6 @@ struct netbk_rx_meta { + int gso_size; + }; + +-#ifdef HAVE_XEN_PAGE_FOREIGN +-struct netbk_tx_pending_inuse { +- struct list_head list; +- unsigned long alloc_time; +-}; +-#endif +- + #define MAX_PENDING_REQS 256 + + #define MAX_BUFFER_OFFSET PAGE_SIZE +@@ -238,24 +216,11 @@ struct xen_netbk { + struct sk_buff_head tx_queue; + + struct timer_list net_timer; +-#ifdef HAVE_XEN_PAGE_FOREIGN +- struct timer_list netbk_tx_pending_timer; +-#endif + +-#ifdef HAVE_XEN_PAGE_FOREIGN +- struct page **mmap_pages; +-#else + struct page *mmap_pages[MAX_PENDING_REQS]; +-#endif + + pending_ring_idx_t pending_prod; + pending_ring_idx_t pending_cons; +-#ifdef HAVE_XEN_PAGE_FOREIGN +- pending_ring_idx_t dealloc_prod; +- pending_ring_idx_t dealloc_cons; +- +- struct list_head pending_inuse_head; +-#endif + struct list_head net_schedule_list; + + /* Protect the net_schedule_list in netif. 
*/ +@@ -264,20 +229,9 @@ struct xen_netbk { + atomic_t netfront_count; + + struct pending_tx_info pending_tx_info[MAX_PENDING_REQS]; +-#ifdef HAVE_XEN_PAGE_FOREIGN +- struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS]; +- struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS]; +- struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS]; +- +- grant_handle_t grant_tx_handle[MAX_PENDING_REQS]; +-#else + struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS]; +-#endif + + u16 pending_ring[MAX_PENDING_REQS]; +-#ifdef HAVE_XEN_PAGE_FOREIGN +- u16 dealloc_ring[MAX_PENDING_REQS]; +-#endif + + /* + * Each head or fragment can be up to 4096 bytes. Given +diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c +index 3ff3aff..eae5cf8 100644 +--- a/drivers/net/xen-netback/interface.c ++++ b/drivers/net/xen-netback/interface.c +@@ -186,48 +186,6 @@ static void netbk_get_drvinfo(struct net_device *dev, + strcpy(info->bus_info, dev_name(dev->dev.parent)); + } + +-#ifdef HAVE_XEN_PAGE_FOREIGN +-static const struct netif_stat { +- char name[ETH_GSTRING_LEN]; +- u16 offset; +-} netbk_stats[] = { +- { "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) }, +-}; +- +-static int netbk_get_sset_count(struct net_device *dev, int string_set) +-{ +- switch (string_set) { +- case ETH_SS_STATS: +- return ARRAY_SIZE(netbk_stats); +- default: +- return -EINVAL; +- } +-} +- +-static void netbk_get_ethtool_stats(struct net_device *dev, +- struct ethtool_stats *stats, u64 * data) +-{ +- void *netif = netdev_priv(dev); +- int i; +- +- for (i = 0; i < ARRAY_SIZE(netbk_stats); i++) +- data[i] = *(int *)(netif + netbk_stats[i].offset); +-} +- +-static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data) +-{ +- int i; +- +- switch (stringset) { +- case ETH_SS_STATS: +- for (i = 0; i < ARRAY_SIZE(netbk_stats); i++) +- memcpy(data + i * ETH_GSTRING_LEN, +- netbk_stats[i].name, ETH_GSTRING_LEN); +- break; +- } +-} +-#endif +- + static struct ethtool_ops network_ethtool_ops = { + .get_drvinfo = netbk_get_drvinfo, + +@@ -239,11 +197,6 @@ static struct ethtool_ops network_ethtool_ops = { + .set_tso = netbk_set_tso, + .get_link = ethtool_op_get_link, + +-#ifdef HAVE_XEN_PAGE_FOREIGN +- .get_sset_count = netbk_get_sset_count, +- .get_ethtool_stats = netbk_get_ethtool_stats, +- .get_strings = netbk_get_strings, +-#endif + }; + + static struct net_device_ops netback_ops = { +diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c +index 6a1aa5c..b7b9341 100644 +--- a/drivers/net/xen-netback/netback.c ++++ b/drivers/net/xen-netback/netback.c +@@ -40,9 +40,6 @@ + + #include + +-#ifdef HAVE_XEN_PAGE_FOREIGN +-#include +-#endif + #include + #include + +@@ -99,11 +96,6 @@ static int netif_get_page_ext(struct page *pg, + struct xen_netbk *netbk; + unsigned int group, idx; + +-#ifdef HAVE_XEN_PAGE_FOREIGN +- if (!PageForeign(pg)) +- return 0; +-#endif +- + group = ext.e.group - 1; + + if (group < 0 || group >= xen_netbk_group_nr) +@@ -111,11 +103,6 @@ static int netif_get_page_ext(struct page *pg, + + netbk = &xen_netbk[group]; + +-#ifdef HAVE_XEN_PAGE_FOREIGN +- if (netbk->mmap_pages == NULL) +- return 0; +-#endif +- + idx = ext.e.idx; + + if ((idx < 0) || (idx >= MAX_PENDING_REQS)) +@@ -151,15 +138,6 @@ static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk) + netbk->pending_prod + netbk->pending_cons; + } + +-#ifdef HAVE_XEN_PAGE_FOREIGN +-/* Setting this allows the safe use of this driver without netloop. 
*/ +-static int MODPARM_copy_skb = 1; +-module_param_named(copy_skb, MODPARM_copy_skb, bool, 0); +-MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop"); +- +-int netbk_copy_skb_mode; +-#endif +- + static int MODPARM_netback_kthread; + module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0); + MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet"); +@@ -788,14 +766,6 @@ static void net_alarm(unsigned long data) + xen_netbk_bh_handler(netbk, 1); + } + +-#ifdef HAVE_XEN_PAGE_FOREIGN +-static void netbk_tx_pending_timeout(unsigned long data) +-{ +- struct xen_netbk *netbk = (struct xen_netbk *)data; +- xen_netbk_bh_handler(netbk, 0); +-} +-#endif +- + struct net_device_stats *netif_be_get_stats(struct net_device *dev) + { + struct xen_netif *netif = netdev_priv(dev); +@@ -903,123 +873,6 @@ static void tx_credit_callback(unsigned long data) + netif_schedule_work(netif); + } + +-#ifdef HAVE_XEN_PAGE_FOREIGN +-static inline int copy_pending_req(struct xen_netbk *netbk, +- pending_ring_idx_t pending_idx) +-{ +- return gnttab_copy_grant_page( +- netbk->grant_tx_handle[pending_idx], +- &netbk->mmap_pages[pending_idx]); +-} +-#endif +- +-#ifdef HAVE_XEN_PAGE_FOREIGN +-static inline void net_tx_action_dealloc(struct xen_netbk *netbk) +-{ +- struct netbk_tx_pending_inuse *inuse, *n; +- struct gnttab_unmap_grant_ref *gop; +- u16 pending_idx; +- pending_ring_idx_t dc, dp; +- struct xen_netif *netif; +- int ret; +- LIST_HEAD(list); +- +- dc = netbk->dealloc_cons; +- gop = netbk->tx_unmap_ops; +- +- /* Free up any grants we have finished using. */ +- do { +- dp = netbk->dealloc_prod; +- +- /* Ensure we see all indices enqueued by netif_idx_release(). */ +- smp_rmb(); +- +- while (dc != dp) { +- unsigned long pfn; +- struct netbk_tx_pending_inuse *pending_inuse = +- netbk->pending_inuse; +- +- pending_idx = netbk->dealloc_ring[pending_index(dc++)]; +- list_move_tail(&pending_inuse[pending_idx].list, &list); +- +- pfn = idx_to_pfn(netbk, pending_idx); +- /* Already unmapped? 
*/ +- if (!phys_to_machine_mapping_valid(pfn)) +- continue; +- +- gnttab_set_unmap_op(gop, +- idx_to_kaddr(netbk, pending_idx), +- GNTMAP_host_map, +- netbk->grant_tx_handle[pending_idx]); +- gop++; +- } +- +- } while (dp != netbk->dealloc_prod); +- +- netbk->dealloc_cons = dc; +- +- ret = HYPERVISOR_grant_table_op( +- GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops, +- gop - netbk->tx_unmap_ops); +- BUG_ON(ret); +- +- /* +- * Copy any entries that have been pending for too long +- */ +- if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB && +- !list_empty(&netbk->pending_inuse_head)) { +- list_for_each_entry_safe(inuse, n, +- &netbk->pending_inuse_head, list) { +- struct pending_tx_info *pending_tx_info; +- pending_tx_info = netbk->pending_tx_info; +- +- if (time_after(inuse->alloc_time + HZ / 2, jiffies)) +- break; +- +- pending_idx = inuse - netbk->pending_inuse; +- +- pending_tx_info[pending_idx].netif->nr_copied_skbs++; +- +- switch (copy_pending_req(netbk, pending_idx)) { +- case 0: +- list_move_tail(&inuse->list, &list); +- continue; +- case -EBUSY: +- list_del_init(&inuse->list); +- continue; +- case -ENOENT: +- continue; +- } +- +- break; +- } +- } +- +- list_for_each_entry_safe(inuse, n, &list, list) { +- struct pending_tx_info *pending_tx_info; +- pending_ring_idx_t index; +- +- pending_tx_info = netbk->pending_tx_info; +- pending_idx = inuse - netbk->pending_inuse; +- +- netif = pending_tx_info[pending_idx].netif; +- +- make_tx_response(netif, &pending_tx_info[pending_idx].req, +- NETIF_RSP_OKAY); +- +- /* Ready for next use. */ +- gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]); +- +- index = pending_index(netbk->pending_prod++); +- netbk->pending_ring[index] = pending_idx; +- +- netif_put(netif); +- +- list_del_init(&inuse->list); +- } +-} +-#endif +- + static void netbk_tx_err(struct xen_netif *netif, + struct xen_netif_tx_request *txp, RING_IDX end) + { +@@ -1077,7 +930,6 @@ static int netbk_count_requests(struct xen_netif *netif, + return frags; + } + +-#ifndef HAVE_XEN_PAGE_FOREIGN + static struct page *netif_alloc_page(struct xen_netbk *netbk, + struct sk_buff *skb, + unsigned long pending_idx) +@@ -1090,21 +942,12 @@ static struct page *netif_alloc_page(struct xen_netbk *netbk, + netbk->mmap_pages[pending_idx] = page; + return page; + } +-#endif + +-#ifdef HAVE_XEN_PAGE_FOREIGN +-static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk, +- struct xen_netif *netif, +- struct sk_buff *skb, +- struct xen_netif_tx_request *txp, +- struct gnttab_map_grant_ref *gop) +-#else + static struct gnttab_copy *netbk_get_requests(struct xen_netbk *netbk, + struct xen_netif *netif, + struct sk_buff *skb, + struct xen_netif_tx_request *txp, + struct gnttab_copy *gop) +-#endif + { + struct skb_shared_info *shinfo = skb_shinfo(skb); + skb_frag_t *frags = shinfo->frags; +@@ -1115,20 +958,13 @@ static struct gnttab_copy *netbk_get_requests(struct xen_netbk *netbk, + start = ((unsigned long)shinfo->frags[0].page == pending_idx); + + for (i = start; i < shinfo->nr_frags; i++, txp++) { +-#ifndef HAVE_XEN_PAGE_FOREIGN + struct page *page; +-#endif + pending_ring_idx_t index; + struct pending_tx_info *pending_tx_info = + netbk->pending_tx_info; + + index = pending_index(netbk->pending_cons++); + pending_idx = netbk->pending_ring[index]; +-#ifdef HAVE_XEN_PAGE_FOREIGN +- gnttab_set_map_op(gop++, idx_to_kaddr(netbk, pending_idx), +- GNTMAP_host_map | GNTMAP_readonly, +- txp->gref, netif->domid); +-#else + page = netif_alloc_page(netbk, skb, pending_idx); + if (!page) + 
return NULL; +@@ -1147,7 +983,6 @@ static struct gnttab_copy *netbk_get_requests(struct xen_netbk *netbk, + gop->flags = GNTCOPY_source_gref; + + gop++; +-#endif + + memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp)); + netif_get(netif); +@@ -1158,21 +993,11 @@ static struct gnttab_copy *netbk_get_requests(struct xen_netbk *netbk, + return gop; + } + +-#ifdef HAVE_XEN_PAGE_FOREIGN +-static int netbk_tx_check_gop(struct xen_netbk *netbk, +- struct sk_buff *skb, +- struct gnttab_map_grant_ref **gopp) +-#else + static int netbk_tx_check_gop(struct xen_netbk *netbk, + struct sk_buff *skb, + struct gnttab_copy **gopp) +-#endif + { +-#ifdef HAVE_XEN_PAGE_FOREIGN +- struct gnttab_map_grant_ref *gop = *gopp; +-#else + struct gnttab_copy *gop = *gopp; +-#endif + int pending_idx = *((u16 *)skb->data); + struct pending_tx_info *pending_tx_info = netbk->pending_tx_info; + struct xen_netif *netif = pending_tx_info[pending_idx].netif; +@@ -1190,13 +1015,6 @@ static int netbk_tx_check_gop(struct xen_netbk *netbk, + make_tx_response(netif, txp, NETIF_RSP_ERROR); + netbk->pending_ring[index] = pending_idx; + netif_put(netif); +-#ifdef HAVE_XEN_PAGE_FOREIGN +- } else { +- set_phys_to_machine( +- __pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT, +- FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT)); +- netbk->grant_tx_handle[pending_idx] = gop->handle; +-#endif + } + + /* Skip first skb fragment if it is on same page as header fragment. */ +@@ -1211,14 +1029,6 @@ static int netbk_tx_check_gop(struct xen_netbk *netbk, + /* Check error status: if okay then remember grant handle. */ + newerr = (++gop)->status; + if (likely(!newerr)) { +-#ifdef HAVE_XEN_PAGE_FOREIGN +- unsigned long addr; +- addr = idx_to_kaddr(netbk, pending_idx); +- set_phys_to_machine( +- __pa(addr)>>PAGE_SHIFT, +- FOREIGN_FRAME(gop->dev_bus_addr>>PAGE_SHIFT)); +- netbk->grant_tx_handle[pending_idx] = gop->handle; +-#endif + /* Had a previous error? Invalidate this fragment. 
*/ + if (unlikely(err)) + netif_idx_release(netbk, pending_idx); +@@ -1264,11 +1074,6 @@ static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb) + unsigned long pending_idx; + + pending_idx = (unsigned long)frag->page; +-#ifdef HAVE_XEN_PAGE_FOREIGN +- netbk->pending_inuse[pending_idx].alloc_time = jiffies; +- list_add_tail(&netbk->pending_inuse[pending_idx].list, +- &netbk->pending_inuse_head); +-#endif + + txp = &netbk->pending_tx_info[pending_idx].req; + frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx)); +@@ -1411,11 +1216,7 @@ static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size) + + static unsigned net_tx_build_gops(struct xen_netbk *netbk) + { +-#ifdef HAVE_XEN_PAGE_FOREIGN +- struct gnttab_map_grant_ref *gop = netbk->tx_map_ops, *request_gop; +-#else + struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop; +-#endif + struct sk_buff *skb; + int ret; + +@@ -1424,9 +1225,7 @@ static unsigned net_tx_build_gops(struct xen_netbk *netbk) + struct xen_netif *netif; + struct xen_netif_tx_request txreq; + struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS]; +-#ifndef HAVE_XEN_PAGE_FOREIGN + struct page *page; +-#endif + struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1]; + u16 pending_idx; + RING_IDX idx; +@@ -1523,12 +1322,6 @@ static unsigned net_tx_build_gops(struct xen_netbk *netbk) + } + } + +-#ifdef HAVE_XEN_PAGE_FOREIGN +- gnttab_set_map_op(gop, idx_to_kaddr(netbk, pending_idx), +- GNTMAP_host_map | GNTMAP_readonly, +- txreq.gref, netif->domid); +- gop++; +-#else + /* XXX could copy straight to head */ + page = netif_alloc_page(netbk, skb, pending_idx); + if (!page) { +@@ -1551,7 +1344,6 @@ static unsigned net_tx_build_gops(struct xen_netbk *netbk) + gop->flags = GNTCOPY_source_gref; + + gop++; +-#endif + + memcpy(&netbk->pending_tx_info[pending_idx].req, + &txreq, sizeof(txreq)); +@@ -1586,29 +1378,16 @@ static unsigned net_tx_build_gops(struct xen_netbk *netbk) + netif->tx.req_cons = idx; + netif_schedule_work(netif); + +-#ifdef HAVE_XEN_PAGE_FOREIGN +- if ((gop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops)) +- break; +-#else + if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops)) + break; +-#endif + } + +-#ifdef HAVE_XEN_PAGE_FOREIGN +- return gop - netbk->tx_map_ops; +-#else + return gop - netbk->tx_copy_ops; +-#endif + } + + static void net_tx_submit(struct xen_netbk *netbk) + { +-#ifdef HAVE_XEN_PAGE_FOREIGN +- struct gnttab_map_grant_ref *gop = netbk->tx_map_ops; +-#else + struct gnttab_copy *gop = netbk->tx_copy_ops; +-#endif + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) { +@@ -1674,15 +1453,6 @@ static void net_tx_submit(struct xen_netbk *netbk) + continue; + } + +-#ifdef HAVE_XEN_PAGE_FOREIGN +- if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) && +- unlikely(skb_linearize(skb))) { +- DPRINTK("Can't linearize skb in net_tx_action.\n"); +- kfree_skb(skb); +- continue; +- } +-#endif +- + netif->stats.rx_bytes += skb->len; + netif->stats.rx_packets++; + +@@ -1698,70 +1468,18 @@ static void net_tx_action(unsigned long data) + unsigned nr_gops; + int ret; + +-#ifdef HAVE_XEN_PAGE_FOREIGN +- net_tx_action_dealloc(netbk); +-#endif +- + nr_gops = net_tx_build_gops(netbk); + +-#ifdef HAVE_XEN_PAGE_FOREIGN +- if (nr_gops == 0) +- goto out; +- ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, +- netbk->tx_map_ops, nr_gops); +-#else + if (nr_gops == 0) + return; + ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, + netbk->tx_copy_ops, nr_gops); +-#endif + 
BUG_ON(ret); + + net_tx_submit(netbk); + +-#ifdef HAVE_XEN_PAGE_FOREIGN +-out: +- if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB && +- !list_empty(&netbk->pending_inuse_head)) { +- struct netbk_tx_pending_inuse *oldest; +- +- oldest = list_entry(netbk->pending_inuse_head.next, +- struct netbk_tx_pending_inuse, list); +- mod_timer(&netbk->netbk_tx_pending_timer, +- oldest->alloc_time + HZ); +- } +-#endif + } + +-#ifdef HAVE_XEN_PAGE_FOREIGN +-static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx) +-{ +- static DEFINE_SPINLOCK(_lock); +- unsigned long flags; +- pending_ring_idx_t index; +- +- spin_lock_irqsave(&_lock, flags); +- index = pending_index(netbk->dealloc_prod); +- netbk->dealloc_ring[index] = pending_idx; +- /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */ +- smp_wmb(); +- netbk->dealloc_prod++; +- spin_unlock_irqrestore(&_lock, flags); +- +- xen_netbk_bh_handler(netbk, 0); +-} +- +-static void netif_page_release(struct page *page, unsigned int order) +-{ +- unsigned int group, idx; +- int foreign = netif_get_page_ext(page, &group, &idx); +- +- BUG_ON(!foreign); +- BUG_ON(order); +- +- netif_idx_release(&xen_netbk[group], idx); +-} +-#else + static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx) + { + struct xen_netif *netif; +@@ -1788,7 +1506,6 @@ static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx) + put_page(netbk->mmap_pages[pending_idx]); + netbk->mmap_pages[pending_idx] = NULL; + } +-#endif + + irqreturn_t netif_be_int(int irq, void *dev_id) + { +@@ -1906,14 +1623,6 @@ static inline int rx_work_todo(struct xen_netbk *netbk) + + static inline int tx_work_todo(struct xen_netbk *netbk) + { +-#ifdef HAVE_XEN_PAGE_FOREIGN +- if (netbk->dealloc_cons != netbk->dealloc_prod) +- return 1; +- +- if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB && +- !list_empty(&netbk->pending_inuse_head)) +- return 1; +-#endif + + if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && + !list_empty(&netbk->net_schedule_list)) +@@ -1948,9 +1657,6 @@ static int netbk_action_thread(void *data) + static int __init netback_init(void) + { + int i; +-#ifdef HAVE_XEN_PAGE_FOREIGN +- struct page *page; +-#endif + int rc = 0; + int group; + +@@ -1974,32 +1680,6 @@ static int __init netback_init(void) + netbk->net_timer.data = (unsigned long)netbk; + netbk->net_timer.function = net_alarm; + +-#ifdef HAVE_XEN_PAGE_FOREIGN +- init_timer(&netbk->netbk_tx_pending_timer); +- netbk->netbk_tx_pending_timer.data = (unsigned long)netbk; +- netbk->netbk_tx_pending_timer.function = +- netbk_tx_pending_timeout; +-#endif +- +-#ifdef HAVE_XEN_PAGE_FOREIGN +- netbk->mmap_pages = +- alloc_empty_pages_and_pagevec(MAX_PENDING_REQS); +- if (!netbk->mmap_pages) { +- printk(KERN_ALERT "%s: out of memory\n", __func__); +- del_timer(&netbk->netbk_tx_pending_timer); +- del_timer(&netbk->net_timer); +- rc = -ENOMEM; +- goto failed_init; +- } +- +- for (i = 0; i < MAX_PENDING_REQS; i++) { +- page = netbk->mmap_pages[i]; +- SetPageForeign(page, netif_page_release); +- netif_set_page_ext(page, netbk, i); +- INIT_LIST_HEAD(&netbk->pending_inuse[i].list); +- } +-#endif +- + netbk->pending_cons = 0; + netbk->pending_prod = MAX_PENDING_REQS; + for (i = 0; i < MAX_PENDING_REQS; i++) +@@ -2017,11 +1697,6 @@ static int __init netback_init(void) + } else { + printk(KERN_ALERT + "kthread_run() fails at netback\n"); +-#ifdef HAVE_XEN_PAGE_FOREIGN +- free_empty_pages_and_pagevec(netbk->mmap_pages, +- MAX_PENDING_REQS); +- 
del_timer(&netbk->netbk_tx_pending_timer); +-#endif + del_timer(&netbk->net_timer); + rc = PTR_ERR(netbk->kthread.task); + goto failed_init; +@@ -2035,9 +1710,6 @@ static int __init netback_init(void) + (unsigned long)netbk); + } + +-#ifdef HAVE_XEN_PAGE_FOREIGN +- INIT_LIST_HEAD(&netbk->pending_inuse_head); +-#endif + INIT_LIST_HEAD(&netbk->net_schedule_list); + + spin_lock_init(&netbk->net_schedule_list_lock); +@@ -2047,17 +1719,6 @@ static int __init netback_init(void) + wake_up_process(netbk->kthread.task); + } + +-#ifdef HAVE_XEN_PAGE_FOREIGN +- netbk_copy_skb_mode = NETBK_DONT_COPY_SKB; +- if (MODPARM_copy_skb) { +- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace, +- NULL, 0)) +- netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB; +- else +- netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB; +- } +-#endif +- + rc = netif_xenbus_init(); + if (rc) + goto failed_init; +@@ -2076,17 +1737,11 @@ static int __init netback_init(void) + failed_init: + for (i = 0; i < group; i++) { + struct xen_netbk *netbk = &xen_netbk[i]; +-#ifdef HAVE_XEN_PAGE_FOREIGN +- free_empty_pages_and_pagevec(netbk->mmap_pages, +- MAX_PENDING_REQS); +- del_timer(&netbk->netbk_tx_pending_timer); +-#else + int j; + for (j = 0; j < MAX_PENDING_REQS; j++) { + if (netbk->mmap_pages[i]) + __free_page(netbk->mmap_pages[i]); + } +-#endif + del_timer(&netbk->net_timer); + if (MODPARM_netback_kthread) + kthread_stop(netbk->kthread.task); +diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c +index f6bf50e..0aa3366 100644 +--- a/drivers/net/xen-netback/xenbus.c ++++ b/drivers/net/xen-netback/xenbus.c +@@ -64,10 +64,6 @@ static int netback_probe(struct xenbus_device *dev, + dev_set_drvdata(&dev->dev, be); + + sg = 1; +-#ifdef HAVE_XEN_PAGE_FOREIGN +- if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) +- sg = 0; +-#endif + + do { + err = xenbus_transaction_start(&xbt); +-- +1.7.3.4 + + +From 7f1492ef6a8026cba4c1b49b7a2030802f76ec83 Mon Sep 17 00:00:00 2001 +From: Ian Campbell +Date: Wed, 19 Jan 2011 14:41:55 +0000 +Subject: [PATCH 125/139] xen: netback: drop ethtool drvinfo callback + +The default provided by the network core is sufficient for our needs. + +Signed-off-by: Ian Campbell +--- + drivers/net/xen-netback/interface.c | 9 --------- + 1 files changed, 0 insertions(+), 9 deletions(-) + +diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c +index eae5cf8..295ab3d 100644 +--- a/drivers/net/xen-netback/interface.c ++++ b/drivers/net/xen-netback/interface.c +@@ -179,16 +179,7 @@ static int netbk_set_tso(struct net_device *dev, u32 data) + return 0; + } + +-static void netbk_get_drvinfo(struct net_device *dev, +- struct ethtool_drvinfo *info) +-{ +- strcpy(info->driver, "netbk"); +- strcpy(info->bus_info, dev_name(dev->dev.parent)); +-} +- + static struct ethtool_ops network_ethtool_ops = { +- .get_drvinfo = netbk_get_drvinfo, +- + .get_tx_csum = ethtool_op_get_tx_csum, + .set_tx_csum = netbk_set_tx_csum, + .get_sg = ethtool_op_get_sg, +-- +1.7.3.4 + + +From 83381aa69cf38fc26125019479527e0710fe27cd Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Wed, 15 Dec 2010 13:31:03 -0500 +Subject: [PATCH 126/139] ttm: Set VM_IO only on pages with TTM_MEMTYPE_FLAG_FIXED set. + +This patch is based on "[Patch RFC] ttm: nouveau accelerated on Xen +pv-ops kernel" +http://lists.freedesktop.org/archives/nouveau/2010-March/005326.html + +Under Xen, the PFN of page is virtualized. The physical addresses used +for DMA programming needs to be the Machine Frame Number (MFN). 
+Xen transparently does the correct translation using the _PAGE_IOMEM +PTE bit. If the bit is set, Xen assumes that the backing memory is in +the IOMEM space, and PFN equals MFN. If not set, page_to_pfn() returns +a phantom MFN. + +The patch enables the ttm_bo_vm_fault() handler to behave correctly +under Xen, and has no side-effects on normal (not under Xen) operations. + +The use of TTM_MEMTYPE_FLAG_FIXED in the check assumes that +only pages which have this flag are backed by device memory or IO. + +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Arvind R + +Conflicts: + + drivers/gpu/drm/ttm/ttm_bo_vm.c +--- + drivers/gpu/drm/ttm/ttm_bo_vm.c | 7 ++++++- + 1 files changed, 6 insertions(+), 1 deletions(-) + +diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c +index 221b924..bb24374 100644 +--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c ++++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c +@@ -247,6 +247,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma, + { + struct ttm_bo_driver *driver; + struct ttm_buffer_object *bo; ++ struct ttm_mem_type_manager *man; + int ret; + + read_lock(&bdev->vm_lock); +@@ -279,7 +280,11 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma, + */ + + vma->vm_private_data = bo; +- vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND; ++ vma->vm_flags |= VM_RESERVED | VM_MIXEDMAP | VM_DONTEXPAND; ++ man = &bdev->man[bo->mem.mem_type]; ++ if (man->flags & TTM_MEMTYPE_FLAG_FIXED) ++ vma->vm_flags |= VM_IO; ++ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + return 0; + out_unref: + ttm_bo_unref(&bo); +-- +1.7.3.4 + + +From 9c2e85765d147fc77ae27cb81a7091942f22a584 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Wed, 15 Dec 2010 13:32:15 -0500 +Subject: [PATCH 127/139] drm: recompute vma->vm_page_prot after changing vm_flags + +vm_get_page_prot() computes vm_page_prot depending on vm_flags, so +we need to re-call it if we change flags. + +Signed-off-by: Jeremy Fitzhardinge + +Conflicts: + + drivers/gpu/drm/ttm/ttm_bo_vm.c +--- + drivers/gpu/drm/ttm/ttm_bo_vm.c | 1 + + 1 files changed, 1 insertions(+), 0 deletions(-) + +diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c +index bb24374..735dc1d 100644 +--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c ++++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c +@@ -300,6 +300,7 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo) + vma->vm_ops = &ttm_bo_vm_ops; + vma->vm_private_data = ttm_bo_reference(bo); + vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND; ++ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + return 0; + } + EXPORT_SYMBOL(ttm_fbdev_mmap); +-- +1.7.3.4 + + +From 74632f8e51618dc31beba712d03dd0f1168cc241 Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Fri, 20 Feb 2009 15:58:42 -0500 +Subject: [PATCH 128/139] x86: define arch_vm_get_page_prot to set _PAGE_IOMAP on VM_IO vmas + +Set _PAGE_IOMAP in ptes mapping a VM_IO vma. This says that the mapping +is of a real piece of physical hardware, and not just system memory. + +Xen, in particular, uses to this to inhibit the normal pfn->mfn conversion +that would normally happen - in other words, treat the address directly +as a machine physical address without converting it from pseudo-physical. 
+ +[ Impact: make VM_IO mappings map the right thing under Xen ] +[ v2: rebased on v2.6.37-rc1] +Signed-off-by: Jeremy Fitzhardinge +Signed-off-by: Konrad Rzeszutek Wilk +--- + arch/x86/include/asm/pgtable.h | 3 +++ + arch/x86/mm/pgtable.c | 10 ++++++++++ + 2 files changed, 13 insertions(+), 0 deletions(-) + +diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h +index 18601c8..284ee01 100644 +--- a/arch/x86/include/asm/pgtable.h ++++ b/arch/x86/include/asm/pgtable.h +@@ -485,6 +485,9 @@ static inline unsigned long pages_to_mb(unsigned long npg) + #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ + remap_pfn_range(vma, vaddr, pfn, size, prot) + ++#define arch_vm_get_page_prot arch_vm_get_page_prot ++extern pgprot_t arch_vm_get_page_prot(unsigned vm_flags); ++ + #if PAGETABLE_LEVELS > 2 + static inline int pud_none(pud_t pud) + { +diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c +index 500242d..1e72207 100644 +--- a/arch/x86/mm/pgtable.c ++++ b/arch/x86/mm/pgtable.c +@@ -15,6 +15,16 @@ + + gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP; + ++pgprot_t arch_vm_get_page_prot(unsigned vm_flags) ++{ ++ pgprot_t ret = __pgprot(0); ++ ++ if (vm_flags & VM_IO) ++ ret = __pgprot(_PAGE_IOMAP); ++ ++ return ret; ++} ++ + pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) + { + return (pte_t *)__get_free_page(PGALLOC_GFP); +-- +1.7.3.4 + + +From 81ec0e742ce919124909640039c05baa29b1568a Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Wed, 8 Dec 2010 11:03:27 -0800 +Subject: [PATCH 129/139] mm: remove unused "token" argument from apply_to_page_range callback. + +The argument is basically the struct page of the pte_t * passed into +the callback. But there's no need to pass that, since it can be fairly +easily derived from the pte_t * itself if needed (and no current users +need to do that anyway). 
+ +Signed-off-by: Jeremy Fitzhardinge +--- + arch/x86/xen/grant-table.c | 6 ++---- + arch/x86/xen/mmu.c | 3 +-- + include/linux/mm.h | 3 +-- + mm/memory.c | 2 +- + mm/vmalloc.c | 2 +- + 5 files changed, 6 insertions(+), 10 deletions(-) + +diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c +index 49ba9b5..5bf892a 100644 +--- a/arch/x86/xen/grant-table.c ++++ b/arch/x86/xen/grant-table.c +@@ -44,8 +44,7 @@ + + #include + +-static int map_pte_fn(pte_t *pte, struct page *pmd_page, +- unsigned long addr, void *data) ++static int map_pte_fn(pte_t *pte, unsigned long addr, void *data) + { + unsigned long **frames = (unsigned long **)data; + +@@ -54,8 +53,7 @@ static int map_pte_fn(pte_t *pte, struct page *pmd_page, + return 0; + } + +-static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, +- unsigned long addr, void *data) ++static int unmap_pte_fn(pte_t *pte, unsigned long addr, void *data) + { + + set_pte_at(&init_mm, addr, pte, __pte(0)); +diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c +index 5e92b61..38ba804 100644 +--- a/arch/x86/xen/mmu.c ++++ b/arch/x86/xen/mmu.c +@@ -2292,8 +2292,7 @@ struct remap_data { + struct mmu_update *mmu_update; + }; + +-static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, +- unsigned long addr, void *data) ++static int remap_area_mfn_pte_fn(pte_t *ptep, unsigned long addr, void *data) + { + struct remap_data *rmd = data; + pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 956a355..bb898ec 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1529,8 +1529,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, + #define FOLL_MLOCK 0x40 /* mark page as mlocked */ + #define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */ + +-typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, +- void *data); ++typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data); + extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, + unsigned long size, pte_fn_t fn, void *data); + +diff --git a/mm/memory.c b/mm/memory.c +index 31250fa..740470c 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -2032,7 +2032,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, + token = pmd_pgtable(*pmd); + + do { +- err = fn(pte++, token, addr, data); ++ err = fn(pte++, addr, data); + if (err) + break; + } while (addr += PAGE_SIZE, addr != end); +diff --git a/mm/vmalloc.c b/mm/vmalloc.c +index f9b1667..5ddbdfe 100644 +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -2061,7 +2061,7 @@ void __attribute__((weak)) vmalloc_sync_all(void) + } + + +-static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data) ++static int f(pte_t *pte, unsigned long addr, void *data) + { + /* apply_to_page_range() does all the hard work. */ + return 0; +-- +1.7.3.4 + + +From 7f635db45f8e921c9203fdfb904d0095b7af6480 Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Tue, 30 Nov 2010 10:03:44 -0800 +Subject: [PATCH 130/139] mm: add apply_to_page_range_batch() + +apply_to_page_range() calls its callback function once for each pte, which +is pretty inefficient since it will almost always be operating on a batch +of adjacent ptes. apply_to_page_range_batch() calls its callback +with both a pte_t * and a count, so it can operate on multiple ptes at +once. + +The callback is expected to handle all its ptes, or return an error. 
For +both apply_to_page_range and apply_to_page_range_batch, it is up to +the caller to work out how much progress was made if either fails with +an error. + +Signed-off-by: Jeremy Fitzhardinge +--- + include/linux/mm.h | 6 +++++ + mm/memory.c | 57 +++++++++++++++++++++++++++++++++++++-------------- + 2 files changed, 47 insertions(+), 16 deletions(-) + +diff --git a/include/linux/mm.h b/include/linux/mm.h +index bb898ec..5a32a8a 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1533,6 +1533,12 @@ typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data); + extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, + unsigned long size, pte_fn_t fn, void *data); + ++typedef int (*pte_batch_fn_t)(pte_t *pte, unsigned count, ++ unsigned long addr, void *data); ++extern int apply_to_page_range_batch(struct mm_struct *mm, ++ unsigned long address, unsigned long size, ++ pte_batch_fn_t fn, void *data); ++ + #ifdef CONFIG_PROC_FS + void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long); + #else +diff --git a/mm/memory.c b/mm/memory.c +index 740470c..496e4e6 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -2012,11 +2012,10 @@ EXPORT_SYMBOL(remap_pfn_range); + + static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, unsigned long end, +- pte_fn_t fn, void *data) ++ pte_batch_fn_t fn, void *data) + { + pte_t *pte; + int err; +- pgtable_t token; + spinlock_t *uninitialized_var(ptl); + + pte = (mm == &init_mm) ? +@@ -2028,25 +2027,17 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, + BUG_ON(pmd_huge(*pmd)); + + arch_enter_lazy_mmu_mode(); +- +- token = pmd_pgtable(*pmd); +- +- do { +- err = fn(pte++, addr, data); +- if (err) +- break; +- } while (addr += PAGE_SIZE, addr != end); +- ++ err = fn(pte, (end - addr) / PAGE_SIZE, addr, data); + arch_leave_lazy_mmu_mode(); + + if (mm != &init_mm) +- pte_unmap_unlock(pte-1, ptl); ++ pte_unmap_unlock(pte, ptl); + return err; + } + + static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, + unsigned long addr, unsigned long end, +- pte_fn_t fn, void *data) ++ pte_batch_fn_t fn, void *data) + { + pmd_t *pmd; + unsigned long next; +@@ -2068,7 +2059,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, + + static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end, +- pte_fn_t fn, void *data) ++ pte_batch_fn_t fn, void *data) + { + pud_t *pud; + unsigned long next; +@@ -2090,8 +2081,9 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd, + * Scan a region of virtual memory, filling in page tables as necessary + * and calling a provided function on each leaf page table. 
+ */ +-int apply_to_page_range(struct mm_struct *mm, unsigned long addr, +- unsigned long size, pte_fn_t fn, void *data) ++int apply_to_page_range_batch(struct mm_struct *mm, ++ unsigned long addr, unsigned long size, ++ pte_batch_fn_t fn, void *data) + { + pgd_t *pgd; + unsigned long next; +@@ -2109,6 +2101,39 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, + + return err; + } ++EXPORT_SYMBOL_GPL(apply_to_page_range_batch); ++ ++struct pte_single_fn ++{ ++ pte_fn_t fn; ++ void *data; ++}; ++ ++static int apply_pte_batch(pte_t *pte, unsigned count, ++ unsigned long addr, void *data) ++{ ++ struct pte_single_fn *single = data; ++ int err = 0; ++ ++ while (count--) { ++ err = single->fn(pte, addr, single->data); ++ if (err) ++ break; ++ ++ addr += PAGE_SIZE; ++ pte++; ++ } ++ ++ return err; ++} ++ ++int apply_to_page_range(struct mm_struct *mm, unsigned long addr, ++ unsigned long size, pte_fn_t fn, void *data) ++{ ++ struct pte_single_fn single = { .fn = fn, .data = data }; ++ return apply_to_page_range_batch(mm, addr, size, ++ apply_pte_batch, &single); ++} + EXPORT_SYMBOL_GPL(apply_to_page_range); + + /* +-- +1.7.3.4 + + +From dea51c0fa49689f6a489205c00ebf83c8e78f6cd Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Mon, 6 Dec 2010 12:26:53 -0800 +Subject: [PATCH 131/139] ioremap: use apply_to_page_range_batch() for ioremap_page_range() + +Signed-off-by: Jeremy Fitzhardinge +--- + lib/ioremap.c | 85 +++++++++++++++------------------------------------------ + 1 files changed, 22 insertions(+), 63 deletions(-) + +diff --git a/lib/ioremap.c b/lib/ioremap.c +index da4e2ad..e75d0d1 100644 +--- a/lib/ioremap.c ++++ b/lib/ioremap.c +@@ -13,81 +13,40 @@ + #include + #include + +-static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, +- unsigned long end, phys_addr_t phys_addr, pgprot_t prot) ++struct ioremap_data + { +- pte_t *pte; ++ phys_addr_t phys_addr; ++ pgprot_t prot; ++}; ++ ++static int ioremap_pte_range(pte_t *pte, unsigned count, ++ unsigned long addr, void *v) ++{ ++ struct ioremap_data *data = v; + u64 pfn; + +- pfn = phys_addr >> PAGE_SHIFT; +- pte = pte_alloc_kernel(pmd, addr); +- if (!pte) +- return -ENOMEM; +- do { +- BUG_ON(!pte_none(*pte)); +- set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot)); +- pfn++; +- } while (pte++, addr += PAGE_SIZE, addr != end); +- return 0; +-} ++ pfn = data->phys_addr >> PAGE_SHIFT; ++ data->phys_addr += count * PAGE_SIZE; + +-static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, +- unsigned long end, phys_addr_t phys_addr, pgprot_t prot) +-{ +- pmd_t *pmd; +- unsigned long next; ++ while (count--) { ++ BUG_ON(!pte_none(*pte)); + +- phys_addr -= addr; +- pmd = pmd_alloc(&init_mm, pud, addr); +- if (!pmd) +- return -ENOMEM; +- do { +- next = pmd_addr_end(addr, end); +- if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot)) +- return -ENOMEM; +- } while (pmd++, addr = next, addr != end); +- return 0; +-} ++ set_pte_at(&init_mm, addr, pte++, pfn_pte(pfn++, data->prot)); + +-static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, +- unsigned long end, phys_addr_t phys_addr, pgprot_t prot) +-{ +- pud_t *pud; +- unsigned long next; ++ addr += PAGE_SIZE; ++ } + +- phys_addr -= addr; +- pud = pud_alloc(&init_mm, pgd, addr); +- if (!pud) +- return -ENOMEM; +- do { +- next = pud_addr_end(addr, end); +- if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot)) +- return -ENOMEM; +- } while (pud++, addr = next, addr != end); + return 0; + } + +-int ioremap_page_range(unsigned long 
addr, +- unsigned long end, phys_addr_t phys_addr, pgprot_t prot) ++int ioremap_page_range(unsigned long addr, unsigned long end, ++ phys_addr_t phys_addr, pgprot_t prot) + { +- pgd_t *pgd; +- unsigned long start; +- unsigned long next; +- int err; +- +- BUG_ON(addr >= end); +- +- start = addr; +- phys_addr -= addr; +- pgd = pgd_offset_k(addr); +- do { +- next = pgd_addr_end(addr, end); +- err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot); +- if (err) +- break; +- } while (pgd++, addr = next, addr != end); ++ struct ioremap_data data = { .phys_addr = phys_addr, .prot = prot }; ++ int err = apply_to_page_range_batch(&init_mm, addr, end - addr, ++ ioremap_pte_range, &data); + +- flush_cache_vmap(start, end); ++ flush_cache_vmap(addr, end); + + return err; + } +-- +1.7.3.4 + + +From 7a064a31021ba0b4adfc90061d7da2daa9b3d27e Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Mon, 29 Nov 2010 12:22:24 -0800 +Subject: [PATCH 132/139] vmalloc: use plain pte_clear() for unmaps + +ptep_get_and_clear() is potentially moderately expensive (at least +an atomic operation, or potentially a trap-and-fault when virtualized) +so use a plain pte_clear(). + +Signed-off-by: Jeremy Fitzhardinge +--- + mm/vmalloc.c | 3 ++- + 1 files changed, 2 insertions(+), 1 deletions(-) + +diff --git a/mm/vmalloc.c b/mm/vmalloc.c +index 5ddbdfe..c06dc1e 100644 +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -39,8 +39,9 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) + + pte = pte_offset_kernel(pmd, addr); + do { +- pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte); ++ pte_t ptent = *pte; + WARN_ON(!pte_none(ptent) && !pte_present(ptent)); ++ pte_clear(&init_mm, addr, pte); + } while (pte++, addr += PAGE_SIZE, addr != end); + } + +-- +1.7.3.4 + + +From 334c14835ef823ce665eeebf6aad467064f47e47 Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Mon, 29 Nov 2010 11:06:19 -0800 +Subject: [PATCH 133/139] vmalloc: use apply_to_page_range_batch() for vunmap_page_range() + +There's no need to open-code it when there's helpful utility function +to do the job. 
+ +Signed-off-by: Jeremy Fitzhardinge +Cc: Nick Piggin +--- + mm/vmalloc.c | 53 +++++++++-------------------------------------------- + 1 files changed, 9 insertions(+), 44 deletions(-) + +diff --git a/mm/vmalloc.c b/mm/vmalloc.c +index c06dc1e..e99aa3b 100644 +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -33,59 +33,24 @@ + + /*** Page table manipulation functions ***/ + +-static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) ++static int vunmap_pte(pte_t *pte, unsigned count, ++ unsigned long addr, void *data) + { +- pte_t *pte; +- +- pte = pte_offset_kernel(pmd, addr); +- do { ++ while (count--) { + pte_t ptent = *pte; +- WARN_ON(!pte_none(ptent) && !pte_present(ptent)); +- pte_clear(&init_mm, addr, pte); +- } while (pte++, addr += PAGE_SIZE, addr != end); +-} +- +-static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end) +-{ +- pmd_t *pmd; +- unsigned long next; + +- pmd = pmd_offset(pud, addr); +- do { +- next = pmd_addr_end(addr, end); +- if (pmd_none_or_clear_bad(pmd)) +- continue; +- vunmap_pte_range(pmd, addr, next); +- } while (pmd++, addr = next, addr != end); +-} ++ WARN_ON(!pte_none(ptent) && !pte_present(ptent)); + +-static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end) +-{ +- pud_t *pud; +- unsigned long next; ++ pte_clear(&init_mm, addr, pte++); ++ addr += PAGE_SIZE; ++ } + +- pud = pud_offset(pgd, addr); +- do { +- next = pud_addr_end(addr, end); +- if (pud_none_or_clear_bad(pud)) +- continue; +- vunmap_pmd_range(pud, addr, next); +- } while (pud++, addr = next, addr != end); ++ return 0; + } + + static void vunmap_page_range(unsigned long addr, unsigned long end) + { +- pgd_t *pgd; +- unsigned long next; +- +- BUG_ON(addr >= end); +- pgd = pgd_offset_k(addr); +- do { +- next = pgd_addr_end(addr, end); +- if (pgd_none_or_clear_bad(pgd)) +- continue; +- vunmap_pud_range(pgd, addr, next); +- } while (pgd++, addr = next, addr != end); ++ apply_to_page_range_batch(&init_mm, addr, end - addr, vunmap_pte, NULL); + } + + static int vmap_pte_range(pmd_t *pmd, unsigned long addr, +-- +1.7.3.4 + + +From 937b74f8d19f7e62d63d4e82c2cf21f3bd636d9e Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Mon, 29 Nov 2010 11:11:45 -0800 +Subject: [PATCH 134/139] vmalloc: use apply_to_page_range_batch() for vmap_page_range_noflush() + +There's no need to open-code it when there's a helpful utility +function. + +Signed-off-by: Jeremy Fitzhardinge +Cc: Nick Piggin +--- + mm/vmalloc.c | 92 ++++++++++++++++++--------------------------------------- + 1 files changed, 29 insertions(+), 63 deletions(-) + +diff --git a/mm/vmalloc.c b/mm/vmalloc.c +index e99aa3b..cf4e705 100644 +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -53,63 +53,34 @@ static void vunmap_page_range(unsigned long addr, unsigned long end) + apply_to_page_range_batch(&init_mm, addr, end - addr, vunmap_pte, NULL); + } + +-static int vmap_pte_range(pmd_t *pmd, unsigned long addr, +- unsigned long end, pgprot_t prot, struct page **pages, int *nr) ++struct vmap_data + { +- pte_t *pte; ++ struct page **pages; ++ unsigned index; ++ pgprot_t prot; ++}; + +- /* +- * nr is a running index into the array which helps higher level +- * callers keep track of where we're up to. 
+- */ ++static int vmap_pte(pte_t *pte, unsigned count, ++ unsigned long addr, void *data) ++{ ++ struct vmap_data *vmap = data; + +- pte = pte_alloc_kernel(pmd, addr); +- if (!pte) +- return -ENOMEM; +- do { +- struct page *page = pages[*nr]; ++ while (count--) { ++ struct page *page = vmap->pages[vmap->index]; + + if (WARN_ON(!pte_none(*pte))) + return -EBUSY; ++ + if (WARN_ON(!page)) + return -ENOMEM; +- set_pte_at(&init_mm, addr, pte, mk_pte(page, prot)); +- (*nr)++; +- } while (pte++, addr += PAGE_SIZE, addr != end); +- return 0; +-} + +-static int vmap_pmd_range(pud_t *pud, unsigned long addr, +- unsigned long end, pgprot_t prot, struct page **pages, int *nr) +-{ +- pmd_t *pmd; +- unsigned long next; +- +- pmd = pmd_alloc(&init_mm, pud, addr); +- if (!pmd) +- return -ENOMEM; +- do { +- next = pmd_addr_end(addr, end); +- if (vmap_pte_range(pmd, addr, next, prot, pages, nr)) +- return -ENOMEM; +- } while (pmd++, addr = next, addr != end); +- return 0; +-} ++ set_pte_at(&init_mm, addr, pte, mk_pte(page, vmap->prot)); + +-static int vmap_pud_range(pgd_t *pgd, unsigned long addr, +- unsigned long end, pgprot_t prot, struct page **pages, int *nr) +-{ +- pud_t *pud; +- unsigned long next; ++ pte++; ++ addr += PAGE_SIZE; ++ vmap->index++; ++ } + +- pud = pud_alloc(&init_mm, pgd, addr); +- if (!pud) +- return -ENOMEM; +- do { +- next = pud_addr_end(addr, end); +- if (vmap_pmd_range(pud, addr, next, prot, pages, nr)) +- return -ENOMEM; +- } while (pud++, addr = next, addr != end); + return 0; + } + +@@ -122,22 +93,17 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr, + static int vmap_page_range_noflush(unsigned long start, unsigned long end, + pgprot_t prot, struct page **pages) + { +- pgd_t *pgd; +- unsigned long next; +- unsigned long addr = start; +- int err = 0; +- int nr = 0; +- +- BUG_ON(addr >= end); +- pgd = pgd_offset_k(addr); +- do { +- next = pgd_addr_end(addr, end); +- err = vmap_pud_range(pgd, addr, next, prot, pages, &nr); +- if (err) +- return err; +- } while (pgd++, addr = next, addr != end); +- +- return nr; ++ int err; ++ struct vmap_data vmap = { ++ .pages = pages, ++ .index = 0, ++ .prot = prot ++ }; ++ ++ err = apply_to_page_range_batch(&init_mm, start, end - start, ++ vmap_pte, &vmap); ++ ++ return err ? err : vmap.index; + } + + static int vmap_page_range(unsigned long start, unsigned long end, +-- +1.7.3.4 + + +From d4205306bb6609275ad93a8d1bfb4de3d06d0eb5 Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Wed, 1 Dec 2010 15:45:21 -0800 +Subject: [PATCH 135/139] vmalloc: use apply_to_page_range_batch() in alloc_vm_area() + +Signed-off-by: Jeremy Fitzhardinge +--- + mm/vmalloc.c | 8 ++++---- + 1 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/mm/vmalloc.c b/mm/vmalloc.c +index cf4e705..64d395f 100644 +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -1993,9 +1993,9 @@ void __attribute__((weak)) vmalloc_sync_all(void) + } + + +-static int f(pte_t *pte, unsigned long addr, void *data) ++static int f(pte_t *pte, unsigned count, unsigned long addr, void *data) + { +- /* apply_to_page_range() does all the hard work. */ ++ /* apply_to_page_range_batch() does all the hard work. */ + return 0; + } + +@@ -2024,8 +2024,8 @@ struct vm_struct *alloc_vm_area(size_t size) + * This ensures that page tables are constructed for this region + * of kernel virtual address space and mapped into init_mm. 
+ */ +- if (apply_to_page_range(&init_mm, (unsigned long)area->addr, +- area->size, f, NULL)) { ++ if (apply_to_page_range_batch(&init_mm, (unsigned long)area->addr, ++ area->size, f, NULL)) { + free_vm_area(area); + return NULL; + } +-- +1.7.3.4 + + +From e35361f09bf25ecb5ba6877e44319de315b76f5e Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Wed, 1 Dec 2010 15:44:04 -0800 +Subject: [PATCH 136/139] xen/mmu: use apply_to_page_range_batch() in xen_remap_domain_mfn_range() + +Signed-off-by: Jeremy Fitzhardinge +--- + arch/x86/xen/mmu.c | 19 ++++++++++++------- + 1 files changed, 12 insertions(+), 7 deletions(-) + +diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c +index 38ba804..25da278 100644 +--- a/arch/x86/xen/mmu.c ++++ b/arch/x86/xen/mmu.c +@@ -2292,14 +2292,19 @@ struct remap_data { + struct mmu_update *mmu_update; + }; + +-static int remap_area_mfn_pte_fn(pte_t *ptep, unsigned long addr, void *data) ++static int remap_area_mfn_pte_fn(pte_t *ptep, unsigned count, ++ unsigned long addr, void *data) + { + struct remap_data *rmd = data; +- pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); + +- rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr; +- rmd->mmu_update->val = pte_val_ma(pte); +- rmd->mmu_update++; ++ while (count--) { ++ pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); ++ ++ rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr; ++ rmd->mmu_update->val = pte_val_ma(pte); ++ rmd->mmu_update++; ++ ptep++; ++ } + + return 0; + } +@@ -2328,8 +2333,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, + range = (unsigned long)batch << PAGE_SHIFT; + + rmd.mmu_update = mmu_update; +- err = apply_to_page_range(vma->vm_mm, addr, range, +- remap_area_mfn_pte_fn, &rmd); ++ err = apply_to_page_range_batch(vma->vm_mm, addr, range, ++ remap_area_mfn_pte_fn, &rmd); + if (err) + goto out; + +-- +1.7.3.4 + + +From 02533b01d70f7cbbe3cf47de3f27740ab334a11f Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Wed, 1 Dec 2010 15:50:12 -0800 +Subject: [PATCH 137/139] xen/grant-table: use apply_to_page_range_batch() + +No need to call the callback per-pte. 
+ +Signed-off-by: Jeremy Fitzhardinge +--- + arch/x86/xen/grant-table.c | 28 ++++++++++++++++++---------- + 1 files changed, 18 insertions(+), 10 deletions(-) + +diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c +index 5bf892a..11a8a45 100644 +--- a/arch/x86/xen/grant-table.c ++++ b/arch/x86/xen/grant-table.c +@@ -44,19 +44,27 @@ + + #include + +-static int map_pte_fn(pte_t *pte, unsigned long addr, void *data) ++static int map_pte_fn(pte_t *pte, unsigned count, unsigned long addr, void *data) + { + unsigned long **frames = (unsigned long **)data; + +- set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); +- (*frames)++; ++ while (count--) { ++ set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); ++ (*frames)++; ++ pte++; ++ addr += PAGE_SIZE; ++ } + return 0; + } + +-static int unmap_pte_fn(pte_t *pte, unsigned long addr, void *data) ++static int unmap_pte_fn(pte_t *pte, unsigned count, unsigned long addr, void *data) + { ++ while (count--) { ++ pte_clear(&init_mm, addr, pte); ++ addr += PAGE_SIZE; ++ pte++; ++ } + +- set_pte_at(&init_mm, addr, pte, __pte(0)); + return 0; + } + +@@ -75,15 +83,15 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, + *__shared = shared; + } + +- rc = apply_to_page_range(&init_mm, (unsigned long)shared, +- PAGE_SIZE * nr_gframes, +- map_pte_fn, &frames); ++ rc = apply_to_page_range_batch(&init_mm, (unsigned long)shared, ++ PAGE_SIZE * nr_gframes, ++ map_pte_fn, &frames); + return rc; + } + + void arch_gnttab_unmap_shared(struct grant_entry *shared, + unsigned long nr_gframes) + { +- apply_to_page_range(&init_mm, (unsigned long)shared, +- PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); ++ apply_to_page_range_batch(&init_mm, (unsigned long)shared, ++ PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); + } +-- +1.7.3.4 + + +From cb3172f5566fe75b749b0873deedc42687c39064 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Wed, 19 Jan 2011 18:41:03 -0500 +Subject: [PATCH 138/139] x86/nx: Made .bss be HPAGE_ALIGNED. + +That makes it boot under Xen. +--- + arch/x86/kernel/vmlinux.lds.S | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + +diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S +index b34ab80..e37d10f 100644 +--- a/arch/x86/kernel/vmlinux.lds.S ++++ b/arch/x86/kernel/vmlinux.lds.S +@@ -341,7 +341,7 @@ SECTIONS + #endif + + /* BSS */ +- . = ALIGN(PAGE_SIZE); ++ . 
= ALIGN(HPAGE_SIZE); + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { + __bss_start = .; + *(.bss..page_aligned) +-- +1.7.3.4 + + +From 8751f3b0fd2ca59c410052d1faecc2297bb91d62 Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Mon, 24 Jan 2011 17:25:29 -0800 +Subject: [PATCH 139/139] xen/gntdev: remove token argument from find_grant_ptes + +Signed-off-by: Jeremy Fitzhardinge +--- + drivers/xen/gntdev.c | 3 +-- + 1 files changed, 1 insertions(+), 2 deletions(-) + +diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c +index 1e31cdc..2b777c0 100644 +--- a/drivers/xen/gntdev.c ++++ b/drivers/xen/gntdev.c +@@ -226,8 +226,7 @@ static void gntdev_free_map(struct grant_map *map) + + /* ------------------------------------------------------------------ */ + +-static int find_grant_ptes(pte_t *pte, pgtable_t token, +- unsigned long addr, void *data) ++static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data) + { + struct grant_map *map = data; + unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT; +-- +1.7.3.4 + diff --git a/xen.pvhvm.fixes.patch b/xen.pvhvm.fixes.patch new file mode 100644 index 0000000..cf6dc95 --- /dev/null +++ b/xen.pvhvm.fixes.patch @@ -0,0 +1,197 @@ +From bb89b5a4444a7514f3cf6cadb4f613832d7a9887 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Fri, 14 Jan 2011 17:55:44 -0500 +Subject: [PATCH 1/4] m2p: No need to catch exceptions when we know that there is no RAM + +.. beyound what we think is the end of memory. However there might +be more System RAM - but assigned to a guest. Hence jump to the +M2P override check and consult. + +Signed-off-by: Konrad Rzeszutek Wilk +--- + arch/x86/include/asm/xen/page.h | 6 +++++- + 1 files changed, 5 insertions(+), 1 deletions(-) + +diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h +index f25bdf2..74a8559 100644 +--- a/arch/x86/include/asm/xen/page.h ++++ b/arch/x86/include/asm/xen/page.h +@@ -77,6 +77,10 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) + if (xen_feature(XENFEAT_auto_translated_physmap)) + return mfn; + ++ if (unlikely((mfn >> machine_to_phys_order) != 0)) { ++ pfn = ~0; ++ goto try_override; ++ } + pfn = 0; + /* + * The array access can fail (e.g., device space beyond end of RAM). +@@ -84,7 +88,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) + * but we must handle the fault without crashing! + */ + __get_user(pfn, &machine_to_phys_mapping[mfn]); +- ++try_override: + /* + * If this appears to be a foreign mfn (because the pfn + * doesn't map back to the mfn), then check the local override +-- +1.7.3.4 + + +From a7cdabcd381dffb5db72a31c78b65a2bcdec2a04 Mon Sep 17 00:00:00 2001 +From: Stefan Bader +Date: Thu, 20 Jan 2011 15:19:46 +0000 +Subject: [PATCH 2/4] xen: p2m: correctly initialize partial p2m leave + +After changing the p2m mapping to a tree by + + commit 58e05027b530ff081ecea68e38de8d59db8f87e0 + xen: convert p2m to a 3 level tree + +and trying to boot a DomU with 615MB of memory, the following crash was +observed in the dump: + +kernel direct mapping tables up to 26f00000 @ 1ec4000-1fff000 +BUG: unable to handle kernel NULL pointer dereference at (null) +IP: [] xen_set_pte+0x27/0x60 +*pdpt = 0000000000000000 *pde = 0000000000000000 + +Adding further debug statements showed that when trying to set up +pfn=0x26700 the returned mapping was invalid. 
+ +pfn=0x266ff calling set_pte(0xc1fe77f8, 0x6b3003) +pfn=0x26700 calling set_pte(0xc1fe7800, 0x3) + +Although the last_pfn obtained from the startup info is 0x26700, which +should in turn not be hit, the additional 8MB which are added as extra +memory normally seem to be ok. This lead to looking into the initial +p2m tree construction, which uses the smaller value and assuming that +there is other code handling the extra memory. + +When the p2m tree is set up, the leaves are directly pointed to the +array which the domain builder set up. But if the mapping is not on a +boundary that fits into one p2m page, this will result in the last leaf +being only partially valid. And as the invalid entries are not +initialized in that case, things go badly wrong. + +I am trying to fix that by checking whether the current leaf is a +complete map and if not, allocate a completely new page and copy only +the valid pointers there. This may not be the most efficient or elegant +solution, but at least it seems to allow me booting DomUs with memory +assignments all over the range. + +Signed-off-by: Stefan Bader +--- + arch/x86/xen/p2m.c | 20 +++++++++++++++++++- + 1 files changed, 19 insertions(+), 1 deletions(-) + +diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c +index 8f2251d..c9307ec 100644 +--- a/arch/x86/xen/p2m.c ++++ b/arch/x86/xen/p2m.c +@@ -237,7 +237,25 @@ void __init xen_build_dynamic_phys_to_machine(void) + p2m_top[topidx] = mid; + } + +- p2m_top[topidx][mididx] = &mfn_list[pfn]; ++ /* ++ * As long as the mfn_list has enough entries to completely ++ * fill a p2m page, pointing into the array is ok. But if ++ * not the entries beyond the last pfn will be undefined. ++ * And guessing that the 'what-ever-there-is' does not take it ++ * too kindly when changing it to invalid markers, a new page ++ * is allocated, initialized and filled with the valid part. 
++ */ ++ if (unlikely(pfn + P2M_PER_PAGE > max_pfn)) { ++ unsigned long p2midx; ++ unsigned long **p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); ++ p2m_init(p2m); ++ ++ for (p2midx = 0; pfn + p2midx < max_pfn; p2midx++) { ++ p2m[p2midx] = mfn_list[pfn + p2midx]; ++ } ++ p2m_top[topidx][mididx] = p2m; ++ } else ++ p2m_top[topidx][mididx] = &mfn_list[pfn]; + } + + m2p_override_init(); +-- +1.7.3.4 + + +From b84683ad1e704c2a296d08ff0cbe29db936f94a7 Mon Sep 17 00:00:00 2001 +From: Stefano Stabellini +Date: Tue, 25 Jan 2011 12:03:42 +0000 +Subject: [PATCH 3/4] xen: make sure the e820 memory regions end at page boundary + +Signed-off-by: M A Young +Signed-off-by: Stefano Stabellini +--- + arch/x86/xen/setup.c | 5 ++++- + 1 files changed, 4 insertions(+), 1 deletions(-) + +diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c +index b5a7f92..a3d28a1 100644 +--- a/arch/x86/xen/setup.c ++++ b/arch/x86/xen/setup.c +@@ -179,7 +179,10 @@ char * __init xen_memory_setup(void) + e820.nr_map = 0; + xen_extra_mem_start = mem_end; + for (i = 0; i < memmap.nr_entries; i++) { +- unsigned long long end = map[i].addr + map[i].size; ++ unsigned long long end; ++ if (map[i].type == E820_RAM) ++ map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE; ++ end = map[i].addr + map[i].size; + + if (map[i].type == E820_RAM && end > mem_end) { + /* RAM off the end - may be partially included */ +-- +1.7.3.4 + + +From daed469760dabbf6ae81c9869a263535fb587e63 Mon Sep 17 00:00:00 2001 +From: Stefano Stabellini +Date: Tue, 25 Jan 2011 12:05:11 +0000 +Subject: [PATCH 4/4] When destroying mappings between _brk_end and _end, do not go over _end + +Signed-off-by: Stefano Stabellini +--- + arch/x86/mm/init.c | 15 ++++++++++++++- + 1 files changed, 14 insertions(+), 1 deletions(-) + +diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c +index 947f42a..ebc0221 100644 +--- a/arch/x86/mm/init.c ++++ b/arch/x86/mm/init.c +@@ -291,10 +291,23 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, + * located on different 2M pages. cleanup_highmap(), however, + * can only consider _end when it runs, so destroy any + * mappings beyond _brk_end here. ++ * Be careful not to go over _end. + */ + pud = pud_offset(pgd_offset_k(_brk_end), _brk_end); + pmd = pmd_offset(pud, _brk_end - 1); +- while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1)) ++ while (++pmd < pmd_offset(pud, (unsigned long)_end - 1)) ++ pmd_clear(pmd); ++ if (((unsigned long)_end) & ~PMD_MASK) { ++ pte_t *pte; ++ unsigned long addr; ++ for (addr = ((unsigned long)_end) & PMD_MASK; ++ addr < ((unsigned long)_end); ++ addr += PAGE_SIZE) { ++ pte = pte_offset_map(pmd, addr); ++ pte_clear(&init_mm, addr, pte); ++ pte_unmap(pte); ++ } ++ } else + pmd_clear(pmd); + } + #endif +-- +1.7.3.4 +