diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/conf/files head.xen/sys/conf/files --- head.moves/sys/conf/files 2010-09-16 16:31:06.481640000 -0600 +++ head.xen/sys/conf/files 2010-09-16 16:49:09.603370866 -0600 @@ -2994,19 +2994,20 @@ xen/features.c optional xen | xenhvm xen/evtchn/evtchn.c optional xen xen/evtchn/evtchn_dev.c optional xen | xenhvm -xen/reboot.c optional xen -xen/xenbus/xenbus_client.c optional xen | xenhvm -xen/xenbus/xenbus_comms.c optional xen | xenhvm -xen/xenbus/xenbus_dev.c optional xen | xenhvm xen/xenbus/xenbus_if.m optional xen | xenhvm -xen/xenbus/xenbus_probe.c optional xen | xenhvm -#xen/xenbus/xenbus_probe_backend.c optional xen -xen/xenbus/xenbus_xs.c optional xen | xenhvm +xen/xenbus/xenbus.c optional xen | xenhvm +xen/xenbus/xenbusb_if.m optional xen | xenhvm +xen/xenbus/xenbusb.c optional xen | xenhvm +xen/xenbus/xenbusb_front.c optional xen | xenhvm +xen/xenbus/xenbusb_back.c optional xen | xenhvm +xen/xenstore/xenstore.c optional xen | xenhvm +xen/xenstore/xenstore_dev.c optional xen | xenhvm dev/xen/balloon/balloon.c optional xen | xenhvm +dev/xen/blkfront/blkfront.c optional xen | xenhvm +dev/xen/blkback/blkback.c optional xen | xenhvm dev/xen/console/console.c optional xen dev/xen/console/xencons_ring.c optional xen -dev/xen/blkfront/blkfront.c optional xen | xenhvm +dev/xen/control/control.c optional xen | xenhvm dev/xen/netfront/netfront.c optional xen | xenhvm dev/xen/xenpci/xenpci.c optional xenpci dev/xen/xenpci/evtchn.c optional xenpci -dev/xen/xenpci/machine_reboot.c optional xenpci diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/dev/xen/balloon/balloon.c head.xen/sys/dev/xen/balloon/balloon.c --- head.moves/sys/dev/xen/balloon/balloon.c 2010-09-16 16:31:08.610304000 -0600 +++ head.xen/sys/dev/xen/balloon/balloon.c 2010-09-16 16:49:09.608508916 -0600 @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include #include @@ -406,20 +406,20 @@ wakeup(balloon_process); } -static 
struct xenbus_watch target_watch = +static struct xs_watch target_watch = { .node = "memory/target" }; /* React to a change in the target key */ static void -watch_target(struct xenbus_watch *watch, +watch_target(struct xs_watch *watch, const char **vec, unsigned int len) { unsigned long long new_target; int err; - err = xenbus_scanf(XBT_NIL, "memory", "target", NULL, + err = xs_scanf(XST_NIL, "memory", "target", NULL, "%llu", &new_target); if (err) { /* This is ok (for domain0 at least) - so just return */ @@ -438,7 +438,7 @@ { int err; - err = register_xenbus_watch(&target_watch); + err = xs_register_watch(&target_watch); if (err) printf("Failed to set balloon watcher\n"); diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/dev/xen/blkback/blkback.c head.xen/sys/dev/xen/blkback/blkback.c --- head.moves/sys/dev/xen/blkback/blkback.c 2010-09-16 16:31:08.622890000 -0600 +++ head.xen/sys/dev/xen/blkback/blkback.c 2010-09-16 16:49:09.616667886 -0600 @@ -1,1191 +1,2361 @@ -/* - * Copyright (c) 2006, Cisco Systems, Inc. +/*- + * Copyright (c) 2009-2010 Justin T. Gibbs, Spectra Logic Corporation * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. */ - #include __FBSDID("$FreeBSD: head/sys/dev/xen/blkback/blkback.c 196403 2009-08-20 19:17:53Z jhb $"); +/** + * \file blkback.c + * + * \brief Device driver supporting the vending of block storage from + * a FreeBSD domain to other domains. + */ + #include #include -#include -#include #include -#include -#include -#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include +#include +#include +#include #include -#include -#include -#include - -#include -#include -#include +#include #include +#include +#include + +#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include -#if XEN_BLKBACK_DEBUG +#include + +MALLOC_DEFINE(M_XENBLOCKBACK, "xbbd", "Xen Block Back driver data"); + +/* Required until HVM grant table mappings also update the IOMMU */ +#define USE_BOUNCE_BUFFERS + +/*#define XEN_BLKBACK_DEBUG */ + +#ifdef XEN_BLKBACK_DEBUG #define DPRINTF(fmt, args...) \ - printf("blkback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) + printf("xbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) #else -#define DPRINTF(fmt, args...) ((void)0) +#define DPRINTF(fmt, args...) do {} while(0) #endif -#define WPRINTF(fmt, args...) 
\ - printf("blkback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) +#define BLKBACK_INVALID_HANDLE (~0) -#define BLKBACK_INVALID_HANDLE (~0) +/** + * The maximum number of outstanding requests blocks (request headers plus + * additional segment blocks) we will allow in a negotiated block-front/back + * communication channel. + */ +#define XBB_MAX_REQUESTS 256 -struct ring_ref { - vm_offset_t va; - grant_handle_t handle; - uint64_t bus_addr; -}; +/** + * The maximum mapped region size per request we will allow in a negotiated + * block-front/back communication channel. + */ +#define XBB_MAX_REQUEST_SIZE \ + MIN(MAXPHYS, BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) -typedef struct blkback_info { +/** + * The maximum number of segments (within a request header and accompanying + * segment blocks) per request we will allow in a negotiated block-front/back + * communication channel. + */ +#define XBB_MAX_SEGMENTS_PER_REQUEST \ + (MIN(BLKIF_MAX_SEGMENTS_PER_REQUEST, \ + (XBB_MAX_REQUEST_SIZE / PAGE_SIZE) + 1)) + +/** + * The maximum number of shared memory ring pages we will allow in a + * negotiated block-front/back communication channel. Allow enough + * ring space for all requests to be XBB_MAX_REQUEST_SIZE'd. 
+ */ +#define XBB_MAX_RING_PAGES \ + BLKIF_RING_PAGES(BLKIF_SEGS_TO_BLOCKS(XBB_MAX_SEGMENTS_PER_REQUEST) \ + * XBB_MAX_REQUESTS) + +struct xbb_softc; +static int test_generated_data; +static int dont_perform_io = 0; - /* Schedule lists */ - STAILQ_ENTRY(blkback_info) next_req; - int on_req_sched_list; - - struct xenbus_device *xdev; - XenbusState frontend_state; - - domid_t domid; - - int state; - int ring_connected; - struct ring_ref rr; - blkif_back_ring_t ring; - evtchn_port_t evtchn; - int irq; - void *irq_cookie; - - int ref_cnt; - - int handle; - char *mode; - char *type; - char *dev_name; - - struct vnode *vn; - struct cdev *cdev; - struct cdevsw *csw; - u_int sector_size; - int sector_size_shift; - off_t media_size; - u_int media_num_sectors; - int major; - int minor; - int read_only; +static char dump_buf[128 * 1024]; +static char *dump_ptr; - struct mtx blk_ring_lock; +static void xbb_attach_failed(struct xbb_softc *xbb, int err, const char *fmt, + ...) __attribute__((format(printf, 3, 4))); - device_t ndev; +/** + * \brief Object tracking an in-flight I/O from a Xen VBD consumer. + */ +struct xbb_xen_req { + SLIST_ENTRY(xbb_xen_req) links; + struct xbb_softc *xbb; + uint64_t id; + uint8_t *kva; +#ifdef USE_BOUNCE_BUFFERS + uint8_t *bounce; +#endif + uint64_t gnt_base; + int nr_pages; + int pendcnt; + int operation; + int status; + grant_handle_t *gnt_handles; +}; +SLIST_HEAD(xbb_xen_req_slist, xbb_xen_req); - /* Stats */ - int st_rd_req; - int st_wr_req; - int st_oo_req; - int st_err_req; -} blkif_t; +struct xbb_ring_config { + vm_offset_t va; + uint64_t gnt_addr; + grant_handle_t handle[XBB_MAX_RING_PAGES]; + uint64_t bus_addr[XBB_MAX_RING_PAGES]; + uint32_t ring_pages; + grant_ref_t ring_ref[XBB_MAX_RING_PAGES]; + evtchn_port_t evtchn; +}; -/* - * These are rather arbitrary. They are fairly large because adjacent requests - * pulled from a communication ring are quite likely to end up being part of - * the same scatter/gather request at the disc. 
- * - * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** - * - * This will increase the chances of being able to write whole tracks. - * 64 should be enough to keep us competitive with Linux. - */ -static int blkif_reqs = 64; -TUNABLE_INT("xen.vbd.blkif_reqs", &blkif_reqs); +typedef enum +{ + XBBF_READ_ONLY = 0x01, + XBBF_RING_CONNECTED = 0x02, + XBBF_RESOURCE_SHORTAGE = 0x04, + XBBF_SHUTDOWN = 0x08 +} xbb_flag_t; + +typedef enum { + XBB_TYPE_NONE = 0x00, + XBB_TYPE_DISK = 0x01, + XBB_TYPE_FILE = 0x02 +} xbb_type; + +struct xbb_sg_state { + int16_t nsect; + uint8_t first_sect; + uint8_t last_sect; +}; -static int mmap_pages; +typedef int (*xbb_dispatch_t)(struct xbb_softc *xbb, blkif_request_t *ring_req, + struct xbb_xen_req *req, + struct blkif_request_segment *sg, + struct xbb_sg_state *sg_states, int nseg, + int operation, int flags); /* - * Each outstanding request that we've passed to the lower device layers has a - * 'pending_req' allocated to it. Each buffer_head that completes decrements - * the pendcnt towards zero. When it hits zero, the specified domain has a - * response queued for it, with the saved 'id' passed back. + * Make sure we can't have more iovecs than allowed in a uio. In practice + * we shouldn't be very close. 
*/ -typedef struct pending_req { - blkif_t *blkif; - uint64_t id; - int nr_pages; - int pendcnt; - unsigned short operation; - int status; - STAILQ_ENTRY(pending_req) free_list; -} pending_req_t; +#if (XBB_MAX_SEGMENTS_PER_REQUEST > UIO_MAXIOV) +#error "XBB_MAX_SEGMENTS_PER_REQUEST too large, need to fix xbb_dispatch_file()" +#endif -static pending_req_t *pending_reqs; -static STAILQ_HEAD(pending_reqs_list, pending_req) pending_free = - STAILQ_HEAD_INITIALIZER(pending_free); -static struct mtx pending_free_lock; -static STAILQ_HEAD(blkback_req_sched_list, blkback_info) req_sched_list = - STAILQ_HEAD_INITIALIZER(req_sched_list); -static struct mtx req_sched_list_lock; +struct xbb_softc { -static unsigned long mmap_vstart; -static unsigned long *pending_vaddrs; -static grant_handle_t *pending_grant_handles; + /* Thread based taskqueue support.*/ + struct taskqueue *io_taskqueue; + struct task io_task; + + xbb_type device_type; + device_t dev; + struct ucred *cred; + xbb_dispatch_t dispatch_io; + + u_int active_request_count; + struct xbb_xen_req_slist request_free_slist; + struct xbb_xen_req *requests; + vm_offset_t kva; + uint64_t gnt_base_addr; + int kva_size; + + /** + * ivar access isn't super cheap. So we cache some + * information in our softc. + */ + domid_t otherend_id; + + /** + * The protocol abi in effect. + */ + int abi; + + /** + * The maximum number of requests allowed to be in + * flight at a time. + */ + uint32_t max_requests; + + /** + * The maximum number of segments (1 page per segment) that can + * be mapped by a request. + */ + uint32_t max_request_segments; + + /** + * The maximum size of any request to this back-end + * device. + */ + uint32_t max_request_size; + + /* + * Various configuration and state bit flags. 
+ */ + xbb_flag_t flags; + + struct xbb_ring_config ring_config; + blkif_back_rings_t rings; + evtchn_port_t evtchn; + int irq; + + char *dev_mode; + char *dev_type; + char *dev_name; + int dev_ref; + + struct vnode *vn; + struct cdev *cdev; + struct cdevsw *csw; + u_int sector_size; + u_int sector_size_shift; + off_t media_size; + uint64_t media_num_sectors; + int major; + int minor; + struct xbb_sg_state sg_states[XBB_MAX_SEGMENTS_PER_REQUEST]; + struct iovec xiovecs[XBB_MAX_SEGMENTS_PER_REQUEST]; +#ifdef USE_BOUNCE_BUFFERS + struct iovec saved_xiovecs[XBB_MAX_SEGMENTS_PER_REQUEST]; + void *xiovecs_vaddr[XBB_MAX_SEGMENTS_PER_REQUEST]; +#endif /* USE_BOUNCE_BUFFERS */ + + struct mtx lock; + + device_t ndev; + +#ifdef XENHVM + struct resource *pseudo_phys_res; + int pseudo_phys_res_id; +#endif + + /* Stats */ + int st_rd_req; + int st_wr_req; + int st_oo_req; + int st_err_req; +}; -static struct task blk_req_task; +static int xbb_detach(device_t dev); +static int xbb_alloc_communication_mem(struct xbb_softc *xbb); +static void xbb_free_communication_mem(struct xbb_softc *xbb); +static int xbb_open_device(struct xbb_softc *xbb); +static void xbb_close_device(struct xbb_softc *xbb); +static void xbb_connect(struct xbb_softc *xbb); +static int xbb_connect_ring(struct xbb_softc *xbb); +static void xbb_disconnect(struct xbb_softc *xbb); +static task_fn_t xbb_run_queue; +static int xbb_dispatch_dev(struct xbb_softc *xbb, + blkif_request_t *ring_req, + struct xbb_xen_req *req, + struct blkif_request_segment *sg, + struct xbb_sg_state *sg_states, int nseg, + int operation, int flags); +static int xbb_dispatch_file(struct xbb_softc *xbb, + blkif_request_t *ring_req, + struct xbb_xen_req *req, + struct blkif_request_segment *sg, + struct xbb_sg_state *sg_states, int nseg, + int operation, int flags); +static void xbb_dispatch_io(struct xbb_softc *xbb, + blkif_request_t *ring_req, + struct xbb_xen_req *req, + RING_IDX req_ring_idx); +static void xbb_send_response(struct 
xbb_softc *xbb, + struct xbb_xen_req *req, int status); +static void xbb_bio_done(struct bio *bio); +static int xbb_shutdown(struct xbb_softc *xbb); +/*static void xbb_hotplug_fatal(struct xbb_softc *xbb); */ -/* Protos */ -static void disconnect_ring(blkif_t *blkif); -static int vbd_add_dev(struct xenbus_device *xdev); +static driver_intr_t xbb_intr; -static inline int vaddr_pagenr(pending_req_t *req, int seg) +static int +xbb_probe(device_t dev) { - return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; + + if (!strcmp(xenbus_get_type(dev), "vbd")) { + device_set_desc(dev, "Backend Virtual Block Device"); + device_quiet(dev); + return (0); + } + + return (ENXIO); } -static inline unsigned long vaddr(pending_req_t *req, int seg) +/* + */ +static int +xbb_attach(device_t dev) { - return pending_vaddrs[vaddr_pagenr(req, seg)]; -} + struct xbb_softc *xbb; + int error; -#define pending_handle(_req, _seg) \ - (pending_grant_handles[vaddr_pagenr(_req, _seg)]) + DPRINTF("Attaching to %s\n", xenbus_get_node(dev)); -static unsigned long -alloc_empty_page_range(unsigned long nr_pages) -{ - void *pages; - int i = 0, j = 0; - multicall_entry_t mcl[17]; - unsigned long mfn_list[16]; - struct xen_memory_reservation reservation = { - .extent_start = mfn_list, - .nr_extents = 0, - .address_bits = 0, - .extent_order = 0, - .domid = DOMID_SELF - }; + /* + * Basic initialization. + * After this block it is safe to call xbb_detach() + * to clean up any allocated data for this instance. + */ + xbb = device_get_softc(dev); + xbb->dev = dev; + xbb->otherend_id = xenbus_get_otherend_id(dev); + TASK_INIT(&xbb->io_task, /*priority*/0, xbb_run_queue, xbb); + mtx_init(&xbb->lock, device_get_nameunit(dev), NULL, MTX_DEF); + SLIST_INIT(&xbb->request_free_slist); + + /* + * Protocol defaults valid even if all negotiation fails. 
+ */ + xbb->ring_config.ring_pages = 1; + xbb->max_requests = BLKIF_MAX_RING_REQUESTS(PAGE_SIZE); + xbb->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK; + xbb->max_request_size = xbb->max_request_segments * PAGE_SIZE; + + /* + * Publish protocol capabilities for consumption by the + * front-end. + */ + error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), + "feature-barrier", "1"); + if (error) { + xbb_attach_failed(xbb, error, "writing %s/feature-barrier", + xenbus_get_node(xbb->dev)); + return (error); + } + + error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), + "feature-flush-cache", "1"); + if (error) { + xbb_attach_failed(xbb, error, "writing %s/feature-flush-cache", + xenbus_get_node(xbb->dev)); + return (error); + } + + error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), + "max-ring-pages", "%zu", XBB_MAX_RING_PAGES); + if (error) { + xbb_attach_failed(xbb, error, "writing %s/max-ring-pages", + xenbus_get_node(xbb->dev)); + return (error); + } + + error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), + "max-requests", "%u", XBB_MAX_REQUESTS); + if (error) { + xbb_attach_failed(xbb, error, "writing %s/max-requests", + xenbus_get_node(xbb->dev)); + return (error); + } + + error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), + "max-request-segments", "%u", + XBB_MAX_SEGMENTS_PER_REQUEST); + if (error) { + xbb_attach_failed(xbb, error, "writing %s/max-request-segments", + xenbus_get_node(xbb->dev)); + return (error); + } + + error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), + "max-request-size", "%u", + XBB_MAX_REQUEST_SIZE); + if (error) { + xbb_attach_failed(xbb, error, "writing %s/max-request-size", + xenbus_get_node(xbb->dev)); + return (error); + } + + /* Collect physical device information. 
*/ +#ifdef NOT_YET + error = xs_gather(XST_NIL, xenbus_get_otherend_path(xbb->dev), + "device-type", NULL, &xbb->dev_type, + NULL); + if (error != 0) + xbb->dev_type = NULL; +#endif - pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT); - if (pages == NULL) - return 0; + error = xs_gather(XST_NIL, xenbus_get_node(dev), + "mode", NULL, &xbb->dev_mode, + "params", NULL, &xbb->dev_name, + NULL); + if (error != 0) { + xbb_attach_failed(xbb, error, "reading backend fields at %s", + xenbus_get_node(dev)); + return (ENXIO); + } + + /* Parse fopen style mode flags. */ + if (strchr(xbb->dev_mode, 'w') == NULL) + xbb->flags |= XBBF_READ_ONLY; - memset(mcl, 0, sizeof(mcl)); + if (!dont_perform_io) { + /* + * Verify the physical device is present and can support + * the desired I/O mode. + */ + DROP_GIANT(); + error = xbb_open_device(xbb); + PICKUP_GIANT(); + if (error != 0) { + xbb_attach_failed(xbb, error, "Unable to open %s", + xbb->dev_name); + return (ENXIO); + } + } - while (i < nr_pages) { - unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE); + /* + * Create a taskqueue for doing work that must occur from a + * thread context. + */ + xbb->io_taskqueue = taskqueue_create(device_get_nameunit(dev), M_NOWAIT, + taskqueue_thread_enqueue, + /*context*/&xbb->io_taskqueue); + if (xbb->io_taskqueue == NULL) { + xbb_attach_failed(xbb, error, "Unable to create taskqueue"); + return (ENOMEM); + } + + taskqueue_start_threads(&xbb->io_taskqueue, + /*num threads*/1, + /*priority*/PWAIT, + /*thread name*/ + "%s taskq", device_get_nameunit(dev)); + + /* Update hot-plug status to satisfy xend. */ + error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), + "hotplug-status", "connected"); + if (error) { + xbb_attach_failed(xbb, error, "writing %s/hotplug-status", + xenbus_get_node(xbb->dev)); + return (error); + } - mcl[j].op = __HYPERVISOR_update_va_mapping; - mcl[j].args[0] = va; + /* Tell the front end that we are ready to connect. 
*/ + xenbus_set_state(dev, XenbusStateInitWait); - mfn_list[j++] = vtomach(va) >> PAGE_SHIFT; + return (0); +} - xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY; +static int +xbb_detach(device_t dev) +{ + struct xbb_softc *xbb; - if (j == 16 || i == nr_pages) { - mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL; + DPRINTF("\n"); - reservation.nr_extents = j; + xbb = device_get_softc(dev); + mtx_lock(&xbb->lock); + while (xbb_shutdown(xbb) == EAGAIN) { + msleep(xbb, &xbb->lock, /*wakeup prio unchanged*/0, + "xbb_shutdown", 0); + } + mtx_unlock(&xbb->lock); + mtx_destroy(&xbb->lock); - mcl[j].op = __HYPERVISOR_memory_op; - mcl[j].args[0] = XENMEM_decrease_reservation; - mcl[j].args[1] = (unsigned long)&reservation; - - (void)HYPERVISOR_multicall(mcl, j+1); + DPRINTF("\n"); + + xbb_close_device(xbb); + xbb_free_communication_mem(xbb); + + if (xbb->dev_mode != NULL) { + free(xbb->dev_mode, M_XENBUS); + xbb->dev_mode = NULL; + } + + if (xbb->dev_type != NULL) { + free(xbb->dev_type, M_XENBUS); + xbb->dev_type = NULL; + } - mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0; - j = 0; + if (xbb->dev_name != NULL) { + free(xbb->dev_name, M_XENBUS); + xbb->dev_name = NULL; + } + + if (xbb->requests != NULL) { + struct xbb_xen_req *req; + struct xbb_xen_req *last_req; + + req = xbb->requests; + last_req = &xbb->requests[xbb->max_requests - 1]; + while (req <= last_req) { +#ifdef USE_BOUNCE_BUFFERS + if (req->bounce != NULL) { + free(req->bounce, M_XENBLOCKBACK); + req->bounce = NULL; + } +#endif + if (req->gnt_handles != NULL) { + free (req->gnt_handles, M_XENBLOCKBACK); + req->gnt_handles = NULL; + } + req++; } + free(xbb->requests, M_XENBLOCKBACK); + xbb->requests = NULL; } - return (unsigned long)pages; + return (0); +} + +static int +xbb_suspend(device_t dev) +{ +#ifdef NOT_YET + struct xbb_softc *sc = device_get_softc(dev); + + /* Prevent new requests being issued until we fix things up. 
*/ + mtx_lock(&sc->xb_io_lock); + sc->connected = BLKIF_STATE_SUSPENDED; + mtx_unlock(&sc->xb_io_lock); +#endif + + return (0); +} + +static int +xbb_resume(device_t dev) +{ + return (0); } -static pending_req_t * -alloc_req(void) +static int +xbb_frontend_changed(device_t dev, XenbusState frontend_state) { - pending_req_t *req; - mtx_lock(&pending_free_lock); - if ((req = STAILQ_FIRST(&pending_free))) { - STAILQ_REMOVE(&pending_free, req, pending_req, free_list); - STAILQ_NEXT(req, free_list) = NULL; + struct xbb_softc *xbb = device_get_softc(dev); + + DPRINTF("state=%s\n", xenbus_strstate(frontend_state)); + + switch (frontend_state) { + case XenbusStateInitialising: + case XenbusStateClosing: + break; + case XenbusStateInitialised: + case XenbusStateConnected: + xbb_connect(xbb); + break; + case XenbusStateClosed: + case XenbusStateInitWait: + + mtx_lock(&xbb->lock); + xbb_shutdown(xbb); + mtx_unlock(&xbb->lock); + break; + default: + xenbus_dev_fatal(xbb->dev, EINVAL, "saw state %d at frontend", + frontend_state); + break; } - mtx_unlock(&pending_free_lock); - return req; + return (0); } +/* Driver registration */ +static device_method_t xbb_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, xbb_probe), + DEVMETHOD(device_attach, xbb_attach), + DEVMETHOD(device_detach, xbb_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, xbb_suspend), + DEVMETHOD(device_resume, xbb_resume), + + /* Xenbus interface */ + DEVMETHOD(xenbus_otherend_changed, xbb_frontend_changed), + + { 0, 0 } +}; + +static driver_t xbb_driver = { + "xbbd", + xbb_methods, + sizeof(struct xbb_softc), +}; +devclass_t xbb_devclass; + +DRIVER_MODULE(xbbd, xenbusb_back, xbb_driver, xbb_devclass, 0, 0); + static void -free_req(pending_req_t *req) +xbb_attach_failed(struct xbb_softc *xbb, int err, const char *fmt, ...) 
{ - int was_empty; + va_list ap; + va_list ap_hotplug; - mtx_lock(&pending_free_lock); - was_empty = STAILQ_EMPTY(&pending_free); - STAILQ_INSERT_TAIL(&pending_free, req, free_list); - mtx_unlock(&pending_free_lock); - if (was_empty) - taskqueue_enqueue(taskqueue_swi, &blk_req_task); + va_start(ap, fmt); + va_copy(ap_hotplug, ap); + xs_vprintf(XST_NIL, xenbus_get_node(xbb->dev), + "hotplug-error", fmt, ap_hotplug); + va_end(ap_hotplug); + xs_printf(XST_NIL, xenbus_get_node(xbb->dev), + "hotplug-status", "error"); + + xenbus_dev_vfatal(xbb->dev, err, fmt, ap); + va_end(ap); + + xs_printf(XST_NIL, xenbus_get_node(xbb->dev), + "online", "0"); + xbb_detach(xbb->dev); +} + +static inline struct xbb_xen_req * +xbb_get_req(struct xbb_softc *xbb) +{ + struct xbb_xen_req *req; + + req = NULL; + mtx_lock(&xbb->lock); + + /* + * Do not allow new requests to be allocated while we + * are shutting down. + */ + if ((xbb->flags & XBBF_SHUTDOWN) == 0) { + if ((req = SLIST_FIRST(&xbb->request_free_slist)) != NULL) { + SLIST_REMOVE_HEAD(&xbb->request_free_slist, links); + xbb->active_request_count++; + } else { + xbb->flags |= XBBF_RESOURCE_SHORTAGE; + } + } + mtx_unlock(&xbb->lock); + return (req); } -static void -fast_flush_area(pending_req_t *req) +static inline void +xbb_release_req(struct xbb_softc *xbb, struct xbb_xen_req *req) { - struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - unsigned int i, invcount = 0; - grant_handle_t handle; - int ret; + int wake_thread; - for (i = 0; i < req->nr_pages; i++) { - handle = pending_handle(req, i); - if (handle == BLKBACK_INVALID_HANDLE) - continue; - unmap[invcount].host_addr = vaddr(req, i); - unmap[invcount].dev_bus_addr = 0; - unmap[invcount].handle = handle; - pending_handle(req, i) = BLKBACK_INVALID_HANDLE; - invcount++; + mtx_lock(&xbb->lock); + wake_thread = xbb->flags & XBBF_RESOURCE_SHORTAGE; + xbb->flags &= ~XBBF_RESOURCE_SHORTAGE; + SLIST_INSERT_HEAD(&xbb->request_free_slist, req, links); + 
xbb->active_request_count--; + + if ((xbb->flags & XBBF_SHUTDOWN) != 0) { + /* + * Shutdown is in progress. See if we can + * progress further now that one more request + * has completed and been returned to the + * free pool. + */ + xbb_shutdown(xbb); } + mtx_unlock(&xbb->lock); - ret = HYPERVISOR_grant_table_op( - GNTTABOP_unmap_grant_ref, unmap, invcount); - PANIC_IF(ret); + if (wake_thread != 0) + taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); } -static void -blkif_get(blkif_t *blkif) +static inline uint8_t * +xbb_req_vaddr(struct xbb_xen_req *req, int pagenr, int sector) { - atomic_add_int(&blkif->ref_cnt, 1); + return (req->kva + (PAGE_SIZE * pagenr) + (sector << 9)); } -static void -blkif_put(blkif_t *blkif) +#ifdef USE_BOUNCE_BUFFERS +static inline uint8_t * +xbb_req_bounce_addr(struct xbb_xen_req *req, int pagenr, int sector) { - if (atomic_fetchadd_int(&blkif->ref_cnt, -1) == 1) { - DPRINTF("Removing %x\n", (unsigned int)blkif); - disconnect_ring(blkif); - if (blkif->mode) - free(blkif->mode, M_DEVBUF); - if (blkif->type) - free(blkif->type, M_DEVBUF); - if (blkif->dev_name) - free(blkif->dev_name, M_DEVBUF); - free(blkif, M_DEVBUF); - } + return (req->bounce + (PAGE_SIZE * pagenr) + (sector << 9)); } -static int -blkif_create(struct xenbus_device *xdev, long handle, char *mode, char *type, char *params) +static inline uint8_t * +xbb_req_ioaddr(struct xbb_xen_req *req, int pagenr, int sector) { - blkif_t *blkif; + return (xbb_req_bounce_addr(req, pagenr, sector)); +} - blkif = (blkif_t *)malloc(sizeof(*blkif), M_DEVBUF, M_NOWAIT | M_ZERO); - if (!blkif) - return ENOMEM; - - DPRINTF("Created %x\n", (unsigned int)blkif); +#else - blkif->ref_cnt = 1; - blkif->domid = xdev->otherend_id; - blkif->handle = handle; - blkif->mode = mode; - blkif->type = type; - blkif->dev_name = params; - blkif->xdev = xdev; - xdev->data = blkif; +static inline uint8_t * +xbb_req_ioaddr(struct xbb_xen_req *req, int pagenr, int sector) +{ + return (xbb_req_vaddr(req, pagenr, 
sector)); +} - mtx_init(&blkif->blk_ring_lock, "blk_ring_ock", "blkback ring lock", MTX_DEF); +#endif - if (strcmp(mode, "w")) - blkif->read_only = 1; +static inline uintptr_t +xbb_req_gntaddr(struct xbb_xen_req *req, int pagenr, int sector) +{ + return ((uintptr_t)(req->gnt_base + + (PAGE_SIZE * pagenr) + (sector << 9))); +} - return 0; +static int +xbb_alloc_communication_mem(struct xbb_softc *xbb) +{ + xbb->kva_size = (xbb->ring_config.ring_pages + + (xbb->max_requests * xbb->max_request_segments)) + * PAGE_SIZE; +#ifndef XENHVM + xbb->kva = kmem_alloc_nofault(kernel_map, xbb->kva_size); + if (xbb->kva == 0) + return (ENOMEM); + xbb->gnt_base_addr = xbb->kva; +#else /* XENHVM */ + /* + * Reserve a range of pseudo physical memory that we can map + * into kva. These pages will only be backed by machine + * pages ("real memory") during the lifetime of front-end requests + * via grant table operations. + */ + xbb->pseudo_phys_res_id = 0; + xbb->pseudo_phys_res = bus_alloc_resource(xbb->dev, SYS_RES_MEMORY, + &xbb->pseudo_phys_res_id, + 0, ~0, xbb->kva_size, + RF_ACTIVE); + if (xbb->pseudo_phys_res == NULL) { + xbb->kva = 0; + return (ENOMEM); + } + xbb->kva = (vm_offset_t)rman_get_virtual(xbb->pseudo_phys_res); + xbb->gnt_base_addr = rman_get_start(xbb->pseudo_phys_res); +#endif /* XENHVM */ + return (0); } static void -add_to_req_schedule_list_tail(blkif_t *blkif) +xbb_free_communication_mem(struct xbb_softc *xbb) { - if (!blkif->on_req_sched_list) { - mtx_lock(&req_sched_list_lock); - if (!blkif->on_req_sched_list && (blkif->state == XenbusStateConnected)) { - blkif_get(blkif); - STAILQ_INSERT_TAIL(&req_sched_list, blkif, next_req); - blkif->on_req_sched_list = 1; - taskqueue_enqueue(taskqueue_swi, &blk_req_task); + if (xbb->kva != 0) { +#ifndef XENHVM + kmem_free(kernel_map, xbb->kva, xbb->kva_size); +#else + if (xbb->pseudo_phys_res != NULL) { + bus_release_resource(xbb->dev, SYS_RES_MEMORY, + xbb->pseudo_phys_res_id, + xbb->pseudo_phys_res); + 
xbb->pseudo_phys_res = NULL; } - mtx_unlock(&req_sched_list_lock); +#endif } + xbb->kva = 0; + xbb->gnt_base_addr = 0; } -/* This routine does not call blkif_get(), does not schedule the blk_req_task to run, - and assumes that the state is connected */ -static void -add_to_req_schedule_list_tail2(blkif_t *blkif) +static int +xbb_open_device(struct xbb_softc *xbb) { - mtx_lock(&req_sched_list_lock); - if (!blkif->on_req_sched_list) { - STAILQ_INSERT_TAIL(&req_sched_list, blkif, next_req); - blkif->on_req_sched_list = 1; + struct nameidata nd; + struct vattr vattr; + struct cdev *dev; + struct cdevsw *devsw; + int flags; + int error; + int vfs_is_locked; + + flags = FREAD; + error = 0; + + DPRINTF("opening dev=%s\n", xbb->dev_name); + + if ((xbb->flags & XBBF_READ_ONLY) == 0) + flags |= FWRITE; + + if (!curthread->td_proc->p_fd->fd_cdir) { + curthread->td_proc->p_fd->fd_cdir = rootvnode; + VREF(rootvnode); + } + if (!curthread->td_proc->p_fd->fd_rdir) { + curthread->td_proc->p_fd->fd_rdir = rootvnode; + VREF(rootvnode); + } + if (!curthread->td_proc->p_fd->fd_jdir) { + curthread->td_proc->p_fd->fd_jdir = rootvnode; + VREF(rootvnode); } - mtx_unlock(&req_sched_list_lock); -} -/* Removes blkif from front of list and does not call blkif_put() (caller must) */ -static blkif_t * -remove_from_req_schedule_list(void) -{ - blkif_t *blkif; + again: + NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, xbb->dev_name, curthread); + error = vn_open(&nd, &flags, 0, NULL); + if (error) { + /* + * This is the only reasonable guess we can make as far as + * path if the user doesn't give us a fully qualified path. + * If they want to specify a file, they need to specify the + * full path. 
+ */ + if (xbb->dev_name[0] != '/') { + char *dev_path = "/dev/"; + char *dev_name; - mtx_lock(&req_sched_list_lock); + /* Try adding device path at beginning of name */ + dev_name = malloc(strlen(xbb->dev_name) + + strlen(dev_path) + 1, + M_XENBLOCKBACK, M_NOWAIT); + if (dev_name) { + sprintf(dev_name, "%s%s", dev_path, + xbb->dev_name); + free(xbb->dev_name, M_XENBLOCKBACK); + xbb->dev_name = dev_name; + goto again; + } + } + xenbus_dev_fatal(xbb->dev, error, "error opening device %s", + xbb->dev_name); + return (error); + } - if ((blkif = STAILQ_FIRST(&req_sched_list))) { - STAILQ_REMOVE(&req_sched_list, blkif, blkback_info, next_req); - STAILQ_NEXT(blkif, next_req) = NULL; - blkif->on_req_sched_list = 0; + vfs_is_locked = NDHASGIANT(&nd); + + NDFREE(&nd, NDF_ONLY_PNBUF); + + xbb->vn = nd.ni_vp; + + /* We only support disks and files. */ + if (vn_isdisk(xbb->vn, &error)) { + xbb->device_type = XBB_TYPE_DISK; + } else if (xbb->vn->v_type == VREG) { + xbb->device_type = XBB_TYPE_FILE; + } else { + xenbus_dev_fatal(xbb->dev, error, "%s is not a disk " + "or file", xbb->dev_name); + VOP_UNLOCK(xbb->vn, 0); + goto error; } - mtx_unlock(&req_sched_list_lock); + switch (xbb->device_type) { + case XBB_TYPE_DISK: + xbb->dispatch_io = xbb_dispatch_dev; + xbb->cdev = xbb->vn->v_rdev; + xbb->csw = dev_refthread(xbb->cdev, &xbb->dev_ref); + if (xbb->csw == NULL) + panic("Unable to retrieve device switch"); + + error = VOP_GETATTR(xbb->vn, &vattr, NOCRED); + if (error) { + xenbus_dev_fatal(xbb->dev, error, "error getting " + "vnode attributes for device %s", + xbb->dev_name); + VOP_UNLOCK(xbb->vn, 0); + goto error; + } - return blkif; -} + VOP_UNLOCK(xbb->vn, 0); -static void -make_response(blkif_t *blkif, uint64_t id, - unsigned short op, int st) -{ - blkif_response_t *resp; - blkif_back_ring_t *blk_ring = &blkif->ring; - int more_to_do = 0; - int notify; - - mtx_lock(&blkif->blk_ring_lock); - - - /* Place on the response ring for the relevant domain. 
*/ - resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt); - resp->id = id; - resp->operation = op; - resp->status = st; - blk_ring->rsp_prod_pvt++; - RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(blk_ring, notify); + dev = xbb->vn->v_rdev; + devsw = dev->si_devsw; + if (!devsw->d_ioctl) { + error = ENODEV; + xenbus_dev_fatal(xbb->dev, error, "no d_ioctl for " + "device %s!", xbb->dev_name); + goto error; + } + + error = devsw->d_ioctl(dev, DIOCGSECTORSIZE, + (caddr_t)&xbb->sector_size, FREAD, + curthread); + if (error) { + xenbus_dev_fatal(xbb->dev, error, + "error calling ioctl DIOCGSECTORSIZE " + "for device %s", xbb->dev_name); + goto error; + } + + error = devsw->d_ioctl(dev, DIOCGMEDIASIZE, + (caddr_t)&xbb->media_size, FREAD, + curthread); + if (error) { + xenbus_dev_fatal(xbb->dev, error, + "error calling ioctl DIOCGMEDIASIZE " + "for device %s", xbb->dev_name); + goto error; + } + + xbb->major = major(vattr.va_rdev); + xbb->minor = minor(vattr.va_rdev); + + break; + case XBB_TYPE_FILE: { + struct vattr vattr; + + xbb->dispatch_io = xbb_dispatch_file; + error = VOP_GETATTR(xbb->vn, &vattr, curthread->td_ucred); + if (error != 0) { + xenbus_dev_fatal(xbb->dev, error, + "error calling VOP_GETATTR()" + "for file %s", xbb->dev_name); + goto error; + } + if (VOP_ISLOCKED(xbb->vn) != LK_EXCLUSIVE) { + vn_lock(xbb->vn, LK_UPGRADE | LK_RETRY); + if (xbb->vn->v_iflag & VI_DOOMED) { + xenbus_dev_fatal(xbb->dev, error, + "error locking file %s", + xbb->dev_name); + error = EBADF; + + VOP_UNLOCK(xbb->vn, 0); + VFS_UNLOCK_GIANT(vfs_is_locked); + goto error; + } + } + VOP_UNLOCK(xbb->vn, 0); + + xbb->cred = crhold(curthread->td_ucred); + + VFS_UNLOCK_GIANT(vfs_is_locked); + + xbb->media_size = vattr.va_size; - if (blk_ring->rsp_prod_pvt == blk_ring->req_cons) { /* - * Tail check for pending requests. Allows frontend to avoid - * notifications if requests are already in flight (lower - * overheads and promotes batching). 
+ * XXX KDM we're getting a sector size of 131072 back here, + * which somehow translates into 0 on the blkfront side. + * We should debug this, and then put some sanity checks in + * to make sure the sector size is sane. + * + * For now, just use 512 bytes as the sector size. Since + * we're dealing with a file, that works fine. */ - RING_FINAL_CHECK_FOR_REQUESTS(blk_ring, more_to_do); +#if 0 + xbb->sector_size = vattr.va_blocksize; +#endif + xbb->sector_size = 512; - } else if (RING_HAS_UNCONSUMED_REQUESTS(blk_ring)) - more_to_do = 1; + /* + * Sanity check. The media size has to be at least one + * sector long. + */ + if (xbb->media_size < xbb->sector_size) { + xenbus_dev_fatal(xbb->dev, error, + "file %s size %ju < block size %u", + xbb->dev_name, + (uintmax_t)xbb->media_size, + xbb->sector_size); + error = EINVAL; + goto error; + } + break; + } + default: + panic("invalid file type %d", xbb->device_type); + /* NOTREACHED */ + } - mtx_unlock(&blkif->blk_ring_lock); + xbb->sector_size_shift = fls(xbb->sector_size) - 1; + xbb->media_num_sectors = + xbb->media_size >> xbb->sector_size_shift; - if (more_to_do) - add_to_req_schedule_list_tail(blkif); + DPRINTF("opened %s=%s major=%d minor=%d sector_size=%u " + "media_size=%" PRId64 "\n", (xbb->device_type == + XBB_TYPE_DISK) ? 
"dev" : "file", xbb->dev_name, xbb->major, + xbb->minor, xbb->sector_size, xbb->media_size); - if (notify) - notify_remote_via_irq(blkif->irq); + return (0); + + error: + xbb_close_device(xbb); + return (error); } static void -end_block_io_op(struct bio *bio) +xbb_close_device(struct xbb_softc *xbb) { - pending_req_t *pending_req = bio->bio_caller2; + DROP_GIANT(); + DPRINTF("closing dev=%s\n", xbb->dev_name); + if (xbb->vn) { + int flags = FREAD; + int vfs_is_locked = 0; - if (bio->bio_error) { - DPRINTF("BIO returned error %d for operation on device %s\n", - bio->bio_error, pending_req->blkif->dev_name); - pending_req->status = BLKIF_RSP_ERROR; - pending_req->blkif->st_err_req++; - } + if ((xbb->flags & XBBF_READ_ONLY) == 0) + flags |= FWRITE; -#if 0 - printf("done: bio=%x error=%x completed=%llu resid=%lu flags=%x\n", - (unsigned int)bio, bio->bio_error, bio->bio_completed, bio->bio_resid, bio->bio_flags); -#endif + if (xbb->csw) { + dev_relthread(xbb->cdev, xbb->dev_ref); + xbb->csw = NULL; + xbb->cdev = NULL; + } - if (atomic_fetchadd_int(&pending_req->pendcnt, -1) == 1) { - fast_flush_area(pending_req); - make_response(pending_req->blkif, pending_req->id, - pending_req->operation, pending_req->status); - blkif_put(pending_req->blkif); - free_req(pending_req); - } + if (xbb->device_type == XBB_TYPE_FILE) + vfs_is_locked = VFS_LOCK_GIANT(xbb->vn->v_mount); - g_destroy_bio(bio); + (void)vn_close(xbb->vn, flags, NOCRED, curthread); + + if (xbb->device_type == XBB_TYPE_FILE) + VFS_UNLOCK_GIANT(vfs_is_locked); + + xbb->vn = NULL; + } + PICKUP_GIANT(); } +/** + * \brief Connect to our blkfront peer now that it has completed + * publishing its configuration into XenBus. 
+ */ static void -dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req, pending_req_t *pending_req) +xbb_connect(struct xbb_softc *xbb) { - struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - struct { - unsigned long buf; unsigned int nsec; - } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - unsigned int nseg = req->nr_segments, nr_sects = 0; - struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - int operation, ret, i, nbio = 0; + struct xs_transaction xst; + char protocol_abi[64]; + struct xbb_xen_req *req; + struct xbb_xen_req *last_req; + uint8_t *req_kva; + u_long gnt_base; + u_int ring_idx; + int error; - /* Check that number of segments is sane. */ - if (unlikely(nseg == 0) || - unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { - DPRINTF("Bad number of segments in request (%d)\n", nseg); - goto fail_response; + if (xenbus_get_state(xbb->dev) == XenbusStateConnected) + return; + + /* + * Collect final frontend information. + * Mandatory (used in all versions of the protocol) first. + */ + error = xs_gather(XST_NIL, xenbus_get_otherend_path(xbb->dev), + "ring-ref", "%" PRIu32, + &xbb->ring_config.ring_ref[0], + "event-channel", "%" PRIu32, + &xbb->ring_config.evtchn, + NULL); + if (error != 0) { + xenbus_dev_fatal(xbb->dev, error, + "Unable to retrieve ring information from " + "frontend %s. Unable to connect.", + xenbus_get_otherend_path(xbb->dev)); + return; } - if (req->operation == BLKIF_OP_WRITE) { - if (blkif->read_only) { - DPRINTF("Attempt to write to read only device %s\n", blkif->dev_name); - goto fail_response; + /* + * These fields are initialized to legacy protocol defaults + * so we only need to fail if reading the updated value succeeds + * and the new value is outside of its allowed range. + * + * \note xs_gather() returns on the first encountered error, so + * we must use independant calls in order to guarantee + * we don't miss information in a sparsly populated front-end + * tree. 
+ */ + (void)xs_scanf(XST_NIL, xenbus_get_otherend_path(xbb->dev), + "ring-pages", NULL, "%" PRIu32, + &xbb->ring_config.ring_pages); + + (void)xs_scanf(XST_NIL, xenbus_get_otherend_path(xbb->dev), + "max-requests", NULL, "%" PRIu32, + &xbb->max_requests); + + (void)xs_scanf(XST_NIL, xenbus_get_otherend_path(xbb->dev), + "max-request-segments", NULL, "%" PRIu32, + &xbb->max_request_segments); + + (void)xs_scanf(XST_NIL, xenbus_get_otherend_path(xbb->dev), + "max-request-size", NULL, "%" PRIu32, + &xbb->max_request_size); + + if (xbb->ring_config.ring_pages > XBB_MAX_RING_PAGES) { + xenbus_dev_fatal(xbb->dev, EINVAL, + "Front-end specificed ring-pages of %u " + "exceeds backend limit of %zu. " + "Unable to connect.", + xbb->ring_config.ring_pages, + XBB_MAX_RING_PAGES); + return; + } else if (xbb->max_requests > XBB_MAX_REQUESTS) { + xenbus_dev_fatal(xbb->dev, EINVAL, + "Front-end specificed max_requests of %u " + "exceeds backend limit of %u. " + "Unable to connect.", + xbb->max_requests, + XBB_MAX_REQUESTS); + return; + } else if (xbb->max_request_segments > XBB_MAX_SEGMENTS_PER_REQUEST) { + xenbus_dev_fatal(xbb->dev, EINVAL, + "Front-end specificed max_requests_segments " + "of %u exceeds backend limit of %u. " + "Unable to connect.", + xbb->max_request_segments, + XBB_MAX_SEGMENTS_PER_REQUEST); + return; + } else if (xbb->max_request_size > XBB_MAX_REQUEST_SIZE) { + xenbus_dev_fatal(xbb->dev, EINVAL, + "Front-end specificed max_request_size " + "of %u exceeds backend limit of %u. " + "Unable to connect.", + xbb->max_request_size, + XBB_MAX_REQUEST_SIZE); + return; + } + + /* If using a multi-page ring, pull in the remaining references. 
*/ + for (ring_idx = 1; ring_idx < xbb->ring_config.ring_pages; ring_idx++) { + char ring_ref_name[]= "ring_refXX"; + + snprintf(ring_ref_name, sizeof(ring_ref_name), + "ring-ref%u", ring_idx); + error = xs_scanf(XST_NIL, xenbus_get_otherend_path(xbb->dev), + ring_ref_name, NULL, "%" PRIu32, + &xbb->ring_config.ring_ref[ring_idx]); + if (error != 0) { + xenbus_dev_fatal(xbb->dev, error, + "Failed to retriev grant reference " + "for page %u of shared ring. Unable " + "to connect.", ring_idx); + return; } - operation = BIO_WRITE; - } else - operation = BIO_READ; + } - pending_req->blkif = blkif; - pending_req->id = req->id; - pending_req->operation = req->operation; - pending_req->status = BLKIF_RSP_OKAY; - pending_req->nr_pages = nseg; - - for (i = 0; i < nseg; i++) { - seg[i].nsec = req->seg[i].last_sect - - req->seg[i].first_sect + 1; - - if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) || - (seg[i].nsec <= 0)) - goto fail_response; - nr_sects += seg[i].nsec; - - map[i].host_addr = vaddr(pending_req, i); - map[i].dom = blkif->domid; - map[i].ref = req->seg[i].gref; - map[i].flags = GNTMAP_host_map; - if (operation == BIO_WRITE) - map[i].flags |= GNTMAP_readonly; + error = xs_gather(XST_NIL, xenbus_get_otherend_path(xbb->dev), + "protocol", "%63s", protocol_abi, + NULL); + if (error != 0 + || !strcmp(protocol_abi, XEN_IO_PROTO_ABI_NATIVE)) { + /* + * Assume native if the frontend has not + * published ABI data or it has published and + * matches our own ABI. + */ + xbb->abi = BLKIF_PROTOCOL_NATIVE; + } else if (!strcmp(protocol_abi, XEN_IO_PROTO_ABI_X86_32)) { + + xbb->abi = BLKIF_PROTOCOL_X86_32; + } else if (!strcmp(protocol_abi, XEN_IO_PROTO_ABI_X86_64)) { + + xbb->abi = BLKIF_PROTOCOL_X86_64; + } else { + + xenbus_dev_fatal(xbb->dev, error, + "Unknown protocol ABI (%s) published by " + "frontend. 
Unable to connect.", protocol_abi); + return; } - /* Convert to the disk's sector size */ - nr_sects = (nr_sects << 9) >> blkif->sector_size_shift; + /* Allocate resources whose size depends on front-end configuration. */ + + error = xbb_alloc_communication_mem(xbb); + if (error != 0) { + xenbus_dev_fatal(xbb->dev, error, + "Unable to allocate communication memory"); + return; + } - ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); - PANIC_IF(ret); + /* + * Allocate request book keeping datastructures. + */ + xbb->requests = malloc(xbb->max_requests * sizeof(*xbb->requests), + M_XENBLOCKBACK, M_NOWAIT|M_ZERO); + if (xbb->requests == NULL) { + xenbus_dev_fatal(xbb->dev, error, + "Unable to allocate request structures"); + return; + } - for (i = 0; i < nseg; i++) { - if (unlikely(map[i].status != 0)) { - DPRINTF("invalid buffer -- could not remap it\n"); - goto fail_flush; + req_kva = (uint8_t *)xbb->kva; + gnt_base = xbb->gnt_base_addr; + req = xbb->requests; + last_req = &xbb->requests[xbb->max_requests - 1]; + while (req <= last_req) { + int seg; + + req->xbb = xbb; + req->kva = req_kva; + req->gnt_handles = malloc(xbb->max_request_segments + * sizeof(*req->gnt_handles), + M_XENBLOCKBACK, M_NOWAIT|M_ZERO); + if (req->gnt_handles == NULL) { + xenbus_dev_fatal(xbb->dev, ENOMEM, + "Unable to allocate request " + "grant references"); + return; } +#ifdef USE_BOUNCE_BUFFERS + req->bounce = malloc(xbb->max_request_size, + M_XENBLOCKBACK, M_NOWAIT); + if (req->bounce == NULL) { + xenbus_dev_fatal(xbb->dev, ENOMEM, + "Unable to allocate request " + "bounce buffers"); + return; + } +#endif /* USE_BOUNCE_BUFFERS */ + req->gnt_base = gnt_base; + req_kva += xbb->max_request_segments * PAGE_SIZE; + gnt_base += xbb->max_request_segments * PAGE_SIZE; + SLIST_INSERT_HEAD(&xbb->request_free_slist, req, links); + + for (seg = 0; seg < xbb->max_request_segments; seg++) + req->gnt_handles[seg] = BLKBACK_INVALID_HANDLE; + + req++; + } + + /* + * Connect communication 
channel. + */ + error = xbb_connect_ring(xbb); + if (error != 0) { + /* Specific errors are reported by xbb_connect_ring(). */ + return; + } + + /* + * Supply information about the physical device to + * the frontend via XenBus. + */ +again: + error = xs_transaction_start(&xst); + if (error != 0) { + xenbus_dev_fatal(xbb->dev, error, + "Error writing configuration for backend " + "(start transaction)"); + xbb_disconnect(xbb); + return; + } - pending_handle(pending_req, i) = map[i].handle; -#if 0 - /* Can't do this in FreeBSD since vtophys() returns the pfn */ - /* of the remote domain who loaned us the machine page - DPT */ - xen_phys_machine[(vtophys(vaddr(pending_req, i)) >> PAGE_SHIFT)] = - map[i]dev_bus_addr >> PAGE_SHIFT; -#endif - seg[i].buf = map[i].dev_bus_addr | - (req->seg[i].first_sect << 9); + error = xs_printf(xst, xenbus_get_node(xbb->dev), + "sectors", "%"PRIu64, xbb->media_num_sectors); + if (error) { + xenbus_dev_fatal(xbb->dev, error, "writing %s/sectors", + xenbus_get_node(xbb->dev)); + goto abort; } - if (req->sector_number + nr_sects > blkif->media_num_sectors) { - DPRINTF("%s of [%llu,%llu] extends past end of device %s\n", - operation == BIO_READ ? "read" : "write", - req->sector_number, - req->sector_number + nr_sects, blkif->dev_name); - goto fail_flush; + /* XXX Support all VBD attributes here. */ + error = xs_printf(xst, xenbus_get_node(xbb->dev), "info", "%u", + xbb->flags & XBBF_READ_ONLY ? 
VDISK_READONLY : 0); + if (error) { + xenbus_dev_fatal(xbb->dev, error, "writing %s/info", + xenbus_get_node(xbb->dev)); + goto abort; + } + error = xs_printf(xst, xenbus_get_node(xbb->dev), + "sector-size", "%u", + xbb->sector_size); + if (error) { + xenbus_dev_fatal(xbb->dev, error, "writing %s/sector-size", + xenbus_get_node(xbb->dev)); + goto abort; } - for (i = 0; i < nseg; i++) { - struct bio *bio; + error = xs_transaction_end(xst, 0); + if (error == EAGAIN) + goto again; + if (error) + xenbus_dev_fatal(xbb->dev, error, "ending transaction"); + + /* Ready for I/O. */ + xenbus_set_state(xbb->dev, XenbusStateConnected); + return; + +abort: + xs_transaction_end(xst, 1); + xbb_disconnect(xbb); +} - if ((int)seg[i].nsec & ((blkif->sector_size >> 9) - 1)) { - DPRINTF("Misaligned I/O request from domain %d", blkif->domid); - goto fail_put_bio; +static int +xbb_connect_ring(struct xbb_softc *xbb) +{ + struct gnttab_map_grant_ref gnts[XBB_MAX_RING_PAGES]; + struct gnttab_map_grant_ref *gnt; + u_int ring_idx; + int error; + + if ((xbb->flags & XBBF_RING_CONNECTED) != 0) + return (0); + + /* + * Kva for our ring is at the tail of the region of kva allocated + * by xbb_alloc_communication_mem(). 
+ */ + xbb->ring_config.va = xbb->kva + + (xbb->kva_size + - (xbb->ring_config.ring_pages * PAGE_SIZE)); + xbb->ring_config.gnt_addr = xbb->gnt_base_addr + + (xbb->kva_size + - (xbb->ring_config.ring_pages * PAGE_SIZE)); + + for (ring_idx = 0, gnt = gnts; + ring_idx < xbb->ring_config.ring_pages; + ring_idx++, gnt++) { + + gnt->host_addr = xbb->ring_config.gnt_addr + + (ring_idx * PAGE_SIZE); + gnt->flags = GNTMAP_host_map; + gnt->ref = xbb->ring_config.ring_ref[ring_idx]; + gnt->dom = xbb->otherend_id; + } + + error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, gnts, + xbb->ring_config.ring_pages); + if (error) + panic("blkback: Ring page grant table op failed (%d)", error); + + for (ring_idx = 0, gnt = gnts; + ring_idx < xbb->ring_config.ring_pages; + ring_idx++, gnt++) { + if (gnt->status != 0) { + xbb->ring_config.va = 0; + xenbus_dev_fatal(xbb->dev, EACCES, + "Ring shared page mapping failed. " + "Status %d.", gnt->status); + return (EACCES); } + xbb->ring_config.handle[ring_idx] = gnt->handle; + xbb->ring_config.bus_addr[ring_idx] = gnt->dev_bus_addr; + } - bio = biolist[nbio++] = g_new_bio(); - if (unlikely(bio == NULL)) - goto fail_put_bio; + /* Initialize the ring based on ABI. 
*/ + switch (xbb->abi) { + case BLKIF_PROTOCOL_NATIVE: + { + blkif_sring_t *sring; + sring = (blkif_sring_t *)xbb->ring_config.va; + BACK_RING_INIT(&xbb->rings.native, sring, + xbb->ring_config.ring_pages * PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_32: + { + blkif_x86_32_sring_t *sring_x86_32; + sring_x86_32 = (blkif_x86_32_sring_t *)xbb->ring_config.va; + BACK_RING_INIT(&xbb->rings.x86_32, sring_x86_32, + xbb->ring_config.ring_pages * PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_64: + { + blkif_x86_64_sring_t *sring_x86_64; + sring_x86_64 = (blkif_x86_64_sring_t *)xbb->ring_config.va; + BACK_RING_INIT(&xbb->rings.x86_64, sring_x86_64, + xbb->ring_config.ring_pages * PAGE_SIZE); + break; + } + default: + panic("Unexpected blkif protocol ABI."); + } - bio->bio_cmd = operation; - bio->bio_offset = req->sector_number << blkif->sector_size_shift; - bio->bio_length = seg[i].nsec << 9; - bio->bio_bcount = bio->bio_length; - bio->bio_data = (caddr_t)(vaddr(pending_req, i) | (seg[i].buf & PAGE_MASK)); - bio->bio_done = end_block_io_op; - bio->bio_caller2 = pending_req; - bio->bio_dev = blkif->cdev; + xbb->flags |= XBBF_RING_CONNECTED; - req->sector_number += (seg[i].nsec << 9) >> blkif->sector_size_shift; -#if 0 - printf("new: bio=%x cmd=%d sect=%llu nsect=%u iosize_max=%u @ %08lx\n", - (unsigned int)bio, req->operation, req->sector_number, seg[i].nsec, - blkif->cdev->si_iosize_max, seg[i].buf); -#endif + error = + bind_interdomain_evtchn_to_irqhandler(xbb->otherend_id, + xbb->ring_config.evtchn, + device_get_nameunit(xbb->dev), + xbb_intr, /*arg*/xbb, + INTR_TYPE_BIO | INTR_MPSAFE, + &xbb->irq); + if (error) { + xbb_disconnect(xbb); + xenbus_dev_fatal(xbb->dev, error, "binding event channel"); + return (error); } - pending_req->pendcnt = nbio; - blkif_get(blkif); + DPRINTF("rings connected!\n"); - for (i = 0; i < nbio; i++) - (*blkif->csw->d_strategy)(biolist[i]); + return 0; +} - return; +static void +xbb_disconnect(struct xbb_softc *xbb) +{ + struct 
gnttab_unmap_grant_ref ops[XBB_MAX_RING_PAGES]; + struct gnttab_unmap_grant_ref *op; + u_int ring_idx; + int error; + + DPRINTF("\n"); + + if ((xbb->flags & XBBF_RING_CONNECTED) == 0) + return; + + if (xbb->irq != 0) { + unbind_from_irqhandler(xbb->irq); + xbb->irq = 0; + } + + for (ring_idx = 0, op = ops; + ring_idx < xbb->ring_config.ring_pages; + ring_idx++, op++) { + + op->host_addr = xbb->ring_config.gnt_addr + + (ring_idx * PAGE_SIZE); + op->dev_bus_addr = xbb->ring_config.bus_addr[ring_idx]; + op->handle = xbb->ring_config.handle[ring_idx]; + } + + error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, ops, + xbb->ring_config.ring_pages); + if (error != 0) + panic("Grant table op failed (%d)", error); - fail_put_bio: - for (i = 0; i < (nbio-1); i++) - g_destroy_bio(biolist[i]); - fail_flush: - fast_flush_area(pending_req); - fail_response: - make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); - free_req(pending_req); + xbb->flags &= ~XBBF_RING_CONNECTED; } static void -blk_req_action(void *context, int pending) +xbb_unmap_req(struct xbb_xen_req *req) +{ + struct gnttab_unmap_grant_ref unmap[XBB_MAX_SEGMENTS_PER_REQUEST]; + u_int i; + u_int invcount; + int error; + + invcount = 0; + for (i = 0; i < req->nr_pages; i++) { + + if (req->gnt_handles[i] == BLKBACK_INVALID_HANDLE) + continue; + + unmap[invcount].host_addr = xbb_req_gntaddr(req, i, 0); + unmap[invcount].dev_bus_addr = 0; + unmap[invcount].handle = req->gnt_handles[i]; + req->gnt_handles[i] = BLKBACK_INVALID_HANDLE; + invcount++; + } + + error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, + unmap, invcount); + KASSERT(error == 0, ("Grant table operation failed")); +} + +void +xbb_run_queue(void *context, int pending) +{ + struct xbb_softc *xbb; + blkif_back_rings_t *rings; + RING_IDX rp; + + + xbb = (struct xbb_softc *)context; + rings = &xbb->rings; + + /* + * Cache req_prod to avoid accessing a cache line shared + * with the frontend. 
+ */ + rp = rings->common.sring->req_prod; + + /* Ensure we see queued requests up to 'rp'. */ + rmb(); + + /** + * Run so long as there is work to consume and the generation + * of a response will not overflow the ring. + * + * @note There's a 1 to 1 relationship between requests and responses, + * so an overflow should never occur. This test is to protect + * our domain from digesting bogus data. Shouldn't we log this? + */ + while (rings->common.req_cons != rp + && RING_REQUEST_CONS_OVERFLOW(&rings->common, + rings->common.req_cons) == 0) { + blkif_request_t ring_req_storage; + blkif_request_t *ring_req; + struct xbb_xen_req *req; + RING_IDX req_ring_idx; + + req = xbb_get_req(xbb); + if (req == NULL) { + /* + * Resource shortage has been recorded. + * We'll be scheduled to run once a request + * object frees up due to a completion. + */ + break; + } + + switch (xbb->abi) { + case BLKIF_PROTOCOL_NATIVE: + ring_req = RING_GET_REQUEST(&xbb->rings.native, + rings->common.req_cons); + break; + case BLKIF_PROTOCOL_X86_32: + { + struct blkif_x86_32_request *ring_req32; + + ring_req32 = RING_GET_REQUEST(&xbb->rings.x86_32, + rings->common.req_cons); + blkif_get_x86_32_req(&ring_req_storage, ring_req32); + ring_req = &ring_req_storage; + break; + } + case BLKIF_PROTOCOL_X86_64: + { + struct blkif_x86_64_request *ring_req64; + + ring_req64 = RING_GET_REQUEST(&xbb->rings.x86_64, + rings->common.req_cons); + blkif_get_x86_64_req(&ring_req_storage, ring_req64); + ring_req = &ring_req_storage; + break; + } + default: + panic("Unexpected blkif protocol ABI."); + /* NOTREACHED */ + } + + /* + * Signify that we can overwrite this request with a + * response by incrementing our consumer index. The + * response won't be generated until after we've already + * consumed all necessary data out of the version of the + * request in the ring buffer (for native mode). 
We + * must update the consumer index before issueing back-end + * I/O so there is no possibility that it will complete + * and a response be generated before we make room in + * the queue for that response. + */ + req_ring_idx = xbb->rings.common.req_cons; + xbb->rings.common.req_cons += + BLKIF_SEGS_TO_BLOCKS(ring_req->nr_segments); + + xbb_dispatch_io(xbb, ring_req, req, req_ring_idx); + } +} + +void xbb_generated_data_strategy(struct bio *bio); +void +xbb_generated_data_strategy(struct bio *bio) { - blkif_t *blkif; + uint64_t data_pattern; + uint64_t *data; + uint64_t *end_data; + + data_pattern = (bio->bio_offset >> 9) << 32; + data = (uint64_t *)bio->bio_data; + end_data = (uint64_t *)(bio->bio_data + bio->bio_bcount); + + switch (bio->bio_cmd) + { + case BIO_READ: + while (data < end_data) + { + *data++ = data_pattern++; + + if ((data_pattern & 63) == 0) { + data_pattern -= 64; + data_pattern += (uint64_t)1 << 32; + } + } + break; + case BIO_WRITE: + while (data < end_data) + { + if (*data++ != data_pattern++) { + memset(dump_buf, '0', sizeof(dump_buf)); + memcpy(dump_buf, bio->bio_data, + MIN(sizeof(dump_buf), bio->bio_bcount)); + dump_ptr = dump_buf + + ((uint8_t *)data + - (uint8_t *)bio->bio_data - 1); + kdb_enter(KDB_WHY_BREAK, "Write miscompare\n"); + } + + if ((data_pattern & 63) == 0) { + data_pattern -= 64; + data_pattern += (uint64_t)1 << 32; + } + } + break; + case BIO_FLUSH: + break; + default: + break; + } + bio->bio_error = 0; + bio->bio_done(bio); +} + +/* + * Returns 0 for success, non-zero for failure. 
+ */ +static int +xbb_dispatch_dev(struct xbb_softc *xbb, blkif_request_t *ring_req, + struct xbb_xen_req *req, struct blkif_request_segment *sg, + struct xbb_sg_state *sg_states, int nseg, int operation, + int bio_flags) +{ + struct bio *bios[XBB_MAX_SEGMENTS_PER_REQUEST]; + struct bio *bio; + struct xbb_sg_state *sg_state; + u_int nbio; + u_int bio_idx; + u_int seg_idx; + int error; + + error = 0; + nbio = 0; + bio_idx = 0; + + if (operation == BIO_FLUSH) { + bio = g_new_bio(); + if (unlikely(bio == NULL)) { + DPRINTF("Unable to allocate bio for BIO_FLUSH\n"); + error = ENOMEM; + return (error); + } + + bio->bio_cmd = BIO_FLUSH; + bio->bio_flags |= BIO_ORDERED; + bio->bio_dev = xbb->cdev; + bio->bio_offset = 0; + bio->bio_data = 0; + bio->bio_done = xbb_bio_done; + bio->bio_caller1 = req; + bio->bio_pblkno = 0; + + req->pendcnt = 1; + + if (test_generated_data) { + xbb_generated_data_strategy(bios[bio_idx]); + } else { + (*xbb->csw->d_strategy)(bios[bio_idx]); + } + + return (0); + } + + for (seg_idx = 0, bio = NULL, sg_state = sg_states; + seg_idx < nseg; + seg_idx++, sg_state++) { + + /* + * KVA will not be contiguous, so any additional + * I/O will need to be represented in a new bio. 
+ */ + if (sg_state->first_sect != 0) + bio = NULL; + + if (bio == NULL) { + bio = bios[nbio++] = g_new_bio(); + if (unlikely(bio == NULL)) + goto fail_free_bios; + bio->bio_cmd = operation; + bio->bio_flags |= bio_flags; + bio->bio_dev = xbb->cdev; + bio->bio_offset = (off_t)ring_req->sector_number + << xbb->sector_size_shift; + bio->bio_data = xbb_req_ioaddr(req, seg_idx, + sg_state->first_sect); + bio->bio_done = xbb_bio_done; + bio->bio_caller1 = req; + bio->bio_pblkno = ring_req->sector_number; + } + + bio->bio_length += sg_state->nsect << 9; + bio->bio_bcount = bio->bio_length; + ring_req->sector_number += (sg_state->nsect << 9) + >> xbb->sector_size_shift; + + if (sg_state->last_sect != (PAGE_SIZE - 512) >> 9) { + + /* + * KVA will not be contiguous, so any additional + * I/O will need to be represented in a new bio. + */ + bio = NULL; + } + } - DPRINTF("\n"); + req->pendcnt = nbio; - while (!STAILQ_EMPTY(&req_sched_list)) { - blkif_back_ring_t *blk_ring; - RING_IDX rc, rp; - - blkif = remove_from_req_schedule_list(); - - blk_ring = &blkif->ring; - rc = blk_ring->req_cons; - rp = blk_ring->sring->req_prod; - rmb(); /* Ensure we see queued requests up to 'rp'. 
*/ - - while ((rc != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, rc)) { - blkif_request_t *req; - pending_req_t *pending_req; - - pending_req = alloc_req(); - if (pending_req == NULL) - goto out_of_preqs; - - req = RING_GET_REQUEST(blk_ring, rc); - blk_ring->req_cons = ++rc; /* before make_response() */ - - switch (req->operation) { - case BLKIF_OP_READ: - blkif->st_rd_req++; - dispatch_rw_block_io(blkif, req, pending_req); - break; - case BLKIF_OP_WRITE: - blkif->st_wr_req++; - dispatch_rw_block_io(blkif, req, pending_req); - break; - default: - blkif->st_err_req++; - DPRINTF("error: unknown block io operation [%d]\n", - req->operation); - make_response(blkif, req->id, req->operation, - BLKIF_RSP_ERROR); - free_req(pending_req); - break; - } + for (bio_idx = 0; bio_idx < nbio; bio_idx++) + { +#ifdef USE_BOUNCE_BUFFERS + vm_offset_t kva_offset; + + kva_offset = (vm_offset_t)bios[bio_idx]->bio_data + - (vm_offset_t)req->bounce; + if (operation == BIO_WRITE) { + memcpy(bios[bio_idx]->bio_data, + (uint8_t *)req->kva + kva_offset, + bios[bio_idx]->bio_bcount); } +#endif - blkif_put(blkif); + if (test_generated_data) { + xbb_generated_data_strategy(bios[bio_idx]); + } else { + (*xbb->csw->d_strategy)(bios[bio_idx]); + } } - return; + return (error); - out_of_preqs: - /* We ran out of pending req structs */ - /* Just requeue interface and wait to be rescheduled to run when one is freed */ - add_to_req_schedule_list_tail2(blkif); - blkif->st_oo_req++; -} +fail_free_bios: + for (bio_idx = 0; bio_idx < (nbio-1); bio_idx++) + g_destroy_bio(bios[bio_idx]); -/* Handle interrupt from a frontend */ -static void -blkback_intr(void *arg) -{ - blkif_t *blkif = arg; - DPRINTF("%x\n", (unsigned int)blkif); - add_to_req_schedule_list_tail(blkif); + return (error); } -/* Map grant ref for ring */ +/* + * Returns 0 for success, non-zero for failure. 
+ */ static int -map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring) -{ - struct gnttab_map_grant_ref op; - - ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE); - if (ring->va == 0) - return ENOMEM; - - op.host_addr = ring->va; - op.flags = GNTMAP_host_map; - op.ref = ref; - op.dom = dom; - HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); - if (op.status) { - WPRINTF("grant table op err=%d\n", op.status); - kmem_free(kernel_map, ring->va, PAGE_SIZE); - ring->va = 0; - return EACCES; - } +xbb_dispatch_file(struct xbb_softc *xbb, blkif_request_t *ring_req, + struct xbb_xen_req *req, struct blkif_request_segment *sg, + struct xbb_sg_state *sg_states, int nseg, int operation, + int flags) +{ + u_int seg_idx; + struct uio xuio; + struct xbb_sg_state *sg_state; + struct iovec *xiovec; +#ifdef USE_BOUNCE_BUFFERS + void **p_vaddr; + int saved_uio_iovcnt; +#endif /* USE_BOUNCE_BUFFERS */ + int vfs_is_locked; + int error; + + error = 0; + bzero(&xuio, sizeof(xuio)); + + req->pendcnt = 0; + + switch (operation) { + case BIO_READ: + xuio.uio_rw = UIO_READ; + break; + case BIO_WRITE: + xuio.uio_rw = UIO_WRITE; + break; + case BIO_FLUSH: { + struct mount *mountpoint; - ring->handle = op.handle; - ring->bus_addr = op.dev_bus_addr; + vfs_is_locked = VFS_LOCK_GIANT(xbb->vn->v_mount); - return 0; -} + (void) vn_start_write(xbb->vn, &mountpoint, V_WAIT); -/* Unmap grant ref for ring */ -static void -unmap_ring(struct ring_ref *ring) -{ - struct gnttab_unmap_grant_ref op; + vn_lock(xbb->vn, LK_EXCLUSIVE | LK_RETRY); + error = VOP_FSYNC(xbb->vn, MNT_WAIT, curthread); + VOP_UNLOCK(xbb->vn, 0); - op.host_addr = ring->va; - op.dev_bus_addr = ring->bus_addr; - op.handle = ring->handle; - HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); - if (op.status) - WPRINTF("grant table op err=%d\n", op.status); + vn_finished_write(mountpoint); - kmem_free(kernel_map, ring->va, PAGE_SIZE); - ring->va = 0; -} + VFS_UNLOCK_GIANT(vfs_is_locked); -static int 
-connect_ring(blkif_t *blkif) -{ - struct xenbus_device *xdev = blkif->xdev; - blkif_sring_t *ring; - unsigned long ring_ref; - evtchn_port_t evtchn; - evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain }; - int err; + goto bailout_send_response; + /* NOTREACHED */ + } + default: + panic("invalid operation %d", operation); + /* NOTREACHED */ + } + xuio.uio_offset = (vm_offset_t)ring_req->sector_number + << xbb->sector_size_shift; - if (blkif->ring_connected) - return 0; + xuio.uio_segflg = UIO_SYSSPACE; + xuio.uio_iov = xbb->xiovecs; + xuio.uio_iovcnt = 0; - // Grab FE data and map his memory - err = xenbus_gather(NULL, xdev->otherend, - "ring-ref", "%lu", &ring_ref, - "event-channel", "%u", &evtchn, NULL); - if (err) { - xenbus_dev_fatal(xdev, err, - "reading %s/ring-ref and event-channel", - xdev->otherend); - return err; - } - - err = map_ring(ring_ref, blkif->domid, &blkif->rr); - if (err) { - xenbus_dev_fatal(xdev, err, "mapping ring"); - return err; - } - ring = (blkif_sring_t *)blkif->rr.va; - BACK_RING_INIT(&blkif->ring, ring, PAGE_SIZE); - - op.u.bind_interdomain.remote_dom = blkif->domid; - op.u.bind_interdomain.remote_port = evtchn; - err = HYPERVISOR_event_channel_op(&op); - if (err) { - unmap_ring(&blkif->rr); - xenbus_dev_fatal(xdev, err, "binding event channel"); - return err; - } - blkif->evtchn = op.u.bind_interdomain.local_port; - - /* bind evtchn to irq handler */ - blkif->irq = - bind_evtchn_to_irqhandler(blkif->evtchn, "blkback", - blkback_intr, blkif, INTR_TYPE_NET|INTR_MPSAFE, &blkif->irq_cookie); + for (seg_idx = 0, xiovec = NULL, sg_state = sg_states; + seg_idx < nseg; seg_idx++, sg_state++) { - blkif->ring_connected = 1; + /* + * If the first sector is not 0, the KVA will not be + * contiguous and we'll need to go on to another segment. + */ + if (sg_state->first_sect != 0) + xiovec = NULL; - DPRINTF("%x rings connected! 
evtchn=%d irq=%d\n", - (unsigned int)blkif, blkif->evtchn, blkif->irq); + if (xiovec == NULL) { + xiovec = &xbb->xiovecs[xuio.uio_iovcnt]; + xiovec->iov_base = xbb_req_ioaddr(req, seg_idx, + sg_state->first_sect); +#ifdef USE_BOUNCE_BUFFERS + /* + * Store the address of the incoming buffer at this + * particular offset as well, so we can do the copy + * later without having to do more work to + * recalculate this address. + */ + p_vaddr = &xbb->xiovecs_vaddr[xuio.uio_iovcnt]; + *p_vaddr = xbb_req_vaddr(req, seg_idx, + sg_state->first_sect); +#endif /* USE_BOUNCE_BUFFERS */ + xiovec->iov_len = 0; + xuio.uio_iovcnt++; + } - return 0; -} + xiovec->iov_len += sg_state->nsect << 9; -static void -disconnect_ring(blkif_t *blkif) -{ - DPRINTF("\n"); + xuio.uio_resid += sg_state->nsect << 9; + ring_req->sector_number += (sg_state->nsect << 9) + >> xbb->sector_size_shift; - if (blkif->ring_connected) { - unbind_from_irqhandler(blkif->irq, blkif->irq_cookie); - blkif->irq = 0; - unmap_ring(&blkif->rr); - blkif->ring_connected = 0; + /* + * If the last sector is not the full page size count, + * the next segment will not be contiguous in KVA and we + * need a new iovec. 
+ */ + if (sg_state->last_sect != (PAGE_SIZE - 512) >> 9) + xiovec = NULL; } -} - -static void -connect(blkif_t *blkif) -{ - struct xenbus_transaction *xbt; - struct xenbus_device *xdev = blkif->xdev; - int err; - - if (!blkif->ring_connected || - blkif->vn == NULL || - blkif->state == XenbusStateConnected) - return; - DPRINTF("%s\n", xdev->otherend); + xuio.uio_td = curthread; - /* Supply the information about the device the frontend needs */ -again: - xbt = xenbus_transaction_start(); - if (IS_ERR(xbt)) { - xenbus_dev_fatal(xdev, PTR_ERR(xbt), - "Error writing configuration for backend " - "(start transaction)"); - return; - } +#ifdef USE_BOUNCE_BUFFERS + saved_uio_iovcnt = xuio.uio_iovcnt; - err = xenbus_printf(xbt, xdev->nodename, "sectors", "%u", - blkif->media_num_sectors); - if (err) { - xenbus_dev_fatal(xdev, err, "writing %s/sectors", - xdev->nodename); - goto abort; - } + if (operation == BIO_WRITE) { + /* Copy the write data to the local buffer. */ + for (seg_idx = 0, p_vaddr = xbb->xiovecs_vaddr, + xiovec = xuio.uio_iov; seg_idx < xuio.uio_iovcnt; + seg_idx++, xiovec++, p_vaddr++) { - err = xenbus_printf(xbt, xdev->nodename, "info", "%u", - blkif->read_only ? VDISK_READONLY : 0); - if (err) { - xenbus_dev_fatal(xdev, err, "writing %s/info", - xdev->nodename); - goto abort; - } - err = xenbus_printf(xbt, xdev->nodename, "sector-size", "%u", - blkif->sector_size); - if (err) { - xenbus_dev_fatal(xdev, err, "writing %s/sector-size", - xdev->nodename); - goto abort; + memcpy(xiovec->iov_base, *p_vaddr, xiovec->iov_len); + } + } else { + /* + * We only need to save off the iovecs in the case of a + * read, because the copy for the read happens after the + * VOP_READ(). (The uio will get modified in that call + * sequence.) 
+ */ + memcpy(xbb->saved_xiovecs, xuio.uio_iov, + xuio.uio_iovcnt * sizeof(xuio.uio_iov[0])); } +#endif /* USE_BOUNCE_BUFFERS */ - err = xenbus_transaction_end(xbt, 0); - if (err == -EAGAIN) - goto again; - if (err) - xenbus_dev_fatal(xdev, err, "ending transaction"); + vfs_is_locked = VFS_LOCK_GIANT(xbb->vn->v_mount); + switch (operation) { + case BIO_READ: - err = xenbus_switch_state(xdev, NULL, XenbusStateConnected); - if (err) - xenbus_dev_fatal(xdev, err, "switching to Connected state", - xdev->nodename); + vn_lock(xbb->vn, LK_EXCLUSIVE | LK_RETRY); - blkif->state = XenbusStateConnected; + /* + * UFS pays attention to IO_DIRECT for reads. If the + * DIRECTIO option is configured into the kernel, it calls + * ffs_rawread(). But that only works for single-segment + * uios with user space addresses. In our case, with a + * kernel uio, it still reads into the buffer cache, but it + * will just try to release the buffer from the cache later + * on in ffs_read(). + * + * ZFS does not pay attention to IO_DIRECT for reads. + * + * UFS does not pay attention to IO_SYNC for reads. + * + * ZFS pays attention to IO_SYNC (which translates into the + * Solaris define FRSYNC for zfs_read()) for reads. It + * attempts to sync the file before reading. + * + * So, to attempt to provide some barrier semantics in the + * BIO_ORDERED case, set both IO_DIRECT and IO_SYNC. + */ + error = VOP_READ(xbb->vn, &xuio, (flags & BIO_ORDERED) ? + (IO_DIRECT|IO_SYNC) : 0, xbb->cred); - return; + VOP_UNLOCK(xbb->vn, 0); + break; + case BIO_WRITE: { + struct mount *mountpoint; - abort: - xenbus_transaction_end(xbt, 1); -} + (void)vn_start_write(xbb->vn, &mountpoint, V_WAIT); -static int -blkback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id) -{ - int err; - char *p, *mode = NULL, *type = NULL, *params = NULL; - long handle; + vn_lock(xbb->vn, LK_EXCLUSIVE | LK_RETRY); - DPRINTF("node=%s\n", xdev->nodename); + /* + * UFS pays attention to IO_DIRECT for writes. 
The write + * is done asynchronously. (Normally the write would just + * get put into cache. + * + * UFS pays attention to IO_SYNC for writes. It will + * attempt to write the buffer out synchronously if that + * flag is set. + * + * ZFS does not pay attention to IO_DIRECT for writes. + * + * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC) + * for writes. It will flush the transaction from the + * cache before returning. + * + * So if we've got the BIO_ORDERED flag set, we want + * IO_SYNC in either the UFS or ZFS case. + */ + error = VOP_WRITE(xbb->vn, &xuio, (flags & BIO_ORDERED) ? + IO_SYNC : 0, xbb->cred); + VOP_UNLOCK(xbb->vn, 0); - p = strrchr(xdev->otherend, '/') + 1; - handle = strtoul(p, NULL, 0); + vn_finished_write(mountpoint); - mode = xenbus_read(NULL, xdev->nodename, "mode", NULL); - if (IS_ERR(mode)) { - xenbus_dev_fatal(xdev, PTR_ERR(mode), "reading mode"); - err = PTR_ERR(mode); - goto error; - } - - type = xenbus_read(NULL, xdev->nodename, "type", NULL); - if (IS_ERR(type)) { - xenbus_dev_fatal(xdev, PTR_ERR(type), "reading type"); - err = PTR_ERR(type); - goto error; - } - - params = xenbus_read(NULL, xdev->nodename, "params", NULL); - if (IS_ERR(type)) { - xenbus_dev_fatal(xdev, PTR_ERR(params), "reading params"); - err = PTR_ERR(params); - goto error; - } - - err = blkif_create(xdev, handle, mode, type, params); - if (err) { - xenbus_dev_fatal(xdev, err, "creating blkif"); - goto error; + break; } - - err = vbd_add_dev(xdev); - if (err) { - blkif_put((blkif_t *)xdev->data); - xenbus_dev_fatal(xdev, err, "adding vbd device"); + default: + panic("invalid operation %d", operation); + /* NOTREACHED */ } + VFS_UNLOCK_GIANT(vfs_is_locked); - return err; - - error: - if (mode) - free(mode, M_DEVBUF); - if (type) - free(type, M_DEVBUF); - if (params) - free(params, M_DEVBUF); - return err; -} - -static int -blkback_remove(struct xenbus_device *xdev) -{ - blkif_t *blkif = xdev->data; - device_t ndev; - - DPRINTF("node=%s\n", xdev->nodename); - - 
blkif->state = XenbusStateClosing; - - if ((ndev = blkif->ndev)) { - blkif->ndev = NULL; - mtx_lock(&Giant); - device_detach(ndev); - mtx_unlock(&Giant); +#ifdef USE_BOUNCE_BUFFERS + /* We only need to copy here for read operations */ + if (operation == BIO_READ) { + + for (seg_idx = 0, p_vaddr = xbb->xiovecs_vaddr, + xiovec = xbb->saved_xiovecs; + seg_idx < saved_uio_iovcnt; seg_idx++, + xiovec++, p_vaddr++) { + + /* + * Note that we have to use the copy of the + * io vector we made above. uiomove() modifies + * the uio and its referenced vector as uiomove + * performs the copy, so we can't rely on any + * state from the original uio. + */ + memcpy(*p_vaddr, xiovec->iov_base, xiovec->iov_len); + } } +#endif /* USE_BOUNCE_BUFFERS */ - xdev->data = NULL; - blkif->xdev = NULL; - blkif_put(blkif); +bailout_send_response: - return 0; -} + /* + * All I/O is already done, send the response. A lock is not + * necessary here because we're single threaded, and therefore the + * only context accessing this request right now. If that changes, + * we may need some locking here. + */ + xbb_unmap_req(req); + xbb_send_response(xbb, req, (error == 0) ? 
BLKIF_RSP_OKAY : + BLKIF_RSP_ERROR); + xbb_release_req(xbb, req); -static int -blkback_resume(struct xenbus_device *xdev) -{ - DPRINTF("node=%s\n", xdev->nodename); - return 0; + return (0); } static void -frontend_changed(struct xenbus_device *xdev, - XenbusState frontend_state) +xbb_dispatch_io(struct xbb_softc *xbb, blkif_request_t *ring_req, + struct xbb_xen_req *req, RING_IDX req_ring_idx) { - blkif_t *blkif = xdev->data; - - DPRINTF("state=%d\n", frontend_state); - - blkif->frontend_state = frontend_state; - - switch (frontend_state) { - case XenbusStateInitialising: - break; - case XenbusStateInitialised: - case XenbusStateConnected: - connect_ring(blkif); - connect(blkif); - break; - case XenbusStateClosing: - xenbus_switch_state(xdev, NULL, XenbusStateClosing); - break; - case XenbusStateClosed: - xenbus_remove_device(xdev); + struct gnttab_map_grant_ref maps[XBB_MAX_SEGMENTS_PER_REQUEST]; + struct xbb_sg_state *sg_states; + struct gnttab_map_grant_ref *map; + struct xbb_sg_state *sg_state; + struct blkif_request_segment *sg; + struct blkif_request_segment *last_block_sg; + u_int nseg; + u_int seg_idx; + u_int block_segs; + int nr_sects; + int operation; + uint8_t bio_flags; + int error; + + nseg = ring_req->nr_segments; + nr_sects = 0; + + req->xbb = xbb; + req->id = ring_req->id; + req->operation = ring_req->operation; + req->status = BLKIF_RSP_OKAY; + req->nr_pages = nseg; + bio_flags = 0; + sg = NULL; + sg_states = xbb->sg_states; + + switch (req->operation) { + case BLKIF_OP_WRITE_BARRIER: + bio_flags |= BIO_ORDERED; + /* FALLTHROUGH */ + case BLKIF_OP_WRITE: + operation = BIO_WRITE; + xbb->st_wr_req++; + if ((xbb->flags & XBBF_READ_ONLY) != 0) { + DPRINTF("Attempt to write to read only device %s\n", + xbb->dev_name); + goto fail_send_response; + } break; - case XenbusStateUnknown: - case XenbusStateInitWait: - xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend", - frontend_state); + case BLKIF_OP_READ: + operation = BIO_READ; + 
xbb->st_rd_req++; break; + case BLKIF_OP_FLUSH_DISKCACHE: + operation = BIO_FLUSH; + goto do_dispatch; + /*NOTREACHED*/ + default: + DPRINTF("error: unknown block io operation [%d]\n", + req->operation); + goto fail_send_response; } -} -/* ** Driver registration ** */ + /* Check that number of segments is sane. */ + if (unlikely(nseg == 0) + || unlikely(nseg > xbb->max_request_segments)) { + DPRINTF("Bad number of segments in request (%d)\n", nseg); + goto fail_send_response; + } -static struct xenbus_device_id blkback_ids[] = { - { "vbd" }, - { "" } -}; + if (dont_perform_io) { + xbb_send_response(xbb, req, BLKIF_RSP_OKAY); + xbb_release_req(xbb, req); + return; + } -static struct xenbus_driver blkback = { - .name = "blkback", - .ids = blkback_ids, - .probe = blkback_probe, - .remove = blkback_remove, - .resume = blkback_resume, - .otherend_changed = frontend_changed, -}; + map = maps; + sg_state = sg_states; + block_segs = MIN(req->nr_pages, BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK); + sg = ring_req->seg; + last_block_sg = sg + block_segs; + seg_idx = 0; + while (1) { -static void -blkback_init(void *unused) -{ - int i; + while (sg < last_block_sg) { + + sg_state->first_sect = sg->first_sect; + sg_state->last_sect = sg->last_sect; + sg_state->nsect = + (int8_t)(sg->last_sect - sg->first_sect + 1); + + if ((sg_state->nsect + & ((xbb->sector_size >> 9) - 1)) != 0) { + DPRINTF("Misaligned I/O request from domain %d", + xbb->otherend_id); + goto fail_send_response; + } - TASK_INIT(&blk_req_task, 0, blk_req_action, NULL); - mtx_init(&req_sched_list_lock, "blk_req_sched_lock", "blkback req sched lock", MTX_DEF); + if ((sg->last_sect >= (PAGE_SIZE >> 9)) + || (sg_state->nsect <= 0)) + goto fail_send_response; + + nr_sects += sg_state->nsect; + map->host_addr = xbb_req_gntaddr(req, seg_idx, + /*sector*/0); + map->flags = GNTMAP_host_map; + map->ref = sg->gref; + map->dom = xbb->otherend_id; + if (operation == BIO_WRITE) + map->flags |= GNTMAP_readonly; +#if 0 + 
printf("Mapping(%d): 0x%lx, flags 0x%x\n", seg_idx, + map->host_addr, map->flags); +#endif + sg++; + map++; + sg_state++; + seg_idx++; + } - mtx_init(&pending_free_lock, "blk_pending_req_ock", "blkback pending request lock", MTX_DEF); + block_segs = MIN(nseg - seg_idx, + BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK); + if (block_segs == 0) + break; - mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; - pending_reqs = malloc(sizeof(pending_reqs[0]) * - blkif_reqs, M_DEVBUF, M_ZERO|M_NOWAIT); - pending_grant_handles = malloc(sizeof(pending_grant_handles[0]) * - mmap_pages, M_DEVBUF, M_NOWAIT); - pending_vaddrs = malloc(sizeof(pending_vaddrs[0]) * - mmap_pages, M_DEVBUF, M_NOWAIT); - mmap_vstart = alloc_empty_page_range(mmap_pages); - if (!pending_reqs || !pending_grant_handles || !pending_vaddrs || !mmap_vstart) { - if (pending_reqs) - free(pending_reqs, M_DEVBUF); - if (pending_grant_handles) - free(pending_grant_handles, M_DEVBUF); - if (pending_vaddrs) - free(pending_vaddrs, M_DEVBUF); - WPRINTF("out of memory\n"); - return; + /* + * Fetch the next request block full of SG elements. + * For now, only the spacing between entries is different + * in the different ABIs, not the sg entry layout. 
+ */ + req_ring_idx++; + switch (xbb->abi) { + case BLKIF_PROTOCOL_NATIVE: + sg = BLKRING_GET_SG_REQUEST(&xbb->rings.native, + req_ring_idx); + break; + case BLKIF_PROTOCOL_X86_32: + { + sg = BLKRING_GET_SG_REQUEST(&xbb->rings.x86_32, + req_ring_idx); + break; + } + case BLKIF_PROTOCOL_X86_64: + { + sg = BLKRING_GET_SG_REQUEST(&xbb->rings.x86_64, + req_ring_idx); + break; + } + default: + panic("Unexpected blkif protocol ABI."); + /* NOTREACHED */ + } + last_block_sg = sg + block_segs; } - for (i = 0; i < mmap_pages; i++) { - pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT); - pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; + /* Convert to the disk's sector size */ + nr_sects = (nr_sects << 9) >> xbb->sector_size_shift; + + error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, + maps, req->nr_pages); + if (error != 0) + panic("Grant table operation failed (%d)", error); + + for (seg_idx = 0, map = maps; seg_idx < nseg; seg_idx++, map++) { + + if (unlikely(map->status != 0)) { + DPRINTF("invalid buffer -- could not remap it (%d)\n", + map->status); + DPRINTF("Mapping(%d): Host Addr 0x%lx, flags 0x%x " + "ref 0x%x, dom %d\n", seg_idx, + map->host_addr, map->flags, map->ref, + map->dom); + goto fail_unmap_req; + } + + req->gnt_handles[seg_idx] = map->handle; } + if (ring_req->sector_number + nr_sects > xbb->media_num_sectors) { - for (i = 0; i < blkif_reqs; i++) { - STAILQ_INSERT_TAIL(&pending_free, &pending_reqs[i], free_list); + DPRINTF("%s of [%" PRIu64 ",%" PRIu64 "] " + "extends past end of device %s\n", + operation == BIO_READ ? 
"read" : "write", + ring_req->sector_number, + ring_req->sector_number + nr_sects, xbb->dev_name); + goto fail_unmap_req; + } + +do_dispatch: + + error = xbb->dispatch_io(xbb, + ring_req, + req, + sg, + sg_states, + nseg, + operation, + bio_flags); + + if (error != 0) { + if (operation == BIO_FLUSH) + goto fail_send_response; + else + goto fail_unmap_req; } - DPRINTF("registering %s\n", blkback.name); - xenbus_register_backend(&blkback); -} + return; -SYSINIT(xbbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, blkback_init, NULL) + +fail_unmap_req: + xbb_unmap_req(req); + /* FALLTHROUGH */ + +fail_send_response: + xbb_send_response(xbb, req, BLKIF_RSP_ERROR); + xbb_release_req(xbb, req); + xbb->st_err_req++; +} static void -close_device(blkif_t *blkif) +xbb_send_response(struct xbb_softc *xbb, struct xbb_xen_req *req, int status) { - DPRINTF("closing dev=%s\n", blkif->dev_name); - if (blkif->vn) { - int flags = FREAD; - - if (!blkif->read_only) - flags |= FWRITE; + blkif_response_t *resp; + int more_to_do; + int notify; - if (blkif->csw) { - dev_relthread(blkif->cdev); - blkif->csw = NULL; - } + more_to_do = 0; - (void)vn_close(blkif->vn, flags, NOCRED, curthread); - blkif->vn = NULL; + /* + * Place on the response ring for the relevant domain. + * For now, only the spacing between entries is different + * in the different ABIs, not the response entry layout. 
+ */ + mtx_lock(&xbb->lock); + switch (xbb->abi) { + case BLKIF_PROTOCOL_NATIVE: + resp = RING_GET_RESPONSE(&xbb->rings.native, + xbb->rings.native.rsp_prod_pvt); + break; + case BLKIF_PROTOCOL_X86_32: + resp = (blkif_response_t *) + RING_GET_RESPONSE(&xbb->rings.x86_32, + xbb->rings.x86_32.rsp_prod_pvt); + break; + case BLKIF_PROTOCOL_X86_64: + resp = (blkif_response_t *) + RING_GET_RESPONSE(&xbb->rings.x86_64, + xbb->rings.x86_64.rsp_prod_pvt); + break; + default: + panic("Unexpected blkif protocol ABI."); } -} -static int -open_device(blkif_t *blkif) -{ - struct nameidata nd; - struct vattr vattr; - struct cdev *dev; - struct cdevsw *devsw; - int flags = FREAD, err = 0; - - DPRINTF("opening dev=%s\n", blkif->dev_name); + resp->id = req->id; + resp->operation = req->operation; + resp->status = status; - if (!blkif->read_only) - flags |= FWRITE; + xbb->rings.common.rsp_prod_pvt += BLKIF_SEGS_TO_BLOCKS(req->nr_pages); + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xbb->rings.common, notify); - if (!curthread->td_proc->p_fd->fd_cdir) { - curthread->td_proc->p_fd->fd_cdir = rootvnode; - VREF(rootvnode); - } - if (!curthread->td_proc->p_fd->fd_rdir) { - curthread->td_proc->p_fd->fd_rdir = rootvnode; - VREF(rootvnode); - } - if (!curthread->td_proc->p_fd->fd_jdir) { - curthread->td_proc->p_fd->fd_jdir = rootvnode; - VREF(rootvnode); - } + if (xbb->rings.common.rsp_prod_pvt == xbb->rings.common.req_cons) { - again: - NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, blkif->dev_name, curthread); - err = vn_open(&nd, &flags, 0, -1); - if (err) { - if (blkif->dev_name[0] != '/') { - char *dev_path = "/dev/"; - char *dev_name; + /* + * Tail check for pending requests. Allows frontend to avoid + * notifications if requests are already in flight (lower + * overheads and promotes batching). 
+ */ + RING_FINAL_CHECK_FOR_REQUESTS(&xbb->rings.common, more_to_do); + } else if (RING_HAS_UNCONSUMED_REQUESTS(&xbb->rings.common)) { - /* Try adding device path at beginning of name */ - dev_name = malloc(strlen(blkif->dev_name) + strlen(dev_path) + 1, M_DEVBUF, M_NOWAIT); - if (dev_name) { - sprintf(dev_name, "%s%s", dev_path, blkif->dev_name); - free(blkif->dev_name, M_DEVBUF); - blkif->dev_name = dev_name; - goto again; - } - } - xenbus_dev_fatal(blkif->xdev, err, "error opening device %s", blkif->dev_name); - return err; + more_to_do = 1; } - NDFREE(&nd, NDF_ONLY_PNBUF); - - blkif->vn = nd.ni_vp; - /* We only support disks for now */ - if (!vn_isdisk(blkif->vn, &err)) { - xenbus_dev_fatal(blkif->xdev, err, "device %s is not a disk", blkif->dev_name); - VOP_UNLOCK(blkif->vn, 0, curthread); - goto error; - } + mtx_unlock(&xbb->lock); - blkif->cdev = blkif->vn->v_rdev; - blkif->csw = dev_refthread(blkif->cdev); - PANIC_IF(blkif->csw == NULL); - - err = VOP_GETATTR(blkif->vn, &vattr, NOCRED); - if (err) { - xenbus_dev_fatal(blkif->xdev, err, - "error getting vnode attributes for device %s", blkif->dev_name); - VOP_UNLOCK(blkif->vn, 0, curthread); - goto error; - } + if (more_to_do) + taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); + + if (notify) + notify_remote_via_irq(xbb->irq); +} - VOP_UNLOCK(blkif->vn, 0, curthread); +static void +xbb_bio_done(struct bio *bio) +{ + struct xbb_softc *xbb; + struct xbb_xen_req *req; - dev = blkif->vn->v_rdev; - devsw = dev->si_devsw; - if (!devsw->d_ioctl) { - err = ENODEV; - xenbus_dev_fatal(blkif->xdev, err, - "no d_ioctl for device %s!", blkif->dev_name); - goto error; + req = bio->bio_caller1; + xbb = req->xbb; + if (bio->bio_error) { + DPRINTF("BIO returned error %d for operation on device %s\n", + bio->bio_error, xbb->dev_name); + req->status = BLKIF_RSP_ERROR; + xbb->st_err_req++; + + if (bio->bio_error == ENXIO + && xenbus_get_state(xbb->dev) == XenbusStateConnected) { + + /* + * Backend device has disappeared. 
Signal the + * front-end that we (the device proxy) want to + * go away. + */ + xenbus_set_state(xbb->dev, XenbusStateClosing); + } } - err = (*devsw->d_ioctl)(dev, DIOCGSECTORSIZE, (caddr_t)&blkif->sector_size, FREAD, curthread); - if (err) { - xenbus_dev_fatal(blkif->xdev, err, - "error calling ioctl DIOCGSECTORSIZE for device %s", blkif->dev_name); - goto error; - } - blkif->sector_size_shift = fls(blkif->sector_size) - 1; +#if 0 + printf("done: bio=%x error=%x completed=%llu resid=%lu flags=%x\n", + (unsigned int)bio, bio->bio_error, bio->bio_completed, bio->bio_resid, bio->bio_flags); +#endif - err = (*devsw->d_ioctl)(dev, DIOCGMEDIASIZE, (caddr_t)&blkif->media_size, FREAD, curthread); - if (err) { - xenbus_dev_fatal(blkif->xdev, err, - "error calling ioctl DIOCGMEDIASIZE for device %s", blkif->dev_name); - goto error; +#ifdef USE_BOUNCE_BUFFERS + if (bio->bio_cmd == BIO_READ) { + vm_offset_t kva_offset; + + kva_offset = (vm_offset_t)bio->bio_data + - (vm_offset_t)req->bounce; + memcpy((uint8_t *)req->kva + kva_offset, + bio->bio_data, bio->bio_bcount); + } +#endif /* USE_BOUNCE_BUFFERS */ + + if (atomic_fetchadd_int(&req->pendcnt, -1) == 1) { + xbb_unmap_req(req); + xbb_send_response(xbb, req, req->status); + xbb_release_req(xbb, req); } - blkif->media_num_sectors = blkif->media_size >> blkif->sector_size_shift; - blkif->major = major(vattr.va_rdev); - blkif->minor = minor(vattr.va_rdev); - - DPRINTF("opened dev=%s major=%d minor=%d sector_size=%u media_size=%lld\n", - blkif->dev_name, blkif->major, blkif->minor, blkif->sector_size, blkif->media_size); + g_destroy_bio(bio); +} - return 0; +void +xbb_intr(void *arg) +{ + struct xbb_softc *xbb; - error: - close_device(blkif); - return err; + /* Defer to kernel thread. 
*/ + xbb = (struct xbb_softc *)arg; + taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); } static int -vbd_add_dev(struct xenbus_device *xdev) +xbb_shutdown(struct xbb_softc *xbb) { - blkif_t *blkif = xdev->data; - device_t nexus, ndev; - devclass_t dc; - int err = 0; - - mtx_lock(&Giant); - - /* We will add a vbd device as a child of nexus0 (for now) */ - if (!(dc = devclass_find("nexus")) || - !(nexus = devclass_get_device(dc, 0))) { - WPRINTF("could not find nexus0!\n"); - err = ENOENT; - goto done; - } + static int in_shutdown; + DPRINTF("\n"); - /* Create a newbus device representing the vbd */ - ndev = BUS_ADD_CHILD(nexus, 0, "vbd", blkif->handle); - if (!ndev) { - WPRINTF("could not create newbus device vbd%d!\n", blkif->handle); - err = EFAULT; - goto done; - } + /* + * Due to the need to drop our mutex during some + * xenbus operations, it is possible for two threads + * to attempt to close out shutdown processing at + * the same time. Tell the caller that hits this + * race to try back later. + */ + if (in_shutdown != 0) + return (EAGAIN); + + DPRINTF("\n"); + + /* Indicate shutdown is in progress. */ + xbb->flags |= XBBF_SHUTDOWN; + + /* Wait for requests to complete. */ + if (xbb->active_request_count != 0) + return (EAGAIN); - blkif_get(blkif); - device_set_ivars(ndev, blkif); - blkif->ndev = ndev; + DPRINTF("\n"); - device_probe_and_attach(ndev); + /* Disconnect from the front-end. */ + xbb_disconnect(xbb); - done: + in_shutdown = 1; + mtx_unlock(&xbb->lock); + xenbus_set_state(xbb->dev, XenbusStateClosed); + mtx_lock(&xbb->lock); + in_shutdown = 0; - mtx_unlock(&Giant); + /* Indicate to xbb_detach() that is it safe to proceed. 
*/ + wakeup(xbb); - return err; + return (0); +} + +#if 0 +static void +xbb_hotplug_fatal(struct xbb_softc *xbb) +{ } enum { @@ -1198,9 +2368,9 @@ }; static char * -vbd_sysctl_ring_info(blkif_t *blkif, int cmd) +vbd_sysctl_ring_info(struct xbb_softc *blkif, int cmd) { - char *buf = malloc(256, M_DEVBUF, M_WAITOK); + char *buf = malloc(256, M_XENBLOCKBACK, M_WAITOK); if (buf) { if (!blkif->ring_connected) sprintf(buf, "ring not connected\n"); @@ -1221,7 +2391,7 @@ vbd_sysctl_handler(SYSCTL_HANDLER_ARGS) { device_t dev = (device_t)arg1; - blkif_t *blkif = (blkif_t *)device_get_ivars(dev); + struct xbb_softc *blkif = (struct xbb_softc *)device_get_ivars(dev); const char *value; char *buf = NULL; int err; @@ -1246,104 +2416,50 @@ err = SYSCTL_OUT(req, value, strlen(value)); if (buf != NULL) - free(buf, M_DEVBUF); + free(buf, M_XENBLOCKBACK); return err; } -/* Newbus vbd device driver probe */ -static int -vbd_probe(device_t dev) -{ - DPRINTF("vbd%d\n", device_get_unit(dev)); - return 0; -} - -/* Newbus vbd device driver attach */ static int vbd_attach(device_t dev) { - blkif_t *blkif = (blkif_t *)device_get_ivars(dev); + struct xbb_softc *blkif = (struct xbb_softc *)device_get_ivars(dev); DPRINTF("%s\n", blkif->dev_name); - SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD, dev, VBD_SYSCTL_DOMID, vbd_sysctl_handler, "I", "domid of frontend"); - SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "rd_reqs", CTLTYPE_INT|CTLFLAG_RD, dev, VBD_SYSCTL_ST_RD_REQ, vbd_sysctl_handler, "I", "number of read reqs"); - SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + 
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "wr_reqs", CTLTYPE_INT|CTLFLAG_RD, dev, VBD_SYSCTL_ST_WR_REQ, vbd_sysctl_handler, "I", "number of write reqs"); - SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "oo_reqs", CTLTYPE_INT|CTLFLAG_RD, dev, VBD_SYSCTL_ST_OO_REQ, vbd_sysctl_handler, "I", "number of deferred reqs"); - SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "err_reqs", CTLTYPE_INT|CTLFLAG_RD, dev, VBD_SYSCTL_ST_ERR_REQ, vbd_sysctl_handler, "I", "number of reqs that returned error"); #if XEN_BLKBACK_DEBUG - SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "ring", CTLFLAG_RD, dev, VBD_SYSCTL_RING, vbd_sysctl_handler, "A", "req ring info"); #endif - - if (!open_device(blkif)) - connect(blkif); - - return bus_generic_attach(dev); -} - -/* Newbus vbd device driver detach */ -static int -vbd_detach(device_t dev) -{ - blkif_t *blkif = (blkif_t *)device_get_ivars(dev); - - DPRINTF("%s\n", blkif->dev_name); - - close_device(blkif); - - bus_generic_detach(dev); - - blkif_put(blkif); - - return 0; } -static device_method_t vbd_methods[] = { - /* Device interface */ - DEVMETHOD(device_probe, vbd_probe), - DEVMETHOD(device_attach, vbd_attach), - DEVMETHOD(device_detach, vbd_detach), - DEVMETHOD(device_shutdown, bus_generic_shutdown), - DEVMETHOD(device_suspend, bus_generic_suspend), - DEVMETHOD(device_resume, bus_generic_resume), - {0, 0} -}; - -static devclass_t vbd_devclass; - -static driver_t vbd_driver = { - "vbd", - vbd_methods, - 0, -}; - -DRIVER_MODULE(vbd, nexus, vbd_driver, vbd_devclass, 0, 0); - -/* - * Local 
variables: - * mode: C - * c-set-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: t - * End: - */ +#endif diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/dev/xen/blkfront/blkfront.c head.xen/sys/dev/xen/blkfront/blkfront.c --- head.moves/sys/dev/xen/blkfront/blkfront.c 2010-09-16 16:31:08.604231000 -0600 +++ head.xen/sys/dev/xen/blkfront/blkfront.c 2010-09-16 16:49:09.622827344 -0600 @@ -49,8 +49,10 @@ #include #include +#include #include #include + #include #include #include @@ -68,17 +70,21 @@ /* prototypes */ static void xb_free_command(struct xb_command *cm); static void xb_startio(struct xb_softc *sc); -static void connect(struct xb_softc *); +static void blkfront_connect(struct xb_softc *); static void blkfront_closing(device_t); static int blkfront_detach(device_t); -static int talk_to_backend(struct xb_softc *); static int setup_blkring(struct xb_softc *); static void blkif_int(void *); +static void blkfront_initialize(struct xb_softc *); +#if 0 static void blkif_recover(struct xb_softc *); -static void blkif_completion(struct xb_command *); +#endif +static int blkif_completion(struct xb_command *); static void blkif_free(struct xb_softc *, int); static void blkif_queue_cb(void *, bus_dma_segment_t *, int, int); +MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data"); + #define GRANT_INVALID_REF 0 /* Control whether runtime update of vbds is enabled. */ @@ -113,11 +119,6 @@ #define DPRINTK(fmt, args...) 
#endif -#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ - (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) - -#define BLKIF_MAXIO (32 * 1024) - static int blkif_open(struct disk *dp); static int blkif_close(struct disk *dp); static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td); @@ -215,7 +216,6 @@ if (strcmp(name, "xbd")) device_printf(sc->xb_dev, "attaching as %s%d\n", name, unit); - memset(&sc->xb_disk, 0, sizeof(sc->xb_disk)); sc->xb_disk = disk_alloc(); sc->xb_disk->d_unit = sc->xb_unit; sc->xb_disk->d_open = blkif_open; @@ -228,19 +228,13 @@ sc->xb_disk->d_sectorsize = sector_size; sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT; - sc->xb_disk->d_maxsize = BLKIF_MAXIO; + sc->xb_disk->d_maxsize = sc->max_request_size; sc->xb_disk->d_flags = 0; disk_create(sc->xb_disk, DISK_VERSION_00); return error; } -void -xlvbd_del(struct xb_softc *sc) -{ - - disk_destroy(sc->xb_disk); -} /************************ end VBD support *****************/ /* @@ -357,15 +351,16 @@ return (EBUSY); } - if (gnttab_alloc_grant_references( - BLKIF_MAX_SEGMENTS_PER_REQUEST, &cm->gref_head) < 0) { + if (gnttab_alloc_grant_references(sc->max_request_segments, + &cm->gref_head) != 0) { xb_free_command(cm); mtx_unlock(&sc->xb_io_lock); device_printf(sc->xb_dev, "no more grant allocs?\n"); return (EBUSY); } - chunk = length > BLKIF_MAXIO ? BLKIF_MAXIO : length; + chunk = length > sc->max_request_size + ? sc->max_request_size : length; cm->data = virtual; cm->datalen = chunk; cm->operation = BLKIF_OP_WRITE; @@ -423,16 +418,18 @@ blkfront_attach(device_t dev) { struct xb_softc *sc; - struct xb_command *cm; const char *name; - int error, vdevice, i, unit; + int error; + int vdevice; + int i; + int unit; /* FIXME: Use dynamic device id if this is not set. 
*/ - error = xenbus_scanf(XBT_NIL, xenbus_get_node(dev), + error = xs_scanf(XST_NIL, xenbus_get_node(dev), "virtual-device", NULL, "%i", &vdevice); if (error) { xenbus_dev_fatal(dev, error, "reading virtual-device"); - printf("couldn't find virtual device"); + device_printf(dev, "Couldn't determine virtual device.\n"); return (error); } @@ -447,51 +444,18 @@ xb_initq_ready(sc); xb_initq_complete(sc); xb_initq_bio(sc); - - /* Allocate parent DMA tag */ - if (bus_dma_tag_create( NULL, /* parent */ - 512, 4096, /* algnmnt, boundary */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - BLKIF_MAXIO, /* maxsize */ - BLKIF_MAX_SEGMENTS_PER_REQUEST, /* nsegments */ - PAGE_SIZE, /* maxsegsize */ - BUS_DMA_ALLOCNOW, /* flags */ - busdma_lock_mutex, /* lockfunc */ - &sc->xb_io_lock, /* lockarg */ - &sc->xb_io_dmat)) { - device_printf(dev, "Cannot allocate parent DMA tag\n"); - return (ENOMEM); - } -#ifdef notyet - if (bus_dma_tag_set(sc->xb_io_dmat, BUS_DMA_SET_MINSEGSZ, - XBD_SECTOR_SIZE)) { - device_printf(dev, "Cannot set sector size\n"); - return (EINVAL); - } -#endif + for (i = 0; i < XBF_MAX_RING_PAGES; i++) + sc->ring_ref[i] = GRANT_INVALID_REF; sc->xb_dev = dev; sc->vdevice = vdevice; sc->connected = BLKIF_STATE_DISCONNECTED; - /* work queue needed ? */ - for (i = 0; i < BLK_RING_SIZE; i++) { - cm = &sc->shadow[i]; - cm->req.id = i; - cm->cm_sc = sc; - if (bus_dmamap_create(sc->xb_io_dmat, 0, &cm->map) != 0) - break; - xb_free_command(cm); - } - /* Front end dir is a number, which is used as the id. */ sc->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0); - error = talk_to_backend(sc); - if (error) - return (error); + /* Wait for backend device to publish its protocol capabilities. 
*/ + xenbus_set_state(dev, XenbusStateInitialising); return (0); } @@ -512,121 +476,265 @@ static int blkfront_resume(device_t dev) { +#if 0 struct xb_softc *sc = device_get_softc(dev); - int err; DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev)); +/* XXX This can't work!!! */ blkif_free(sc, 1); - err = talk_to_backend(sc); - if (sc->connected == BLKIF_STATE_SUSPENDED && !err) + blkfront_initialize(sc); + if (sc->connected == BLKIF_STATE_SUSPENDED) blkif_recover(sc); - - return (err); +#endif + return (0); } -/* Common code used when first setting up, and when resuming. */ -static int -talk_to_backend(struct xb_softc *sc) +static void +blkfront_initialize(struct xb_softc *sc) { - device_t dev; - struct xenbus_transaction xbt; - const char *message = NULL; - int err; - - /* Create shared ring, alloc event channel. */ - dev = sc->xb_dev; - err = setup_blkring(sc); - if (err) - goto out; + const char *otherend_path; + const char *node_path; + int error; + int i; - again: - err = xenbus_transaction_start(&xbt); - if (err) { - xenbus_dev_fatal(dev, err, "starting transaction"); - goto destroy_blkring; + if (xenbus_get_state(sc->xb_dev) != XenbusStateInitialising) + return; + + /* + * Protocol defaults valid even if negotiation for a + * setting fails. + */ + sc->ring_pages = 1; + sc->max_requests = BLKIF_MAX_RING_REQUESTS(PAGE_SIZE); + sc->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK; + sc->max_request_size = sc->max_request_segments * PAGE_SIZE; + sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments); + + /* + * Protocol negotiation. + * + * \note xs_gather() returns on the first encountered error, so + * we must use independent calls in order to guarantee + * we don't miss information in a sparsely populated back-end + * tree.
+ */ + otherend_path = xenbus_get_otherend_path(sc->xb_dev); + node_path = xenbus_get_node(sc->xb_dev); + (void)xs_scanf(XST_NIL, otherend_path, + "max-ring-pages", NULL, "%" PRIu32, + &sc->ring_pages); + + (void)xs_scanf(XST_NIL, otherend_path, + "max-requests", NULL, "%" PRIu32, + &sc->max_requests); + + (void)xs_scanf(XST_NIL, otherend_path, + "max-request-segments", NULL, "%" PRIu32, + &sc->max_request_segments); + + (void)xs_scanf(XST_NIL, otherend_path, + "max-request-size", NULL, "%" PRIu32, + &sc->max_request_size); + + if (sc->ring_pages > XBF_MAX_RING_PAGES) { + device_printf(sc->xb_dev, "Back-end specified ring-pages of " + "%u limited to front-end limit of %zu.\n", + sc->ring_pages, XBF_MAX_RING_PAGES); + sc->ring_pages = XBF_MAX_RING_PAGES; + } + + if (sc->max_requests > XBF_MAX_REQUESTS) { + device_printf(sc->xb_dev, "Back-end specified max_requests of " + "%u limited to front-end limit of %u.\n", + sc->max_requests, XBF_MAX_REQUESTS); + sc->max_requests = XBF_MAX_REQUESTS; + } + + if (sc->max_request_segments > XBF_MAX_SEGMENTS_PER_REQUEST) { + device_printf(sc->xb_dev, "Back-end specificed " + "max_requests_segments of %u limited to " + "front-end limit of %u.\n", + sc->max_request_segments, + XBF_MAX_SEGMENTS_PER_REQUEST); + sc->max_request_segments = XBF_MAX_SEGMENTS_PER_REQUEST; + } + + if (sc->max_request_size > XBF_MAX_REQUEST_SIZE) { + device_printf(sc->xb_dev, "Back-end specificed " + "max_request_size of %u limited to front-end " + "limit of %u.\n", sc->max_request_size, + XBF_MAX_REQUEST_SIZE); + sc->max_request_size = XBF_MAX_REQUEST_SIZE; + } + sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments); + + /* Allocate datastructures based on negotiated values. 
*/ + error = bus_dma_tag_create(NULL, /* parent */ + 512, PAGE_SIZE, /* algnmnt, boundary */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + sc->max_request_size, + sc->max_request_segments, + PAGE_SIZE, /* maxsegsize */ + BUS_DMA_ALLOCNOW, /* flags */ + busdma_lock_mutex, /* lockfunc */ + &sc->xb_io_lock, /* lockarg */ + &sc->xb_io_dmat); + if (error != 0) { + xenbus_dev_fatal(sc->xb_dev, error, + "Cannot allocate parent DMA tag\n"); + return; } - err = xenbus_printf(xbt, xenbus_get_node(dev), - "ring-ref","%u", sc->ring_ref); - if (err) { - message = "writing ring-ref"; - goto abort_transaction; + /* Per-transaction data allocation. */ + sc->shadow = malloc(sizeof(*sc->shadow) * sc->max_requests, + M_XENBLOCKFRONT, M_NOWAIT|M_ZERO); + if (sc->shadow == NULL) { + xenbus_dev_fatal(sc->xb_dev, error, + "Cannot allocate request structures\n"); } - err = xenbus_printf(xbt, xenbus_get_node(dev), - "event-channel", "%u", irq_to_evtchn_port(sc->irq)); - if (err) { - message = "writing event-channel"; - goto abort_transaction; + + for (i = 0; i < sc->max_requests; i++) { + struct xb_command *cm; + + cm = &sc->shadow[i]; + cm->sg_refs = malloc(sizeof(grant_ref_t) + * sc->max_request_segments, + M_XENBLOCKFRONT, M_NOWAIT); + if (cm->sg_refs == NULL) + break; + cm->id = i; + cm->cm_sc = sc; + if (bus_dmamap_create(sc->xb_io_dmat, 0, &cm->map) != 0) + break; + xb_free_command(cm); } - err = xenbus_printf(xbt, xenbus_get_node(dev), - "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE); - if (err) { - message = "writing protocol"; - goto abort_transaction; + + if (setup_blkring(sc) != 0) + return; + + error = xs_printf(XST_NIL, node_path, + "ring-pages","%u", sc->ring_pages); + if (error) { + xenbus_dev_fatal(sc->xb_dev, error, + "writing %s/ring-pages", + node_path); + return; } - err = xenbus_transaction_end(xbt, 0); - if (err) { - if (err == EAGAIN) - goto again; - xenbus_dev_fatal(dev, err, "completing transaction"); - 
goto destroy_blkring; + error = xs_printf(XST_NIL, node_path, + "max-requests","%u", sc->max_requests); + if (error) { + xenbus_dev_fatal(sc->xb_dev, error, + "writing %s/max-requests", + node_path); + return; } - xenbus_set_state(dev, XenbusStateInitialised); - - return 0; - abort_transaction: - xenbus_transaction_end(xbt, 1); - if (message) - xenbus_dev_fatal(dev, err, "%s", message); - destroy_blkring: - blkif_free(sc, 0); - out: - return err; + error = xs_printf(XST_NIL, node_path, + "max-request-segments","%u", sc->max_request_segments); + if (error) { + xenbus_dev_fatal(sc->xb_dev, error, + "writing %s/max-request-segments", + node_path); + return; + } + + error = xs_printf(XST_NIL, node_path, + "max-request-size","%u", sc->max_request_size); + if (error) { + xenbus_dev_fatal(sc->xb_dev, error, + "writing %s/max-request-size", + node_path); + return; + } + + error = xs_printf(XST_NIL, node_path, "event-channel", + "%u", irq_to_evtchn_port(sc->irq)); + if (error) { + xenbus_dev_fatal(sc->xb_dev, error, + "writing %s/event-channel", + node_path); + return; + } + + error = xs_printf(XST_NIL, node_path, + "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE); + if (error) { + xenbus_dev_fatal(sc->xb_dev, error, + "writing %s/protocol", + node_path); + return; + } + + xenbus_set_state(sc->xb_dev, XenbusStateInitialised); } static int setup_blkring(struct xb_softc *sc) { blkif_sring_t *sring; + uintptr_t sring_page_addr; int error; + int i; - sc->ring_ref = GRANT_INVALID_REF; - - sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); + sring = malloc(sc->ring_pages * PAGE_SIZE, M_XENBLOCKFRONT, + M_NOWAIT|M_ZERO); if (sring == NULL) { xenbus_dev_fatal(sc->xb_dev, ENOMEM, "allocating shared ring"); - return ENOMEM; + return (ENOMEM); } SHARED_RING_INIT(sring); - FRONT_RING_INIT(&sc->ring, sring, PAGE_SIZE); + FRONT_RING_INIT(&sc->ring, sring, sc->ring_pages * PAGE_SIZE); - error = xenbus_grant_ring(sc->xb_dev, - (vtomach(sc->ring.sring) >> PAGE_SHIFT), 
&sc->ring_ref); + for (i = 0, sring_page_addr = (uintptr_t)sring; + i < sc->ring_pages; + i++, sring_page_addr += PAGE_SIZE) { + + error = xenbus_grant_ring(sc->xb_dev, + (vtomach(sring_page_addr) >> PAGE_SHIFT), &sc->ring_ref[i]); + if (error) { + xenbus_dev_fatal(sc->xb_dev, error, + "granting ring_ref(%d)", i); + return (error); + } + } + error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev), + "ring-ref","%u", sc->ring_ref[0]); if (error) { - free(sring, M_DEVBUF); - sc->ring.sring = NULL; - goto fail; + xenbus_dev_fatal(sc->xb_dev, error, "writing %s/ring-ref", + xenbus_get_node(sc->xb_dev)); + return (error); } - - error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(sc->xb_dev), + for (i = 1; i < sc->ring_pages; i++) { + char ring_ref_name[]= "ring_refXX"; + + snprintf(ring_ref_name, sizeof(ring_ref_name), "ring-ref%u", i); + error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev), + ring_ref_name, "%u", sc->ring_ref[i]); + if (error) { + xenbus_dev_fatal(sc->xb_dev, error, "writing %s/%s", + xenbus_get_node(sc->xb_dev), + ring_ref_name); + return (error); + } + } + + error = bind_listening_port_to_irqhandler( + xenbus_get_otherend_id(sc->xb_dev), "xbd", (driver_intr_t *)blkif_int, sc, INTR_TYPE_BIO | INTR_MPSAFE, &sc->irq); if (error) { xenbus_dev_fatal(sc->xb_dev, error, "bind_evtchn_to_irqhandler failed"); - goto fail; + return (error); } return (0); - fail: - blkif_free(sc, 0); - return (error); } - /** * Callback received when the backend's state changes. 
*/ @@ -640,15 +748,19 @@ switch (backend_state) { case XenbusStateUnknown: case XenbusStateInitialising: - case XenbusStateInitWait: - case XenbusStateInitialised: - case XenbusStateClosed: case XenbusStateReconfigured: case XenbusStateReconfiguring: + case XenbusStateClosed: break; + case XenbusStateInitWait: + blkfront_initialize(sc); + break; + + case XenbusStateInitialised: case XenbusStateConnected: - connect(sc); + blkfront_initialize(sc); + blkfront_connect(sc); break; case XenbusStateClosing: @@ -657,20 +769,7 @@ "Device in use; refusing to close"); else blkfront_closing(dev); -#ifdef notyet - bd = bdget(sc->dev); - if (bd == NULL) - xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); - - down(&bd->bd_sem); - if (sc->users > 0) - xenbus_dev_error(dev, -EBUSY, - "Device in use; refusing to close"); - else - blkfront_closing(dev); - up(&bd->bd_sem); - bdput(bd); -#endif + break; } return (0); @@ -681,7 +780,7 @@ ** the details about the physical device - #sectors, size, etc). */ static void -connect(struct xb_softc *sc) +blkfront_connect(struct xb_softc *sc) { device_t dev = sc->xb_dev; unsigned long sectors, sector_size; @@ -694,20 +793,20 @@ DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev)); - err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev), - "sectors", "%lu", §ors, - "info", "%u", &binfo, - "sector-size", "%lu", §or_size, - NULL); + err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), + "sectors", "%lu", §ors, + "info", "%u", &binfo, + "sector-size", "%lu", §or_size, + NULL); if (err) { xenbus_dev_fatal(dev, err, "reading backend fields at %s", xenbus_get_otherend_path(dev)); return; } - err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev), - "feature-barrier", "%lu", &feature_barrier, - NULL); + err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), + "feature-barrier", "%lu", &feature_barrier, + NULL); if (!err || feature_barrier) sc->xb_flags |= XB_BARRIER; @@ -741,15 +840,16 @@ { struct xb_softc *sc = 
device_get_softc(dev); + xenbus_set_state(dev, XenbusStateClosing); + DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev)); - if (sc->mi) { - DPRINTK("Calling xlvbd_del\n"); - xlvbd_del(sc); - sc->mi = NULL; + if (sc->xb_disk != NULL) { + disk_destroy(sc->xb_disk); + sc->xb_disk = NULL; } - xenbus_set_state(dev, XenbusStateClosed); + xenbus_set_state(dev, XenbusStateClosed); } @@ -778,11 +878,16 @@ notify_remote_via_irq(sc->irq); } -static void blkif_restart_queue_callback(void *arg) +static void +blkif_restart_queue_callback(void *arg) { struct xb_softc *sc = arg; + mtx_lock(&sc->xb_io_lock); + xb_startio(sc); + + mtx_unlock(&sc->xb_io_lock); } static int @@ -874,20 +979,17 @@ return (NULL); } - if (gnttab_alloc_grant_references(BLKIF_MAX_SEGMENTS_PER_REQUEST, - &cm->gref_head) < 0) { + if (gnttab_alloc_grant_references(sc->max_request_segments, + &cm->gref_head) != 0) { gnttab_request_free_callback(&sc->callback, blkif_restart_queue_callback, sc, - BLKIF_MAX_SEGMENTS_PER_REQUEST); + sc->max_request_segments); xb_requeue_bio(sc, bp); xb_enqueue_free(cm); sc->xb_flags |= XB_FROZEN; return (NULL); } - /* XXX Can we grab refs before doing the load so that the ref can - * be filled out here? - */ cm->bp = bp; cm->data = bp->bio_data; cm->datalen = bp->bio_bcount; @@ -921,13 +1023,19 @@ struct xb_softc *sc; struct xb_command *cm; blkif_request_t *ring_req; + struct blkif_request_segment *sg; + struct blkif_request_segment *last_block_sg; + grant_ref_t *sg_ref; vm_paddr_t buffer_ma; uint64_t fsect, lsect; - int ref, i, op; + int ref; + int op; + int block_segs; cm = arg; sc = cm->cm_sc; +//printf("%s: Start\n", __func__); if (error) { printf("error %d in blkif_queue_cb\n", error); cm->bp->bio_error = EIO; @@ -938,44 +1046,63 @@ /* Fill out a communications ring structure. */ ring_req = RING_GET_REQUEST(&sc->ring, sc->ring.req_prod_pvt); - if (ring_req == NULL) { - /* XXX Is this possible? 
*/ - printf("ring_req NULL, requeuing\n"); - xb_enqueue_ready(cm); - return; - } - ring_req->id = cm->req.id; + sc->ring.req_prod_pvt++; + ring_req->id = cm->id; ring_req->operation = cm->operation; ring_req->sector_number = cm->sector_number; ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk; ring_req->nr_segments = nsegs; + cm->nseg = nsegs; + + block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK); + sg = ring_req->seg; + last_block_sg = sg + block_segs; + sg_ref = cm->sg_refs; + + while (1) { - for (i = 0; i < nsegs; i++) { - buffer_ma = segs[i].ds_addr; - fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT; - lsect = fsect + (segs[i].ds_len >> XBD_SECTOR_SHFT) - 1; - - KASSERT(lsect <= 7, - ("XEN disk driver data cannot cross a page boundary")); - - /* install a grant reference. */ - ref = gnttab_claim_grant_reference(&cm->gref_head); - KASSERT( ref >= 0, ("grant_reference failed") ); - - gnttab_grant_foreign_access_ref( - ref, - xenbus_get_otherend_id(sc->xb_dev), - buffer_ma >> PAGE_SHIFT, - ring_req->operation & 1 ); /* ??? */ + while (sg < last_block_sg) { + buffer_ma = segs->ds_addr; + fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT; + lsect = fsect + (segs->ds_len >> XBD_SECTOR_SHFT) - 1; - ring_req->seg[i] = - (struct blkif_request_segment) { + KASSERT(lsect <= 7, ("XEN disk driver data cannot " + "cross a page boundary")); + + /* install a grant reference. */ + ref = gnttab_claim_grant_reference(&cm->gref_head); + + /* + * GNTTAB_LIST_END == 0xffffffff, but it is private + * to gnttab.c. 
+ */ + KASSERT(ref != ~0, ("grant_reference failed")); + + gnttab_grant_foreign_access_ref( + ref, + xenbus_get_otherend_id(sc->xb_dev), + buffer_ma >> PAGE_SHIFT, + ring_req->operation == BLKIF_OP_WRITE); + + *sg_ref = ref; + *sg = (struct blkif_request_segment) { .gref = ref, .first_sect = fsect, .last_sect = lsect }; + sg++; + sg_ref++; + segs++; + nsegs--; + } + block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK); + if (block_segs == 0) + break; + + sg = BLKRING_GET_SG_REQUEST(&sc->ring, sc->ring.req_prod_pvt); + sc->ring.req_prod_pvt++; + last_block_sg = sg + block_segs; } - if (cm->operation == BLKIF_OP_READ) op = BUS_DMASYNC_PREREAD; else if (cm->operation == BLKIF_OP_WRITE) @@ -984,15 +1111,10 @@ op = 0; bus_dmamap_sync(sc->xb_io_dmat, cm->map, op); - sc->ring.req_prod_pvt++; - - /* Keep a private copy so we can reissue requests when recovering. */ - cm->req = *ring_req; + gnttab_free_grant_references(cm->gref_head); xb_enqueue_busy(cm); - gnttab_free_grant_references(cm->gref_head); - /* * This flag means that we're probably executing in the busdma swi * instead of in the startio context, so an explicit flush is needed. @@ -1000,6 +1122,7 @@ if (cm->cm_flags & XB_CMD_FROZEN) flush_requests(sc); +//printf("%s: Done\n", __func__); return; } @@ -1018,7 +1141,7 @@ mtx_assert(&sc->xb_io_lock, MA_OWNED); - while (!RING_FULL(&sc->ring)) { + while (RING_FREE_REQUESTS(&sc->ring) >= sc->max_request_blocks) { if (sc->xb_flags & XB_FROZEN) break; @@ -1061,12 +1184,12 @@ rp = sc->ring.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. 
*/ - for (i = sc->ring.rsp_cons; i != rp; i++) { + for (i = sc->ring.rsp_cons; i != rp;) { bret = RING_GET_RESPONSE(&sc->ring, i); cm = &sc->shadow[bret->id]; xb_remove_busy(cm); - blkif_completion(cm); + i += blkif_completion(cm); if (cm->operation == BLKIF_OP_READ) op = BUS_DMASYNC_POSTREAD; @@ -1116,35 +1239,61 @@ static void blkif_free(struct xb_softc *sc, int suspend) { + uint8_t *sring_page_ptr; + int i; -/* Prevent new requests being issued until we fix things up. */ + /* Prevent new requests being issued until we fix things up. */ mtx_lock(&sc->xb_io_lock); sc->connected = suspend ? BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; mtx_unlock(&sc->xb_io_lock); /* Free resources associated with old device channel. */ - if (sc->ring_ref != GRANT_INVALID_REF) { - gnttab_end_foreign_access(sc->ring_ref, - sc->ring.sring); - sc->ring_ref = GRANT_INVALID_REF; + if (sc->ring.sring != NULL) { + sring_page_ptr = (uint8_t *)sc->ring.sring; + for (i = 0; i < sc->ring_pages; i++) { + if (sc->ring_ref[i] != GRANT_INVALID_REF) { + gnttab_end_foreign_access_ref(sc->ring_ref[i]); + sc->ring_ref[i] = GRANT_INVALID_REF; + } + sring_page_ptr += PAGE_SIZE; + } + free(sc->ring.sring, M_XENBLOCKFRONT); sc->ring.sring = NULL; } - if (sc->irq) - unbind_from_irqhandler(sc->irq); - sc->irq = 0; + if (sc->shadow) { + + for (i = 0; i < sc->max_requests; i++) { + struct xb_command *cm; + + cm = &sc->shadow[i]; + if (cm->sg_refs != NULL) { + free(cm->sg_refs, M_XENBLOCKFRONT); + cm->sg_refs = NULL; + } + + bus_dmamap_destroy(sc->xb_io_dmat, cm->map); + } + free(sc->shadow, M_XENBLOCKFRONT); + sc->shadow = NULL; + } + + if (sc->irq) { + unbind_from_irqhandler(sc->irq); + sc->irq = 0; + } } -static void +static int blkif_completion(struct xb_command *s) { - int i; - - for (i = 0; i < s->req.nr_segments; i++) - gnttab_end_foreign_access(s->req.seg[i].gref, 0UL); +//printf("%s: Req %p(%d)\n", __func__, s, s->nseg); + gnttab_end_foreign_access_references(s->nseg, s->sg_refs); + return 
(BLKIF_SEGS_TO_BLOCKS(s->nseg)); } +#if 0 static void blkif_recover(struct xb_softc *sc) { @@ -1157,6 +1306,7 @@ * has been removed until further notice. */ } +#endif /* ** Driver registration ** */ static device_method_t blkfront_methods[] = { @@ -1169,7 +1319,7 @@ DEVMETHOD(device_resume, blkfront_resume), /* Xenbus interface */ - DEVMETHOD(xenbus_backend_changed, blkfront_backend_changed), + DEVMETHOD(xenbus_otherend_changed, blkfront_backend_changed), { 0, 0 } }; @@ -1181,4 +1331,4 @@ }; devclass_t blkfront_devclass; -DRIVER_MODULE(xbd, xenbus, blkfront_driver, blkfront_devclass, 0, 0); +DRIVER_MODULE(xbd, xenbusb_front, blkfront_driver, blkfront_devclass, 0, 0); diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/dev/xen/blkfront/block.h head.xen/sys/dev/xen/blkfront/block.h --- head.moves/sys/dev/xen/blkfront/block.h 2010-09-16 16:31:08.600214000 -0600 +++ head.xen/sys/dev/xen/blkfront/block.h 2010-09-16 16:49:09.627984112 -0600 @@ -32,7 +32,43 @@ #ifndef __XEN_DRIVERS_BLOCK_H__ #define __XEN_DRIVERS_BLOCK_H__ -#include +#include + +/** + * The maximum number of outstanding request blocks (request headers plus + * additional segment blocks) we will allow in a negotiated block-front/back + * communication channel. + */ +#define XBF_MAX_REQUESTS 256 + +/** + * The maximum mapped region size per request we will allow in a negotiated + * block-front/back communication channel. + * + * \note We reserve a segment from the maximum supported by the transport to + * guarantee we can handle an unaligned transfer without the need to + * use a bounce buffer. + */ +#define XBF_MAX_REQUEST_SIZE \ + MIN(MAXPHYS, (BLKIF_MAX_SEGMENTS_PER_REQUEST - 1) * PAGE_SIZE) + +/** + * The maximum number of segments (within a request header and accompanying + * segment blocks) per request we will allow in a negotiated block-front/back + * communication channel.
+ */ +#define XBF_MAX_SEGMENTS_PER_REQUEST \ + (MIN(BLKIF_MAX_SEGMENTS_PER_REQUEST, \ + (XBF_MAX_REQUEST_SIZE / PAGE_SIZE) + 1)) + +/** + * The maximum number of shared memory ring pages we will allow in a + * negotiated block-front/back communication channel. Allow enough + * ring space for all requests to be XBF_MAX_REQUEST_SIZE'd. + */ +#define XBF_MAX_RING_PAGES \ + BLKIF_RING_PAGES(BLKIF_SEGS_TO_BLOCKS(XBF_MAX_SEGMENTS_PER_REQUEST) \ + * XBF_MAX_REQUESTS) struct xlbd_type_info { @@ -62,19 +98,19 @@ #define XB_ON_XBQ_COMPLETE (1<<5) #define XB_ON_XBQ_MASK ((1<<2)|(1<<3)|(1<<4)|(1<<5)) bus_dmamap_t map; - blkif_request_t req; + uint64_t id; + grant_ref_t *sg_refs; struct bio *bp; grant_ref_t gref_head; void *data; size_t datalen; + u_int nseg; int operation; blkif_sector_t sector_number; int status; void (* cm_complete)(struct xb_command *); }; -#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) - #define XBQ_FREE 0 #define XBQ_BIO 1 #define XBQ_READY 2 @@ -108,10 +144,14 @@ int vdevice; blkif_vdev_t handle; int connected; - int ring_ref; + u_int ring_pages; + uint32_t max_requests; + uint32_t max_request_segments; + uint32_t max_request_blocks; + uint32_t max_request_size; + grant_ref_t ring_ref[XBF_MAX_RING_PAGES]; blkif_front_ring_t ring; unsigned int irq; - struct xlbd_major_info *mi; struct gnttab_free_callback callback; TAILQ_HEAD(,xb_command) cm_free; TAILQ_HEAD(,xb_command) cm_ready; @@ -126,7 +166,8 @@ */ int users; struct mtx xb_io_lock; - struct xb_command shadow[BLK_RING_SIZE]; + + struct xb_command *shadow; }; int xlvbd_add(struct xb_softc *, blkif_sector_t capacity, int device, @@ -188,7 +229,8 @@ struct xb_command *cm; \ \ if ((cm = TAILQ_FIRST(&sc->cm_ ## name)) != NULL) { \ - if ((cm->cm_flags & XB_ON_ ## index) == 0) { \ + if ((cm->cm_flags & XB_ON_XBQ_MASK) != \ + XB_ON_ ## index) { \ printf("command %p not in queue, " \ "flags = %#x, bit = %#x\n", cm, \ cm->cm_flags, XB_ON_ ## index); \ @@ -203,7 +245,7 @@ static __inline void \ 
xb_remove_ ## name (struct xb_command *cm) \ { \ - if ((cm->cm_flags & XB_ON_ ## index) == 0) { \ + if ((cm->cm_flags & XB_ON_XBQ_MASK) != XB_ON_ ## index){\ printf("command %p not in queue, flags = %#x, " \ "bit = %#x\n", cm, cm->cm_flags, \ XB_ON_ ## index); \ diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/dev/xen/control/control.c head.xen/sys/dev/xen/control/control.c --- head.moves/sys/dev/xen/control/control.c 2010-09-17 08:11:21.758053524 -0600 +++ head.xen/sys/dev/xen/control/control.c 2010-09-16 17:00:57.465861576 -0600 @@ -1,4 +1,35 @@ -/* +/*- + * Copyright (c) 2010 Justin T. Gibbs, Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + */ + +/*- + * PV suspend/resume support: * * Copyright (c) 2004 Christian Limpach. * Copyright (c) 2004-2006,2008 Kip Macy @@ -30,143 +61,143 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/*- + * HVM suspend/resume support: + * + * Copyright (c) 2008 Citrix Systems, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ #include -__FBSDID("$FreeBSD: head/sys/dev/xen/control/control.c -1 $"); +__FBSDID("$FreeBSD$"); + +/** + * \file control.c + * + * \brief Device driver to repond to control domain events that impact + * this VM. + */ #include -#include -#include +#include #include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include +#include +#include +#include +#include + +#ifndef XENHVM #include #include -#include - -#include -#include -#include -#include -#include - -#include -#include - -#ifdef XENHVM - -#include - -#else - -static void xen_suspend(void); - #endif -static void -shutdown_handler(struct xenbus_watch *watch, - const char **vec, unsigned int len) -{ - char *str; - struct xenbus_transaction xbt; - int error, howto; - - howto = 0; - again: - error = xenbus_transaction_start(&xbt); - if (error) - return; +#include - error = xenbus_read(xbt, "control", "shutdown", NULL, (void **) &str); +#include +#include - /* Ignore read errors and empty reads. 
*/ - if (error || strlen(str) == 0) { - xenbus_transaction_end(xbt, 1); - return; - } +#include +#include +#include - xenbus_write(xbt, "control", "shutdown", ""); +#include +#include +#include +#include - error = xenbus_transaction_end(xbt, 0); - if (error == EAGAIN) { - free(str, M_DEVBUF); - goto again; - } +#include +#include - if (strcmp(str, "reboot") == 0) - howto = 0; - else if (strcmp(str, "poweroff") == 0) - howto |= (RB_POWEROFF | RB_HALT); - else if (strcmp(str, "halt") == 0) -#ifdef XENHVM - /* - * We rely on acpi powerdown to halt the VM. - */ - howto |= (RB_POWEROFF | RB_HALT); -#else - howto |= RB_HALT; -#endif - else if (strcmp(str, "suspend") == 0) - howto = -1; - else { - printf("Ignoring shutdown request: %s\n", str); - goto done; - } +#include - if (howto == -1) { - xen_suspend(); - goto done; - } +#define NUM_ELEMENTS(x) (sizeof(x) / sizeof(*(x))) - shutdown_nice(howto); - done: - free(str, M_DEVBUF); -} +/*--------------------------- Forward Declarations --------------------------*/ +/** Function signature for shutdown event handlers. */ +typedef void (xctrl_shutdown_handler_t)(void); + +static xctrl_shutdown_handler_t xctrl_poweroff; +static xctrl_shutdown_handler_t xctrl_reboot; +static xctrl_shutdown_handler_t xctrl_suspend; +static xctrl_shutdown_handler_t xctrl_crash; +static xctrl_shutdown_handler_t xctrl_halt; + +/*-------------------------- Private Data Structures -------------------------*/ +/** Element type for lookup table of event name to handler. */ +struct xctrl_shutdown_reason { + const char *name; + xctrl_shutdown_handler_t *handler; +}; -#ifndef XENHVM +/** Lookup table for shutdown event name to handler. */ +static struct xctrl_shutdown_reason xctrl_shutdown_reasons[] = { + { "poweroff", xctrl_poweroff }, + { "reboot", xctrl_reboot }, + { "suspend", xctrl_suspend }, + { "crash", xctrl_crash }, + { "halt", xctrl_halt }, +}; -/* - * In HV mode, we let acpi take care of halts and reboots. 
- */ +struct xctrl_softc { + + /** Must be first */ + struct xs_watch xctrl_watch; +}; +/*------------------------------ Event Handlers ------------------------------*/ static void -xen_shutdown_final(void *arg, int howto) +xctrl_poweroff() { - - if (howto & (RB_HALT | RB_POWEROFF)) - HYPERVISOR_shutdown(SHUTDOWN_poweroff); - else - HYPERVISOR_shutdown(SHUTDOWN_reboot); + shutdown_nice(RB_POWEROFF|RB_HALT); } -#endif - -static struct xenbus_watch shutdown_watch = { - .node = "control/shutdown", - .callback = shutdown_handler -}; - static void -setup_shutdown_watcher(void *unused) +xctrl_reboot() { - - if (register_xenbus_watch(&shutdown_watch)) - printf("Failed to set shutdown watcher\n"); -#ifndef XENHVM - EVENTHANDLER_REGISTER(shutdown_final, xen_shutdown_final, NULL, - SHUTDOWN_PRI_LAST); -#endif + shutdown_nice(0); } -SYSINIT(shutdown, SI_SUB_PSEUDO, SI_ORDER_ANY, setup_shutdown_watcher, NULL); - #ifndef XENHVM - extern void xencons_suspend(void); extern void xencons_resume(void); -static void -xen_suspend() +/* Full PV mode suspension. */ +static void +xctrl_suspend() { int i, j, k, fpp; unsigned long max_pfn, start_info_mfn; @@ -263,4 +294,200 @@ #endif } +static void +xen_pv_shutdown_final(void *arg, int howto) +{ + /* + * Inform the hypervisor that shutdown is complete. + * This is not necessary in HVM domains since Xen + * emulates ACPI in that mode and FreeBSD's ACPI + * support will request this transition. + */ + if (howto & (RB_HALT | RB_POWEROFF)) + HYPERVISOR_shutdown(SHUTDOWN_poweroff); + else + HYPERVISOR_shutdown(SHUTDOWN_reboot); +} + +#else +extern void xenpci_resume(void); + +/* HVM mode suspension. */ +static void +xctrl_suspend() +{ + int suspend_cancelled; + + if (DEVICE_SUSPEND(root_bus)) { + printf("xen_suspend: device_suspend failed\n"); + return; + } + + /* + * Make sure we don't change cpus or switch to some other + * thread. for the duration. + */ + critical_enter(); + + /* + * Prevent any races with evtchn_interrupt() handler. 
+ */ + irq_suspend(); + disable_intr(); + + suspend_cancelled = HYPERVISOR_suspend(0); + if (!suspend_cancelled) + xenpci_resume(); + + /* + * Re-enable interrupts and put the scheduler back to normal. + */ + enable_intr(); + critical_exit(); + + /* + * FreeBSD really needs to add DEVICE_SUSPEND_CANCEL or + * similar. + */ + if (!suspend_cancelled) + DEVICE_RESUME(root_bus); +} +#endif + +static void +xctrl_crash() +{ + panic("Xen directed crash"); +} + +static void +xctrl_halt() +{ + shutdown_nice(RB_HALT); +} + +/*------------------------------ Event Reception -----------------------------*/ +static void +xctrl_on_watch_event(struct xs_watch *watch, const char **vec, unsigned int len) +{ + struct xctrl_shutdown_reason *reason; + struct xctrl_shutdown_reason *last_reason; + char *result; + int error; + int result_len; + + error = xs_read(XST_NIL, "control", "shutdown", + &result_len, (void **)&result); + if (error != 0) + return; + + reason = xctrl_shutdown_reasons; + last_reason = reason + NUM_ELEMENTS(xctrl_shutdown_reasons); + while (reason < last_reason) { + + if (!strcmp(result, reason->name)) { + reason->handler(); + break; + } + reason++; + } + + free(result, M_XENSTORE); +} + +/*------------------ Private Device Attachment Functions --------------------*/ +/** + * \brief Identify instances of this device type in the system. + * + * \param driver The driver performing this identify action. + * \param parent The NewBus parent device for any devices this method adds. + */ +static void +xctrl_identify(driver_t *driver __unused, device_t parent) +{ + /* + * A single device instance for our driver is always present + * in a system operating under Xen. + */ + BUS_ADD_CHILD(parent, 0, driver->name, 0); +} + +/** + * \brief Probe for the existance of the Xen Control device + * + * \param dev NewBus device_t for this Xen control instance. + * + * \return Always returns 0 indicating success. 
+ */ +static int +xctrl_probe(device_t dev) +{ + device_set_desc(dev, "Xen Control Device"); + + return (0); +} + +/** + * \brief Attach the Xen control device. + * + * \param dev NewBus device_t for this Xen control instance. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + */ +static int +xctrl_attach(device_t dev) +{ + struct xctrl_softc *xctrl; + + xctrl = device_get_softc(dev); + + /* Activate watch */ + xctrl->xctrl_watch.node = "control/shutdown"; + xctrl->xctrl_watch.callback = xctrl_on_watch_event; + xs_register_watch(&xctrl->xctrl_watch); + +#ifndef XENHVM + EVENTHANDLER_REGISTER(shutdown_final, xen_pv_shutdown_final, NULL, + SHUTDOWN_PRI_LAST); #endif + + return (0); +} + +/** + * \brief Detach the Xen control device. + * + * \param dev NewBus device_t for this Xen control device instance. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + */ +static int +xctrl_detach(device_t dev) +{ + struct xctrl_softc *xctrl; + + xctrl = device_get_softc(dev); + + /* Release watch */ + xs_unregister_watch(&xctrl->xctrl_watch); + + return (0); +} + +/*-------------------- Private Device Attachment Data -----------------------*/ +static device_method_t xctrl_methods[] = { + /* Device interface */ + DEVMETHOD(device_identify, xctrl_identify), + DEVMETHOD(device_probe, xctrl_probe), + DEVMETHOD(device_attach, xctrl_attach), + DEVMETHOD(device_detach, xctrl_detach), + + { 0, 0 } +}; + +DEFINE_CLASS_0(xctrl, xctrl_driver, xctrl_methods, sizeof(struct xctrl_softc)); +devclass_t xctrl_devclass; + +DRIVER_MODULE(xctrl, xenstore, xctrl_driver, xctrl_devclass, 0, 0); diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/dev/xen/netfront/netfront.c head.xen/sys/dev/xen/netfront/netfront.c --- head.moves/sys/dev/xen/netfront/netfront.c 2010-09-16 16:31:08.596198000 -0600 +++ head.xen/sys/dev/xen/netfront/netfront.c 2010-09-16 16:49:09.634143570 -0600 @@ -91,8 +91,6 @@ #define 
XN_CSUM_FEATURES (CSUM_TCP | CSUM_UDP | CSUM_TSO) -#define GRANT_INVALID_REF 0 - #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) @@ -373,7 +371,8 @@ { int i = xennet_rxidx(ri); grant_ref_t ref = np->grant_rx_ref[i]; - np->grant_rx_ref[i] = GRANT_INVALID_REF; + KASSERT(ref != GRANT_REF_INVALID, ("Invalid grant reference!\n")); + np->grant_rx_ref[i] = GRANT_REF_INVALID; return ref; } @@ -404,7 +403,7 @@ int error, i; char *s, *e, *macstr; - error = xenbus_read(XBT_NIL, xenbus_get_node(dev), "mac", NULL, + error = xs_read(XST_NIL, xenbus_get_node(dev), "mac", NULL, (void **) &macstr); if (error) return (error); @@ -413,12 +412,12 @@ for (i = 0; i < ETHER_ADDR_LEN; i++) { mac[i] = strtoul(s, &e, 16); if (s == e || (e[0] != ':' && e[0] != 0)) { - free(macstr, M_DEVBUF); + free(macstr, M_XENBUS); return (ENOENT); } s = &e[1]; } - free(macstr, M_DEVBUF); + free(macstr, M_XENBUS); return (0); } @@ -483,7 +482,7 @@ talk_to_backend(device_t dev, struct netfront_info *info) { const char *message; - struct xenbus_transaction xbt; + struct xs_transaction xst; const char *node = xenbus_get_node(dev); int err; @@ -499,54 +498,54 @@ goto out; again: - err = xenbus_transaction_start(&xbt); + err = xs_transaction_start(&xst); if (err) { xenbus_dev_fatal(dev, err, "starting transaction"); goto destroy_ring; } - err = xenbus_printf(xbt, node, "tx-ring-ref","%u", + err = xs_printf(xst, node, "tx-ring-ref","%u", info->tx_ring_ref); if (err) { message = "writing tx ring-ref"; goto abort_transaction; } - err = xenbus_printf(xbt, node, "rx-ring-ref","%u", + err = xs_printf(xst, node, "rx-ring-ref","%u", info->rx_ring_ref); if (err) { message = "writing rx ring-ref"; goto abort_transaction; } - err = xenbus_printf(xbt, node, + err = xs_printf(xst, node, "event-channel", "%u", irq_to_evtchn_port(info->irq)); if (err) { message = "writing event-channel"; goto abort_transaction; } - err = 
xenbus_printf(xbt, node, "request-rx-copy", "%u", + err = xs_printf(xst, node, "request-rx-copy", "%u", info->copying_receiver); if (err) { message = "writing request-rx-copy"; goto abort_transaction; } - err = xenbus_printf(xbt, node, "feature-rx-notify", "%d", 1); + err = xs_printf(xst, node, "feature-rx-notify", "%d", 1); if (err) { message = "writing feature-rx-notify"; goto abort_transaction; } - err = xenbus_printf(xbt, node, "feature-sg", "%d", 1); + err = xs_printf(xst, node, "feature-sg", "%d", 1); if (err) { message = "writing feature-sg"; goto abort_transaction; } #if __FreeBSD_version >= 700000 - err = xenbus_printf(xbt, node, "feature-gso-tcpv4", "%d", 1); + err = xs_printf(xst, node, "feature-gso-tcpv4", "%d", 1); if (err) { message = "writing feature-gso-tcpv4"; goto abort_transaction; } #endif - err = xenbus_transaction_end(xbt, 0); + err = xs_transaction_end(xst, 0); if (err) { if (err == EAGAIN) goto again; @@ -557,7 +556,7 @@ return 0; abort_transaction: - xenbus_transaction_end(xbt, 1); + xs_transaction_end(xst, 1); xenbus_dev_fatal(dev, err, "%s", message); destroy_ring: netif_free(info); @@ -576,8 +575,8 @@ ifp = info->xn_ifp; - info->tx_ring_ref = GRANT_INVALID_REF; - info->rx_ring_ref = GRANT_INVALID_REF; + info->tx_ring_ref = GRANT_REF_INVALID; + info->rx_ring_ref = GRANT_REF_INVALID; info->rx.sring = NULL; info->tx.sring = NULL; info->irq = 0; @@ -750,7 +749,7 @@ GNTMAP_readonly); gnttab_release_grant_reference(&np->gref_tx_head, np->grant_tx_ref[i]); - np->grant_tx_ref[i] = GRANT_INVALID_REF; + np->grant_tx_ref[i] = GRANT_REF_INVALID; add_id_to_freelist(np->tx_mbufs, i); np->xn_cdata.xn_tx_chain_cnt--; if (np->xn_cdata.xn_tx_chain_cnt < 0) { @@ -854,7 +853,8 @@ sc->rx_mbufs[id] = m_new; ref = gnttab_claim_grant_reference(&sc->gref_rx_head); - KASSERT((short)ref >= 0, ("negative ref")); + KASSERT(ref != GNTTAB_LIST_END, + ("reserved grant references exhuasted")); sc->grant_rx_ref[id] = ref; vaddr = mtod(m_new, vm_offset_t); @@ -1135,7 
+1135,7 @@ np->grant_tx_ref[id]); gnttab_release_grant_reference( &np->gref_tx_head, np->grant_tx_ref[id]); - np->grant_tx_ref[id] = GRANT_INVALID_REF; + np->grant_tx_ref[id] = GRANT_REF_INVALID; np->tx_mbufs[id] = NULL; add_id_to_freelist(np->tx_mbufs, id); @@ -1318,12 +1318,13 @@ * the backend driver. In future this should flag the bad * situation to the system controller to reboot the backed. */ - if (ref == GRANT_INVALID_REF) { + if (ref == GRANT_REF_INVALID) { #if 0 if (net_ratelimit()) WPRINTK("Bad rx response id %d.\n", rx->id); #endif + printf("%s: Bad rx response id %d.\n", __func__,rx->id); err = EINVAL; goto next; } @@ -1384,7 +1385,7 @@ err = ENOENT; printf("%s: cons %u frags %u rp %u, not enough frags\n", __func__, *cons, frags, rp); - break; + break; } /* * Note that m can be NULL, if rx->status < 0 or if @@ -1526,6 +1527,11 @@ * tell the TCP stack to generate a shorter chain of packets. */ if (nfrags > MAX_TX_REQ_FRAGS) { +#ifdef DEBUG + printf("%s: nfrags %d > MAX_TX_REQ_FRAGS %d, netback " + "won't be able to handle it, dropping\n", + __func__, nfrags, MAX_TX_REQ_FRAGS); +#endif m_freem(m_head); return (EMSGSIZE); } @@ -1881,11 +1887,11 @@ netif_rx_request_t *req; u_int feature_rx_copy, feature_rx_flip; - error = xenbus_scanf(XBT_NIL, xenbus_get_otherend_path(np->xbdev), + error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-rx-copy", NULL, "%u", &feature_rx_copy); if (error) feature_rx_copy = 0; - error = xenbus_scanf(XBT_NIL, xenbus_get_otherend_path(np->xbdev), + error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-rx-flip", NULL, "%u", &feature_rx_flip); if (error) feature_rx_flip = 1; @@ -1999,14 +2005,14 @@ /* Initialise {tx,rx}_skbs to be a free chain containing every entry. 
*/ for (i = 0; i <= NET_TX_RING_SIZE; i++) { np->tx_mbufs[i] = (void *) ((u_long) i+1); - np->grant_tx_ref[i] = GRANT_INVALID_REF; + np->grant_tx_ref[i] = GRANT_REF_INVALID; } np->tx_mbufs[NET_TX_RING_SIZE] = (void *)0; for (i = 0; i <= NET_RX_RING_SIZE; i++) { np->rx_mbufs[i] = NULL; - np->grant_rx_ref[i] = GRANT_INVALID_REF; + np->grant_rx_ref[i] = GRANT_REF_INVALID; } /* A grant for every tx ring slot */ if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, @@ -2128,8 +2134,8 @@ end_access(info->tx_ring_ref, info->tx.sring); end_access(info->rx_ring_ref, info->rx.sring); - info->tx_ring_ref = GRANT_INVALID_REF; - info->rx_ring_ref = GRANT_INVALID_REF; + info->tx_ring_ref = GRANT_REF_INVALID; + info->rx_ring_ref = GRANT_REF_INVALID; info->tx.sring = NULL; info->rx.sring = NULL; @@ -2143,7 +2149,7 @@ static void end_access(int ref, void *page) { - if (ref != GRANT_INVALID_REF) + if (ref != GRANT_REF_INVALID) gnttab_end_foreign_access(ref, page); } @@ -2171,7 +2177,7 @@ DEVMETHOD(device_resume, netfront_resume), /* Xenbus interface */ - DEVMETHOD(xenbus_backend_changed, netfront_backend_changed), + DEVMETHOD(xenbus_otherend_changed, netfront_backend_changed), { 0, 0 } }; @@ -2183,4 +2189,4 @@ }; devclass_t netfront_devclass; -DRIVER_MODULE(xe, xenbus, netfront_driver, netfront_devclass, 0, 0); +DRIVER_MODULE(xe, xenbusb_front, netfront_driver, netfront_devclass, 0, 0); diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/dev/xen/xenpci/evtchn.c head.xen/sys/dev/xen/xenpci/evtchn.c --- head.moves/sys/dev/xen/xenpci/evtchn.c 2010-09-16 16:31:08.617866000 -0600 +++ head.xen/sys/dev/xen/xenpci/evtchn.c 2010-09-16 16:49:09.651844154 -0600 @@ -181,6 +181,49 @@ return (0); } +int +bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, + unsigned int remote_port, const char *devname, driver_intr_t handler, + void *arg, unsigned long irqflags, unsigned int *irqp) +{ + struct evtchn_bind_interdomain bind_interdomain; + unsigned int irq; + int error; + + irq 
= alloc_xen_irq(); + if (irq < 0) + return irq; + + mtx_lock(&irq_evtchn[irq].lock); + + bind_interdomain.remote_dom = remote_domain; + bind_interdomain.remote_port = remote_port; + error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, + &bind_interdomain); + if (error) { + mtx_unlock(&irq_evtchn[irq].lock); + free_xen_irq(irq); + return (-error); + } + + irq_evtchn[irq].handler = handler; + irq_evtchn[irq].arg = arg; + irq_evtchn[irq].evtchn = bind_interdomain.local_port; + irq_evtchn[irq].close = 1; + irq_evtchn[irq].mpsafe = (irqflags & INTR_MPSAFE) != 0; + + evtchn_to_irq[bind_interdomain.local_port] = irq; + + unmask_evtchn(bind_interdomain.local_port); + + mtx_unlock(&irq_evtchn[irq].lock); + + if (irqp) + *irqp = irq; + return (0); +} + + int bind_caller_port_to_irqhandler(unsigned int caller_port, const char *devname, driver_intr_t handler, void *arg, diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/dev/xen/xenpci/xenpci.c head.xen/sys/dev/xen/xenpci/xenpci.c --- head.moves/sys/dev/xen/xenpci/xenpci.c 2010-09-16 16:31:08.614615000 -0600 +++ head.xen/sys/dev/xen/xenpci/xenpci.c 2010-09-16 16:49:09.655995998 -0600 @@ -66,6 +66,7 @@ char *hypercall_stubs; shared_info_t *HYPERVISOR_shared_info; static vm_paddr_t shared_info_pa; +static device_t nexus; /* * This is used to find our platform device instance. 
@@ -80,7 +81,7 @@ { uint32_t base, regs[4]; - for (base = 0x40000000; base < 0x40001000; base += 0x100) { + for (base = 0x40000000; base < 0x40010000; base += 0x100) { do_cpuid(base, regs); if (!memcmp("XenVMMXenVMM", ®s[1], 12) && (regs[0] - base) >= 2) @@ -204,14 +205,21 @@ scp->res_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &scp->rid_irq, RF_SHAREABLE|RF_ACTIVE); - if (scp->res_irq == NULL) + if (scp->res_irq == NULL) { + printf("xenpci Could not allocate irq.\n"); goto errexit; + } scp->rid_memory = PCIR_BAR(1); scp->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &scp->rid_memory, RF_ACTIVE); - if (scp->res_memory == NULL) + if (scp->res_memory == NULL) { + printf("xenpci Could not allocate memory bar.\n"); goto errexit; + } + + scp->phys_next = rman_get_start(scp->res_memory); + return (0); errexit: @@ -254,6 +262,36 @@ } } +static struct resource * +xenpci_alloc_resource(device_t dev, device_t child, int type, int *rid, + u_long start, u_long end, u_long count, u_int flags) +{ + return (BUS_ALLOC_RESOURCE(nexus, child, type, rid, start, + end, count, flags)); +} + + +static int +xenpci_release_resource(device_t dev, device_t child, int type, int rid, + struct resource *r) +{ + return (BUS_RELEASE_RESOURCE(nexus, child, type, rid, r)); +} + +static int +xenpci_activate_resource(device_t dev, device_t child, int type, int rid, + struct resource *r) +{ + return (BUS_ACTIVATE_RESOURCE(nexus, child, type, rid, r)); +} + +static int +xenpci_deactivate_resource(device_t dev, device_t child, int type, + int rid, struct resource *r) +{ + return (BUS_DEACTIVATE_RESOURCE(nexus, child, type, rid, r)); +} + /* * Called very early in the resume sequence - reinitialise the various * bits of Xen machinery including the hypercall page and the shared @@ -303,20 +341,36 @@ static int xenpci_attach(device_t dev) { - int error; + int error; struct xenpci_softc *scp = device_get_softc(dev); struct xen_add_to_physmap xatp; vm_offset_t shared_va; + devclass_t dc; + + 
/* + * Find and record nexus0. Since we are not really on the + * PCI bus, all resource operations are directed to nexus + * instead of through our parent. + */ + if ((dc = devclass_find("nexus")) == 0 + || (nexus = devclass_get_device(dc, 0)) == 0) { + device_printf(dev, "unable to find nexus."); + return (ENOENT); + } error = xenpci_allocate_resources(dev); - if (error) + if (error) { + device_printf(dev, "xenpci_allocate_resources failed(%d).\n", + error); goto errexit; - - scp->phys_next = rman_get_start(scp->res_memory); + } error = xenpci_init_hypercall_stubs(dev, scp); - if (error) + if (error) { + device_printf(dev, "xenpci_init_hypercall_stubs failed(%d).\n", + error); goto errexit; + } setup_xen_features(); @@ -346,7 +400,7 @@ * Undo anything we may have done. */ xenpci_deallocate_resources(dev); - return (error); + return (error); } /* @@ -364,8 +418,9 @@ */ if (scp->intr_cookie != NULL) { if (BUS_TEARDOWN_INTR(parent, dev, - scp->res_irq, scp->intr_cookie) != 0) - printf("intr teardown failed.. continuing\n"); + scp->res_irq, scp->intr_cookie) != 0) + device_printf(dev, + "intr teardown failed.. 
continuing\n"); scp->intr_cookie = NULL; } @@ -386,6 +441,10 @@ /* Bus interface */ DEVMETHOD(bus_add_child, bus_generic_add_child), + DEVMETHOD(bus_alloc_resource, xenpci_alloc_resource), + DEVMETHOD(bus_release_resource, xenpci_release_resource), + DEVMETHOD(bus_activate_resource, xenpci_activate_resource), + DEVMETHOD(bus_deactivate_resource, xenpci_deactivate_resource), { 0, 0 } }; diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/i386/xen/xen_machdep.c head.xen/sys/i386/xen/xen_machdep.c --- head.moves/sys/i386/xen/xen_machdep.c 2010-09-16 16:31:09.181645000 -0600 +++ head.xen/sys/i386/xen/xen_machdep.c 2010-09-16 16:49:09.661144385 -0600 @@ -722,7 +722,9 @@ pteinfo_t *pteinfo_list; void initvalues(start_info_t *startinfo); -struct ringbuf_head *xen_store; /* XXX move me */ +struct xenstore_domain_interface; +extern struct xenstore_domain_interface *xen_store; + char *console_page; void * @@ -1082,7 +1084,7 @@ HYPERVISOR_shared_info = (shared_info_t *)cur_space; cur_space += PAGE_SIZE; - xen_store = (struct ringbuf_head *)cur_space; + xen_store = (struct xenstore_domain_interface *)cur_space; cur_space += PAGE_SIZE; console_page = (char *)cur_space; diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/evtchn/evtchn.c head.xen/sys/xen/evtchn/evtchn.c --- head.moves/sys/xen/evtchn/evtchn.c 2010-09-16 16:30:38.691806000 -0600 +++ head.xen/sys/xen/evtchn/evtchn.c 2010-09-16 16:49:09.666316799 -0600 @@ -492,15 +492,15 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, unsigned int remote_port, const char *devname, - driver_filter_t filter, driver_intr_t handler, - unsigned long irqflags, unsigned int *irqp) + driver_intr_t handler, void *arg, unsigned long irqflags, + unsigned int *irqp) { unsigned int irq; int error; irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port); intr_register_source(&xp->xp_pins[irq].xp_intsrc); - error = intr_add_handler(devname, irq, filter, handler, NULL, + error = 
intr_add_handler(devname, irq, NULL, handler, arg, irqflags, &xp->xp_pins[irq].xp_cookie); if (error) { unbind_from_irq(irq); diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/gnttab.c head.xen/sys/xen/gnttab.c --- head.moves/sys/xen/gnttab.c 2010-09-16 16:30:38.639500000 -0600 +++ head.xen/sys/xen/gnttab.c 2010-09-16 16:49:09.671495918 -0600 @@ -42,7 +42,6 @@ /* External tools reserve first few grant table entries. */ #define NR_RESERVED_ENTRIES 8 -#define GNTTAB_LIST_END 0xffffffff #define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t)) static grant_ref_t **gnttab_list; @@ -66,7 +65,7 @@ { int ref, error; grant_ref_t head; - + mtx_lock(&gnttab_list_lock); if ((gnttab_free_count < count) && ((error = gnttab_expand(count - gnttab_free_count)) != 0)) { @@ -79,7 +78,7 @@ head = gnttab_entry(head); gnttab_free_head = gnttab_entry(head); gnttab_entry(head) = GNTTAB_LIST_END; - mtx_unlock(&gnttab_list_lock); + mtx_unlock(&gnttab_list_lock); *entries = ref; return (0); @@ -122,7 +121,7 @@ gnttab_free_head = ref; gnttab_free_count++; check_free_callbacks(); - mtx_unlock(&gnttab_list_lock); + mtx_unlock(&gnttab_list_lock); } /* @@ -136,7 +135,7 @@ int error, ref; error = get_free_entries(1, &ref); - + if (unlikely(error)) return (error); @@ -166,9 +165,9 @@ gnttab_query_foreign_access(grant_ref_t ref) { uint16_t nflags; - + nflags = shared[ref].flags; - + return (nflags & (GTF_reading|GTF_writing)); } @@ -180,7 +179,7 @@ nflags = shared[ref].flags; do { if ( (flags = nflags) & (GTF_reading|GTF_writing) ) { - printf("WARNING: g.e. still in use!\n"); + printf("%s: WARNING: g.e. still in use!\n", __func__); return (0); } } while ((nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) != @@ -201,7 +200,44 @@ else { /* XXX This needs to be fixed so that the ref and page are placed on a list to be freed up later. */ - printf("WARNING: leaking g.e. and page still in use!\n"); + printf("%s: WARNING: leaking g.e. 
and page still in use!\n", + __func__); + } +} + +void +gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs) +{ + grant_ref_t *last_ref; + grant_ref_t head; + grant_ref_t tail; + + head = GNTTAB_LIST_END; + tail = *refs; + last_ref = refs + count; + while (refs != last_ref) { + + if (gnttab_end_foreign_access_ref(*refs)) { + gnttab_entry(*refs) = head; + head = *refs; + } else { + /* + * XXX This needs to be fixed so that the ref + * is placed on a list to be freed up later. + */ + printf("%s: WARNING: leaking g.e. still in use!\n", + __func__); + count--; + } + refs++; + } + + if (count != 0) { + mtx_lock(&gnttab_list_lock); + gnttab_free_count += count; + gnttab_entry(tail) = gnttab_free_head; + gnttab_free_head = head; + mtx_unlock(&gnttab_list_lock); } } @@ -216,7 +252,7 @@ return (error); gnttab_grant_foreign_transfer_ref(ref, domid, pfn); - + *result = ref; return (0); } @@ -282,16 +318,16 @@ { grant_ref_t ref; int count = 1; - + if (head == GNTTAB_LIST_END) return; - - mtx_lock(&gnttab_list_lock); + ref = head; while (gnttab_entry(ref) != GNTTAB_LIST_END) { ref = gnttab_entry(ref); count++; } + mtx_lock(&gnttab_list_lock); gnttab_entry(ref) = gnttab_free_head; gnttab_free_head = head; gnttab_free_count += count; @@ -403,7 +439,7 @@ check_free_callbacks(); return (0); - + grow_nomem: for ( ; i >= nr_grant_frames; i--) free(gnttab_list[i], M_DEVBUF); @@ -490,7 +526,7 @@ if (shared == NULL) { vm_offset_t area; - + area = kmem_alloc_nofault(kernel_map, PAGE_SIZE * max_nr_grant_frames()); KASSERT(area, ("can't allocate VM space for grant table")); @@ -502,7 +538,7 @@ ((vm_paddr_t)frames[i]) << PAGE_SHIFT | PG_RW | PG_V); free(frames, M_DEVBUF); - + return (0); } @@ -517,7 +553,7 @@ int gnttab_suspend(void) -{ +{ int i; for (i = 0; i < nr_grant_frames; i++) @@ -532,7 +568,8 @@ static vm_paddr_t resume_frames; -static int gnttab_map(unsigned int start_idx, unsigned int end_idx) +static int +gnttab_map(unsigned int start_idx, unsigned int end_idx) { 
struct xen_add_to_physmap xatp; unsigned int i = end_idx; @@ -552,7 +589,7 @@ if (shared == NULL) { vm_offset_t area; - + area = kmem_alloc_nofault(kernel_map, PAGE_SIZE * max_nr_grant_frames()); KASSERT(area, ("can't allocate VM space for grant table")); @@ -643,10 +680,10 @@ if (gnttab_list[i] == NULL) goto ini_nomem; } - + if (gnttab_resume()) return (ENODEV); - + nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) @@ -670,4 +707,3 @@ } MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF); -//SYSINIT(gnttab, SI_SUB_PSEUDO, SI_ORDER_FIRST, gnttab_init, NULL); diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/gnttab.h head.xen/sys/xen/gnttab.h --- head.moves/sys/xen/gnttab.h 2010-09-16 16:30:38.686756000 -0600 +++ head.xen/sys/xen/gnttab.h 2010-09-16 16:49:09.675656422 -0600 @@ -43,6 +43,8 @@ #include #include +#define GNTTAB_LIST_END GRANT_REF_INVALID + struct gnttab_free_callback { struct gnttab_free_callback *next; void (*fn)(void *); @@ -74,6 +76,13 @@ */ void gnttab_end_foreign_access(grant_ref_t ref, void *page); +/* + * Eventually end access through the given array of grant references. 
+ * Access will be ended immediately iff the grant entry is not in use, + * otherwise it will happen some time later + */ +void gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs); + int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn, grant_ref_t *result); unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref); diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/interface/grant_table.h head.xen/sys/xen/interface/grant_table.h --- head.moves/sys/xen/interface/grant_table.h 2010-09-16 16:30:38.581063000 -0600 +++ head.xen/sys/xen/interface/grant_table.h 2010-09-16 16:49:09.679808266 -0600 @@ -159,6 +159,8 @@ */ typedef uint32_t grant_ref_t; +#define GRANT_REF_INVALID 0xffffffff + /* * Handle to track a mapping created via a grant reference. */ diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/interface/hvm/params.h head.xen/sys/xen/interface/hvm/params.h --- head.moves/sys/xen/interface/hvm/params.h 2010-09-16 16:30:38.564341000 -0600 +++ head.xen/sys/xen/interface/hvm/params.h 2010-09-16 16:49:09.684947993 -0600 @@ -95,4 +95,30 @@ #define HVM_NR_PARAMS 15 +#ifdef XENHVM +/** + * Retrieve an HVM setting from the hypervisor. + * + * \param index The index of the HVM parameter to retrieve. + * + * \return On error, 0. Otherwise the value of the requested parameter. 
+ */ +static inline unsigned long +hvm_get_parameter(int index) +{ + struct xen_hvm_param xhv; + int error; + + xhv.domid = DOMID_SELF; + xhv.index = index; + error = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); + if (error) { + printf("hvm_get_parameter: failed to get %d, error %d\n", + index, error); + return (0); + } + return (xhv.value); +} +#endif + #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/interface/io/blkif.h head.xen/sys/xen/interface/io/blkif.h --- head.moves/sys/xen/interface/io/blkif.h 2010-09-16 16:30:38.631458000 -0600 +++ head.xen/sys/xen/interface/io/blkif.h 2010-09-16 16:49:09.689092573 -0600 @@ -78,11 +78,19 @@ #define BLKIF_OP_FLUSH_DISKCACHE 3 /* - * Maximum scatter/gather segments per request. - * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE. - * NB. This could be 12 if the ring indexes weren't stored in the same page. + * Maximum scatter/gather segments associated with a request header block. */ -#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 +#define BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK 11 + +/* + * Maximum scatter/gather segments associated with a segment block. + */ +#define BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK 14 + +/* + * Maximum scatter/gather segments per request (header + segment blocks). + */ +#define BLKIF_MAX_SEGMENTS_PER_REQUEST 255 struct blkif_request_segment { grant_ref_t gref; /* reference to I/O buffer frame */ @@ -90,6 +98,7 @@ /* @last_sect: last sector in frame to transfer (inclusive). */ uint8_t first_sect, last_sect; }; +typedef struct blkif_request_segment blkif_request_segment_t; struct blkif_request { uint8_t operation; /* BLKIF_OP_??? 
*/ @@ -97,7 +106,7 @@ blkif_vdev_t handle; /* only for read/write requests */ uint64_t id; /* private guest value, echoed in resp */ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ - struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK]; }; typedef struct blkif_request blkif_request_t; @@ -124,10 +133,22 @@ DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); +#define BLKRING_GET_SG_REQUEST(_r, _idx) \ + ((struct blkif_request_segment *)RING_GET_REQUEST(_r, _idx)) + #define VDISK_CDROM 0x1 #define VDISK_REMOVABLE 0x2 #define VDISK_READONLY 0x4 +/* + * The number of ring request blocks required to handle an I/O + * request containing _segs segments. + */ +#define BLKIF_SEGS_TO_BLOCKS(_segs) \ + ((((_segs - BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK) \ + + (BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK - 1)) \ + / BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK) + /*header_block*/1) + #endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ /* diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/interface/io/protocols.h head.xen/sys/xen/interface/io/protocols.h --- head.moves/sys/xen/interface/io/protocols.h 2010-09-16 16:30:38.603207000 -0600 +++ head.xen/sys/xen/interface/io/protocols.h 2010-09-16 16:49:09.693236874 -0600 @@ -26,6 +26,7 @@ #define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi" #define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi" #define XEN_IO_PROTO_ABI_IA64 "ia64-abi" +#define XEN_IO_PROTO_ABI_POWERPC64 "powerpc64-abi" #if defined(__i386__) # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32 @@ -33,6 +34,8 @@ # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64 #elif defined(__ia64__) # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_IA64 +#elif defined(__powerpc64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_POWERPC64 #else # error arch fixup needed here #endif diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/interface/io/ring.h 
head.xen/sys/xen/interface/io/ring.h --- head.moves/sys/xen/interface/io/ring.h 2010-09-16 16:30:38.599191000 -0600 +++ head.xen/sys/xen/interface/io/ring.h 2010-09-16 16:49:09.698392804 -0600 @@ -45,13 +45,29 @@ #define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x)) /* + * The amount of space reserved in the shared ring for accounting information. + */ +#define __RING_HEADER_SIZE(_s) \ + ((intptr_t)(_s)->ring - (intptr_t)(_s)) + +/* * Calculate size of a shared ring, given the total available space for the * ring and indexes (_sz), and the name tag of the request/response structure. * A ring contains as many entries as will fit, rounded down to the nearest * power of two (so we can mask with (size-1) to loop around). */ #define __RING_SIZE(_s, _sz) \ - (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) + (__RD32(((_sz) - __RING_HEADER_SIZE(_s)) / sizeof((_s)->ring[0]))) + +/* + * The number of pages needed to support a given number of request/reponse + * entries. The entry count is rounded down to the nearest power of two + * as required by the ring macros. + */ +#define __RING_PAGES(_s, _entries) \ + ((__RING_HEADER_SIZE(_s) \ + + (__RD32(_entries) * sizeof((_s)->ring[0])) \ + + PAGE_SIZE - 1) / PAGE_SIZE) /* * Macros to make the correct C datatypes for a new kind of ring. diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/interface/io/xenbus.h head.xen/sys/xen/interface/io/xenbus.h --- head.moves/sys/xen/interface/io/xenbus.h 2010-09-16 16:30:38.613248000 -0600 +++ head.xen/sys/xen/interface/io/xenbus.h 2010-09-16 16:49:09.702545486 -0600 @@ -36,6 +36,9 @@ enum xenbus_state { XenbusStateUnknown = 0, + /* + * Initializing: Back-end is initializing. + */ XenbusStateInitialising = 1, /* @@ -49,6 +52,9 @@ */ XenbusStateInitialised = 3, + /* + * Connected: The normal state for a front to backend connection. 
+ */ XenbusStateConnected = 4, /* @@ -56,6 +62,9 @@ */ XenbusStateClosing = 5, + /* + * Closed: No connection exists between front and back end. + */ XenbusStateClosed = 6, /* diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/xen_intr.h head.xen/sys/xen/xen_intr.h --- head.moves/sys/xen/xen_intr.h 2010-09-16 16:30:38.697881000 -0600 +++ head.xen/sys/xen/xen_intr.h 2010-09-16 16:49:09.717049142 -0600 @@ -76,7 +76,7 @@ */ extern int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, unsigned int remote_port, const char *devname, - driver_filter_t filter, driver_intr_t handler, + driver_intr_t handler, void *arg, unsigned long irqflags, unsigned int *irqp); /* diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/xenbus/xenbus.c head.xen/sys/xen/xenbus/xenbus.c --- head.moves/sys/xen/xenbus/xenbus.c 2010-09-17 07:58:37.170160333 -0600 +++ head.xen/sys/xen/xenbus/xenbus.c 2010-09-16 17:23:24.789313968 -0600 @@ -1,8 +1,4 @@ /****************************************************************************** - * Client-facing interface for the Xenbus driver. In other words, the - * interface between the Xenbus and the device-specific code, be it the - * frontend or the backend of that driver. - * * Copyright (C) 2005 XenSource Ltd * * This file may be distributed separately from the Linux kernel, or @@ -27,6 +23,14 @@ * IN THE SOFTWARE. */ +/** + * \file xenbus.c + * + * \brief Client-facing interface for the Xenbus driver. + * + * In other words, the interface between the Xenbus and the device-specific + * code, be it the frontend or the backend of that driver. + */ #if 0 #define DPRINTK(fmt, args...) 
\ @@ -36,12 +40,15 @@ #endif #include -__FBSDID("$FreeBSD: head/sys/xen/xenbus/xenbus.c -1 $"); +__FBSDID("$FreeBSD: head/sys/xen/xenbus/xenbus_client.c 186557 2008-12-29 06:31:03Z kmacy $"); #include +#include +#include #include #include #include +#include #include #include @@ -50,6 +57,34 @@ #include #include +MALLOC_DEFINE(M_XENBUS, "xenbus", "XenBus Support"); + +/*------------------------- Private Functions --------------------------------*/ +/** + * \brief Construct the error path corresponding to the given XenBus + * device. + * + * \param dev The XenBus device for which we are constructing an error path. + * + * \return On success, the contructed error path. Otherwise NULL. + * + * It is the caller's responsibility to free any returned error path + * node using the M_XENBUS malloc type. + */ +static char * +error_path(device_t dev) +{ + char *path_buffer = malloc(strlen("error/") + + strlen(xenbus_get_node(dev)) + 1,M_XENBUS, M_WAITOK); + + strcpy(path_buffer, "error/"); + strcpy(path_buffer + strlen("error/"), xenbus_get_node(dev)); + + return (path_buffer); +} + +/*--------------------------- Public Functions -------------------------------*/ +/*-------- API comments for these methods can be found in xenbusvar.h --------*/ const char * xenbus_strstate(XenbusState state) { @@ -67,15 +102,15 @@ } int -xenbus_watch_path(device_t dev, char *path, struct xenbus_watch *watch, - void (*callback)(struct xenbus_watch *, const char **, unsigned int)) +xenbus_watch_path(device_t dev, char *path, struct xs_watch *watch, + xs_watch_cb_t *callback) { int error; watch->node = path; watch->callback = callback; - error = register_xenbus_watch(watch); + error = xs_register_watch(watch); if (error) { watch->node = NULL; @@ -88,12 +123,12 @@ int xenbus_watch_path2(device_t dev, const char *path, - const char *path2, struct xenbus_watch *watch, - void (*callback)(struct xenbus_watch *, const char **, unsigned int)) + const char *path2, struct xs_watch *watch, + xs_watch_cb_t 
*callback) { int error; char *state = malloc(strlen(path) + 1 + strlen(path2) + 1, - M_DEVBUF, M_WAITOK); + M_XENBUS, M_WAITOK); strcpy(state, path); strcat(state, "/"); @@ -101,46 +136,27 @@ error = xenbus_watch_path(dev, state, watch, callback); if (error) { - free(state, M_DEVBUF); + free(state,M_XENBUS); } return (error); } -/** - * Return the path to the error node for the given device, or NULL on failure. - * If the value returned is non-NULL, then it is the caller's to kfree. - */ -static char * -error_path(device_t dev) -{ - char *path_buffer = malloc(strlen("error/") - + strlen(xenbus_get_node(dev)) + 1, M_DEVBUF, M_WAITOK); - - strcpy(path_buffer, "error/"); - strcpy(path_buffer + strlen("error/"), xenbus_get_node(dev)); - - return (path_buffer); -} - - -static void -_dev_error(device_t dev, int err, const char *fmt, va_list ap) +void +xenbus_dev_verror(device_t dev, int err, const char *fmt, va_list ap) { int ret; unsigned int len; char *printf_buffer = NULL, *path_buffer = NULL; #define PRINTF_BUFFER_SIZE 4096 - printf_buffer = malloc(PRINTF_BUFFER_SIZE, M_DEVBUF, M_WAITOK); + printf_buffer = malloc(PRINTF_BUFFER_SIZE,M_XENBUS, M_WAITOK); len = sprintf(printf_buffer, "%i ", err); ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap); KASSERT(len + ret <= PRINTF_BUFFER_SIZE-1, ("xenbus error message too big")); -#if 0 - dev_err(&dev->dev, "%s\n", printf_buffer); -#endif + device_printf(dev, "Error %s\n", printf_buffer); path_buffer = error_path(dev); if (path_buffer == NULL) { @@ -149,7 +165,7 @@ goto fail; } - if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) { + if (xs_write(XST_NIL, path_buffer, "error", printf_buffer) != 0) { printf("xenbus: failed to write error node for %s (%s)\n", xenbus_get_node(dev), printf_buffer); goto fail; @@ -157,9 +173,9 @@ fail: if (printf_buffer) - free(printf_buffer, M_DEVBUF); + free(printf_buffer,M_XENBUS); if (path_buffer) - free(path_buffer, M_DEVBUF); + free(path_buffer,M_XENBUS); } 
void @@ -168,41 +184,45 @@ va_list ap; va_start(ap, fmt); - _dev_error(dev, err, fmt, ap); + xenbus_dev_verror(dev, err, fmt, ap); va_end(ap); } void +xenbus_dev_vfatal(device_t dev, int err, const char *fmt, va_list ap) +{ + xenbus_dev_verror(dev, err, fmt, ap); + device_printf(dev, "Fatal error. Transitioning to Closing State\n"); + xenbus_set_state(dev, XenbusStateClosing); +} + +void xenbus_dev_fatal(device_t dev, int err, const char *fmt, ...) { va_list ap; va_start(ap, fmt); - _dev_error(dev, err, fmt, ap); + xenbus_dev_vfatal(dev, err, fmt, ap); va_end(ap); - - xenbus_set_state(dev, XenbusStateClosing); } int -xenbus_grant_ring(device_t dev, unsigned long ring_mfn, int *refp) +xenbus_grant_ring(device_t dev, unsigned long ring_mfn, grant_ref_t *refp) { int error; - grant_ref_t ref; error = gnttab_grant_foreign_access( - xenbus_get_otherend_id(dev), ring_mfn, 0, &ref); + xenbus_get_otherend_id(dev), ring_mfn, 0, refp); if (error) { xenbus_dev_fatal(dev, error, "granting access to ring page"); return (error); } - *refp = ref; return (0); } int -xenbus_alloc_evtchn(device_t dev, int *port) +xenbus_alloc_evtchn(device_t dev, evtchn_port_t *port) { struct evtchn_alloc_unbound alloc_unbound; int err; @@ -222,7 +242,7 @@ } int -xenbus_free_evtchn(device_t dev, int port) +xenbus_free_evtchn(device_t dev, evtchn_port_t port) { struct evtchn_close close; int err; @@ -240,12 +260,29 @@ XenbusState xenbus_read_driver_state(const char *path) { - XenbusState result; + XenbusState result; + int error; + + error = xs_gather(XST_NIL, path, "state", "%d", &result, NULL); + if (error) + result = XenbusStateClosed; + + return (result); +} + +int +xenbus_dev_is_online(device_t dev) +{ + const char *path; int error; + int value; - error = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL); - if (error) - result = XenbusStateClosed; + path = xenbus_get_node(dev); + error = xs_gather(XST_NIL, path, "online", "%d", &value, NULL); + if (error != 0) { + /* Default to not 
online. */ + value = 0; + } - return (result); + return (value); } diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/xenbus/xenbus.c.orig head.xen/sys/xen/xenbus/xenbus.c.orig --- head.moves/sys/xen/xenbus/xenbus.c.orig 1969-12-31 17:00:00.000000000 -0700 +++ head.xen/sys/xen/xenbus/xenbus.c.orig 2010-09-16 17:06:38.663549483 -0600 @@ -0,0 +1,251 @@ +/****************************************************************************** + * Client-facing interface for the Xenbus driver. In other words, the + * interface between the Xenbus and the device-specific code, be it the + * frontend or the backend of that driver. + * + * Copyright (C) 2005 XenSource Ltd + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +#if 0 +#define DPRINTK(fmt, args...) 
\ + printk("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) +#else +#define DPRINTK(fmt, args...) ((void)0) +#endif + +#include +__FBSDID("$FreeBSD: head/sys/xen/xenbus/xenbus.c -1 $"); + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +const char * +xenbus_strstate(XenbusState state) +{ + static const char *const name[] = { + [ XenbusStateUnknown ] = "Unknown", + [ XenbusStateInitialising ] = "Initialising", + [ XenbusStateInitWait ] = "InitWait", + [ XenbusStateInitialised ] = "Initialised", + [ XenbusStateConnected ] = "Connected", + [ XenbusStateClosing ] = "Closing", + [ XenbusStateClosed ] = "Closed", + }; + + return ((state < (XenbusStateClosed + 1)) ? name[state] : "INVALID"); +} + +int +xenbus_watch_path(device_t dev, char *path, struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, const char **, unsigned int)) +{ + int error; + + watch->node = path; + watch->callback = callback; + + error = register_xenbus_watch(watch); + + if (error) { + watch->node = NULL; + watch->callback = NULL; + xenbus_dev_fatal(dev, error, "adding watch on %s", path); + } + + return (error); +} + +int +xenbus_watch_path2(device_t dev, const char *path, + const char *path2, struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, const char **, unsigned int)) +{ + int error; + char *state = malloc(strlen(path) + 1 + strlen(path2) + 1, + M_DEVBUF, M_WAITOK); + + strcpy(state, path); + strcat(state, "/"); + strcat(state, path2); + + error = xenbus_watch_path(dev, state, watch, callback); + if (error) { + free(state, M_DEVBUF); + } + + return (error); +} + +/** + * Return the path to the error node for the given device, or NULL on failure. + * If the value returned is non-NULL, then it is the caller's to kfree. 
+ */ +static char * +error_path(device_t dev) +{ + char *path_buffer = malloc(strlen("error/") + + strlen(xenbus_get_node(dev)) + 1, M_DEVBUF, M_WAITOK); + + strcpy(path_buffer, "error/"); + strcpy(path_buffer + strlen("error/"), xenbus_get_node(dev)); + + return (path_buffer); +} + + +static void +_dev_error(device_t dev, int err, const char *fmt, va_list ap) +{ + int ret; + unsigned int len; + char *printf_buffer = NULL, *path_buffer = NULL; + +#define PRINTF_BUFFER_SIZE 4096 + printf_buffer = malloc(PRINTF_BUFFER_SIZE, M_DEVBUF, M_WAITOK); + + len = sprintf(printf_buffer, "%i ", err); + ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap); + + KASSERT(len + ret <= PRINTF_BUFFER_SIZE-1, ("xenbus error message too big")); +#if 0 + dev_err(&dev->dev, "%s\n", printf_buffer); +#endif + path_buffer = error_path(dev); + + if (path_buffer == NULL) { + printf("xenbus: failed to write error node for %s (%s)\n", + xenbus_get_node(dev), printf_buffer); + goto fail; + } + + if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) { + printf("xenbus: failed to write error node for %s (%s)\n", + xenbus_get_node(dev), printf_buffer); + goto fail; + } + + fail: + if (printf_buffer) + free(printf_buffer, M_DEVBUF); + if (path_buffer) + free(path_buffer, M_DEVBUF); +} + +void +xenbus_dev_error(device_t dev, int err, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + _dev_error(dev, err, fmt, ap); + va_end(ap); +} + +void +xenbus_dev_fatal(device_t dev, int err, const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + _dev_error(dev, err, fmt, ap); + va_end(ap); + + xenbus_set_state(dev, XenbusStateClosing); +} + +int +xenbus_grant_ring(device_t dev, unsigned long ring_mfn, int *refp) +{ + int error; + grant_ref_t ref; + + error = gnttab_grant_foreign_access( + xenbus_get_otherend_id(dev), ring_mfn, 0, &ref); + if (error) { + xenbus_dev_fatal(dev, error, "granting access to ring page"); + return (error); + } + + *refp = ref; + return (0); +} + +int +xenbus_alloc_evtchn(device_t dev, int *port) +{ + struct evtchn_alloc_unbound alloc_unbound; + int err; + + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = xenbus_get_otherend_id(dev); + + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + + if (err) { + xenbus_dev_fatal(dev, -err, "allocating event channel"); + return (-err); + } + *port = alloc_unbound.port; + return (0); +} + +int +xenbus_free_evtchn(device_t dev, int port) +{ + struct evtchn_close close; + int err; + + close.port = port; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); + if (err) { + xenbus_dev_error(dev, -err, "freeing event channel %d", port); + return (-err); + } + return (0); +} + +XenbusState +xenbus_read_driver_state(const char *path) +{ + XenbusState result; + int error; + + error = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL); + if (error) + result = XenbusStateClosed; + + return (result); +} diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/xenbus/xenbus_if.m head.xen/sys/xen/xenbus/xenbus_if.m --- head.moves/sys/xen/xenbus/xenbus_if.m 2010-09-16 16:29:32.793981000 -0600 +++ head.xen/sys/xen/xenbus/xenbus_if.m 2010-09-16 16:49:09.737753045 -0600 @@ -31,7 +31,15 @@ INTERFACE xenbus; -METHOD int backend_changed { - device_t dev; - enum xenbus_state newstate; +/** + * \brief Callback triggered when the state of the otherend + * of a split device changes. 
+ * + * \param _dev NewBus device_t for this XenBus device whose otherend's + * state has changed.. + * \param _newstate The new state of the otherend device. + */ +METHOD int otherend_changed { + device_t _dev; + enum xenbus_state _newstate; }; diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/xenbus/xenbusb.c head.xen/sys/xen/xenbus/xenbusb.c --- head.moves/sys/xen/xenbus/xenbusb.c 2010-09-17 07:58:46.792441245 -0600 +++ head.xen/sys/xen/xenbus/xenbusb.c 2010-09-16 17:23:24.805381584 -0600 @@ -1,6 +1,7 @@ /****************************************************************************** * Talks to Xen Store to figure out what devices we have. * + * Copyright (C) 2010 Justin T. Gibbs, Spectra Logic Corporation * Copyright (C) 2008 Doug Rabson * Copyright (C) 2005 Rusty Russell, IBM Corporation * Copyright (C) 2005 Mike Wray, Hewlett-Packard @@ -28,15 +29,32 @@ * IN THE SOFTWARE. */ -#if 0 -#define DPRINTK(fmt, args...) \ - printf("xenbus_probe (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) -#else -#define DPRINTK(fmt, args...) ((void)0) -#endif - +/** + * \file xenbusb.c + * + * \brief Shared support functions for managing the NewBus busses that contain + * Xen front and back end device instances. + * + * The NewBus implementation of XenBus attaches a xenbusb_front and xenbusb_back + * child bus to the xenstore device. This strategy allows the small differences + * in the handling of XenBus operations for front and back devices to be handled + * as overrides in xenbusb_front/back.c. Front and back specific device + * classes are also provided so device drivers can register for the devices they + * can handle without the need to filter within their probe routines. 
The + * net result is a device hierarchy that might look like this: + * + * xenstore0/ + * xenbusb_front0/ + * xn0 + * xbd0 + * xbd1 + * xenbusb_back0/ + * xbbd0 + * xnb0 + * xnb1 + */ #include -__FBSDID("$FreeBSD: head/sys/xen/xenbus/xenbusb.c -1 $"); +__FBSDID("$FreeBSD: head/sys/xen/xenbus/xenbus_probe.c 201758 2010-01-07 21:01:37Z mbr $"); #include #include @@ -44,6 +62,7 @@ #include #include #include +#include #include #include #include @@ -54,89 +73,70 @@ #include #include +#include +#include #include -#include - -struct xenbus_softc { - struct xenbus_watch xs_devicewatch; - struct task xs_probechildren; - struct intr_config_hook xs_attachcb; - device_t xs_dev; -}; - -struct xenbus_device_ivars { - struct xenbus_watch xd_otherend_watch; /* must be first */ - struct sx xd_lock; - device_t xd_dev; - char *xd_node; /* node name in xenstore */ - char *xd_type; /* xen device type */ - enum xenbus_state xd_state; - int xd_otherend_id; - char *xd_otherend_path; -}; - -/* Simplified asprintf. */ -char * -kasprintf(const char *fmt, ...) -{ - va_list ap; - unsigned int len; - char *p, dummy[1]; - - va_start(ap, fmt); - /* FIXME: vsnprintf has a bug, NULL should work */ - len = vsnprintf(dummy, 0, fmt, ap); - va_end(ap); - - p = malloc(len + 1, M_DEVBUF, M_WAITOK); - va_start(ap, fmt); - vsprintf(p, fmt, ap); - va_end(ap); - return p; -} +/*------------------------- Private Functions --------------------------------*/ +/** + * \brief Deallocate XenBus device instance variables. + * + * \param ivars The instance variable block to free. 
+ */ static void -xenbus_identify(driver_t *driver, device_t parent) +xenbusb_free_child_ivars(struct xenbus_device_ivars *ivars) { + if (ivars->xd_otherend_watch.node != NULL) { + xs_unregister_watch(&ivars->xd_otherend_watch); + free(ivars->xd_otherend_watch.node, M_XENBUS); + ivars->xd_otherend_watch.node = NULL; + } - BUS_ADD_CHILD(parent, 0, "xenbus", 0); -} - -static int -xenbus_probe(device_t dev) -{ - int err = 0; - - DPRINTK(""); - - /* Initialize the interface to xenstore. */ - err = xs_init(); - if (err) { - log(LOG_WARNING, - "XENBUS: Error initializing xenstore comms: %i\n", err); - return (ENXIO); + if (ivars->xd_node != NULL) { + free(ivars->xd_node, M_XENBUS); + ivars->xd_node = NULL; } - err = gnttab_init(); - if (err) { - log(LOG_WARNING, - "XENBUS: Error initializing grant table: %i\n", err); - return (ENXIO); + + if (ivars->xd_type != NULL) { + free(ivars->xd_type, M_XENBUS); + ivars->xd_type = NULL; } - device_set_desc(dev, "Xen Devices"); - return (0); -} + if (ivars->xd_devid != NULL) { + free(ivars->xd_devid, M_XENBUS); + ivars->xd_devid = NULL; + } -static enum xenbus_state -xenbus_otherend_state(struct xenbus_device_ivars *ivars) -{ + if (ivars->xd_otherend_path != NULL) { + free(ivars->xd_otherend_path, M_XENBUS); + ivars->xd_otherend_path = NULL; + } - return (xenbus_read_driver_state(ivars->xd_otherend_path)); + free(ivars, M_XENBUS); } +/** + * XenBus watch callback registered against the "state" XenStore + * node of the other-end of a split device connection. + * + * This callback is invoked whenever the state of a device instance's + * peer changes. + * + * \param watch The xs_watch object used to register this callback + * function. + * \param vec An array of pointers to NUL terminated strings containing + * watch event data. The vector should be indexed via the + * xs_watch_type enum in xs_wire.h. + * \param vec_size The number of elements in vec. + * + * \return The device_t of the found device if any, or NULL. 
+ * + * \note device_t is a pointer type, so it can be compared against + * NULL for validity. + */ static void -xenbus_backend_changed(struct xenbus_watch *watch, const char **vec, - unsigned int len) +xenbusb_otherend_changed(struct xs_watch *watch, const char **vec, + unsigned int vec_size __unused) { struct xenbus_device_ivars *ivars; device_t dev; @@ -146,29 +146,43 @@ dev = ivars->xd_dev; if (!ivars->xd_otherend_path - || strncmp(ivars->xd_otherend_path, vec[XS_WATCH_PATH], - strlen(ivars->xd_otherend_path))) + || strncmp(ivars->xd_otherend_path, vec[XS_WATCH_PATH], + strlen(ivars->xd_otherend_path))) return; - newstate = xenbus_otherend_state(ivars); - XENBUS_BACKEND_CHANGED(dev, newstate); + newstate = xenbus_read_driver_state(ivars->xd_otherend_path); + XENBUS_OTHEREND_CHANGED(dev, newstate); } -static int -xenbus_device_exists(device_t dev, const char *node) +/** + * Search our internal record of configured devices (not the XenStore) + * to determine if the XenBus device indicated by \a node is known to + * the system. + * + * \param dev The XenBus bus instance to search for device children. + * \param node The XenStore node path for the device to find. + * + * \return The device_t of the found device if any, or NULL. + * + * \note device_t is a pointer type, so it can be compared against + * NULL for validity. 
+ */ +static device_t +xenbusb_device_exists(device_t dev, const char *node) { device_t *kids; + device_t result; struct xenbus_device_ivars *ivars; - int i, count, result; + int i, count; if (device_get_children(dev, &kids, &count)) return (FALSE); - result = FALSE; + result = NULL; for (i = 0; i < count; i++) { ivars = device_get_ivars(kids[i]); if (!strcmp(ivars->xd_node, node)) { - result = TRUE; + result = kids[i]; break; } } @@ -177,136 +191,135 @@ return (result); } -static int -xenbus_add_device(device_t dev, const char *bus, - const char *type, const char *id) +static void +xenbusb_delete_child(device_t dev, device_t child) { - device_t child; struct xenbus_device_ivars *ivars; - enum xenbus_state state; - char *statepath; - int error; - ivars = malloc(sizeof(struct xenbus_device_ivars), - M_DEVBUF, M_ZERO|M_WAITOK); - ivars->xd_node = kasprintf("%s/%s/%s", bus, type, id); + ivars = device_get_ivars(child); - if (xenbus_device_exists(dev, ivars->xd_node)) { - /* - * We are already tracking this node - */ - free(ivars->xd_node, M_DEVBUF); - free(ivars, M_DEVBUF); - return (0); - } + /* + * We no longer care about the otherend of the + * connection. Cancel the watch now so that we + * don't try to handle an event for a partially + * detached child. + */ + if (ivars->xd_otherend_watch.node != NULL) + xs_unregister_watch(&ivars->xd_otherend_watch); + + device_delete_child(dev, child); + xenbusb_free_child_ivars(ivars); +} - state = xenbus_read_driver_state(ivars->xd_node); +/** + * \param dev The NewBus device representing this XenBus bus. + * \param child The NewBus device representing a child of dev%'s XenBus bus. + */ +static void +xenbusb_verify_device(device_t dev, device_t child) +{ + if (xs_exists(XST_NIL, xenbus_get_node(child), "") == 0) { - if (state != XenbusStateInitialising) { /* - * Device is not new, so ignore it. This can - * happen if a device is going away after - * switching to Closed. + * Device tree has been removed from Xenbus. 
+ * Tear down the device. */ - free(ivars->xd_node, M_DEVBUF); - free(ivars, M_DEVBUF); - return (0); + xenbusb_delete_child(dev, child); } - - /* - * Find the backend details - */ - error = xenbus_gather(XBT_NIL, ivars->xd_node, - "backend-id", "%i", &ivars->xd_otherend_id, - "backend", NULL, &ivars->xd_otherend_path, - NULL); - if (error) - return (error); - - sx_init(&ivars->xd_lock, "xdlock"); - ivars->xd_type = strdup(type, M_DEVBUF); - ivars->xd_state = XenbusStateInitialising; - - statepath = malloc(strlen(ivars->xd_otherend_path) - + strlen("/state") + 1, M_DEVBUF, M_WAITOK); - sprintf(statepath, "%s/state", ivars->xd_otherend_path); - - ivars->xd_otherend_watch.node = statepath; - ivars->xd_otherend_watch.callback = xenbus_backend_changed; - - child = device_add_child(dev, NULL, -1); - ivars->xd_dev = child; - device_set_ivars(child, ivars); - - return (0); } +/** + * \brief Enumerate the devices on a XenBus bus and register them with + * the NewBus device tree. + * + * xenbusb_enumerate_bus() will create entries (in state DS_NOTPRESENT) + * for nodes that appear in the XenStore, but will not invoke probe/attach + * operations on drivers. Probe/Attach processing must be separately + * performed via an invocation of xenbusb_probe_children(). This is usually + * done via the xbs_probe_children task. + * + * \param xbs XenBus Bus device softc of the owner of the bus to enumerate. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. 
+ */ static int -xenbus_enumerate_type(device_t dev, const char *bus, const char *type) +xenbusb_enumerate_bus(struct xenbusb_softc *xbs) { - char **dir; - unsigned int i, count; + const char **types; + u_int type_idx; + u_int type_count; int error; - error = xenbus_directory(XBT_NIL, bus, type, &count, &dir); + error = xs_directory(XST_NIL, xbs->xbs_node, "", &type_count, &types); if (error) return (error); - for (i = 0; i < count; i++) - xenbus_add_device(dev, bus, type, dir[i]); - free(dir, M_DEVBUF); + for (type_idx = 0; type_idx < type_count; type_idx++) + XENBUSB_ENUMERATE_TYPE(xbs->xbs_dev, types[type_idx]); - return (0); -} - -static int -xenbus_enumerate_bus(device_t dev, const char *bus) -{ - char **dir; - unsigned int i, count; - int error; - - error = xenbus_directory(XBT_NIL, bus, "", &count, &dir); - if (error) - return (error); - for (i = 0; i < count; i++) { - xenbus_enumerate_type(dev, bus, dir[i]); - } - free(dir, M_DEVBUF); + free(types, M_XENSTORE); return (0); } +/** + * \brief Verify the existance of attached device instances and perform + * probe/attach processing for newly arrived devices. + * + * \param dev The NewBus device representing this XenBus bus. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + */ static int -xenbus_probe_children(device_t dev) +xenbusb_probe_children(device_t dev) { device_t *kids; struct xenbus_device_ivars *ivars; int i, count; - /* - * Probe any new devices and register watches for any that - * attach successfully. Since part of the protocol which - * establishes a connection with the other end is interrupt - * driven, we sleep until the device reaches a stable state - * (closed or connected). - */ if (device_get_children(dev, &kids, &count) == 0) { for (i = 0; i < count; i++) { - if (device_get_state(kids[i]) != DS_NOTPRESENT) + if (device_get_state(kids[i]) != DS_NOTPRESENT) { + /* + * We already know about this one. + * Make sure it's still here. 
+ */ + xenbusb_verify_device(dev, kids[i]); continue; + } + + if (device_probe_and_attach(kids[i])) { + /* + * Transition device to the closed state + * so the world knows that attachment will + * not occur. + */ + xenbus_set_state(kids[i], XenbusStateClosed); + + /* + * Remove our record of this device. + * So long as it remains in the closed + * state in the XenStore, we will not find + * it again. The state will only change + * if the control domain actively reconfigures + * this device. + */ + xenbusb_delete_child(dev, kids[i]); - if (device_probe_and_attach(kids[i])) continue; + } + + /* + * Now that we have a driver managing this device + * that can receive otherend state change events, + * hook up a watch for them. This is the only + * aspect of ivar intialization that is not (and + * cannot) be handled by xenbusb_add_device(). + */ ivars = device_get_ivars(kids[i]); - register_xenbus_watch( - &ivars->xd_otherend_watch); - sx_xlock(&ivars->xd_lock); - while (ivars->xd_state != XenbusStateClosed - && ivars->xd_state != XenbusStateConnected) - sx_sleep(&ivars->xd_state, &ivars->xd_lock, - 0, "xdattach", 0); - sx_xunlock(&ivars->xd_lock); + xs_register_watch(&ivars->xd_otherend_watch); } free(kids, M_TEMP); } @@ -314,108 +327,290 @@ return (0); } +/** + * \brief Task callback function to perform XenBus probe operations + * from a known safe context. + * + * \param arg The NewBus device_t representing the bus instance to + * on which to perform probe processing. + * \param pending The number of times this task was queued before it could + * be run. + */ static void -xenbus_probe_children_cb(void *arg, int pending) +xenbusb_probe_children_cb(void *arg, int pending __unused) { - device_t dev = (device_t) arg; + device_t dev = (device_t)arg; - xenbus_probe_children(dev); + /* + * Hold Giant until the Giant free newbus changes are committed. 
+ */ + mtx_lock(&Giant); + xenbusb_probe_children(dev); + mtx_unlock(&Giant); } +/** + * \brief XenStore watch callback for the root node of the XenStore + * subtree representing a XenBus. + * + * This callback performs, or delegates to the xbs_probe_children task, + * all processing necessary to handle dynmaic device arrival and departure + * events from a XenBus. + * + * \param watch The XenStore watch object associated with this callback. + * \param vec The XenStore watch event data. + * \param len The number of fields in the event data stream. + */ static void -xenbus_devices_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len) +xenbusb_devices_changed(struct xs_watch *watch, const char **vec, + unsigned int len) { - struct xenbus_softc *sc = (struct xenbus_softc *) watch; - device_t dev = sc->xs_dev; - char *node, *bus, *type, *id, *p; + struct xenbusb_softc *xbs; + device_t dev; + char *node; + char *bus; + char *type; + char *id; + char *p; + u_int component; + + xbs = (struct xenbusb_softc *)watch; + dev = xbs->xbs_dev; + + if (len <= XS_WATCH_PATH) { + device_printf(dev, "xenbusb_devices_changed: " + "Short Event Data.\n"); + return; + } - node = strdup(vec[XS_WATCH_PATH], M_DEVBUF); + node = strdup(vec[XS_WATCH_PATH], M_XENBUS); p = strchr(node, '/'); - if (!p) + if (p == NULL) goto out; bus = node; *p = 0; type = p + 1; p = strchr(type, '/'); - if (!p) + if (p == NULL) goto out; - *p = 0; - id = p + 1; + *p++ = 0; - p = strchr(id, '/'); - if (p) + /* + * Extract the device ID. A device ID has one or more path + * components separated by the '/' character. + * + * e.g. "/" for backend devices. 
+ */ + id = p; + for (component = 0; component < xbs->xbs_id_components; component++) { + p = strchr(p, '/'); + if (p == NULL) + break; + p++; + } + if (p != NULL) *p = 0; - xenbus_add_device(dev, bus, type, id); - taskqueue_enqueue(taskqueue_thread, &sc->xs_probechildren); + if (*id != 0 && component >= xbs->xbs_id_components - 1) { + xenbusb_add_device(xbs->xbs_dev, type, id); + taskqueue_enqueue(taskqueue_thread, &xbs->xbs_probe_children); + } out: - free(node, M_DEVBUF); + free(node, M_XENBUS); } +/** + * \brief Interrupt configuration hook callback associated with xbs_attch_ch. + * + * Since interrupts are always functional at the time of XenBus configuration, + * there is nothing to be done when the callback occurs. This hook is only + * registered to hold up boot processing while XenBus devices come online. + * + * \param arg Unused configuration hook callback argument. + */ static void -xenbus_attach_deferred(void *arg) +xenbusb_nop_confighook_cb(void *arg __unused) { - device_t dev = (device_t) arg; - struct xenbus_softc *sc = device_get_softc(dev); +} + +/** + * \brief Decrement the number of XenBus child devices in the + * connecting state by one and release the xbs_attch_ch + * interrupt configuration hook if the connecting count + * drops to zero. + * + * \param xbs XenBus Bus device softc of the owner of the bus to enumerate. 
+ */ +static void +xenbusb_release_confighook(struct xenbusb_softc *xbs) +{ + mtx_lock(&xbs->xbs_lock); + KASSERT(xbs->xbs_connecting_children > 0, + ("Connecting device count error\n")); + xbs->xbs_connecting_children--; + if (xbs->xbs_connecting_children == 0 + && (xbs->xbs_flags & XBS_ATTACH_CH_ACTIVE) != 0) { + xbs->xbs_flags &= ~XBS_ATTACH_CH_ACTIVE; + mtx_unlock(&xbs->xbs_lock); + config_intrhook_disestablish(&xbs->xbs_attach_ch); + } else { + mtx_unlock(&xbs->xbs_lock); + } +} + +/*--------------------------- Public Functions -------------------------------*/ +/*--------- API comments for these methods can be found in xenbusb.h ---------*/ +void +xenbusb_identify(driver_t *driver __unused, device_t parent) +{ + /* + * A single instance of each bus type for which we have a driver + * is always present in a system operating under Xen. + */ + BUS_ADD_CHILD(parent, 0, driver->name, 0); +} + +int +xenbusb_add_device(device_t dev, const char *type, const char *id) +{ + struct xenbusb_softc *xbs; + struct sbuf *devpath_sbuf; + char *devpath; + struct xenbus_device_ivars *ivars; int error; - - error = xenbus_enumerate_bus(dev, "device"); - if (error) - return; - xenbus_probe_children(dev); - sc->xs_dev = dev; - sc->xs_devicewatch.node = "device"; - sc->xs_devicewatch.callback = xenbus_devices_changed; + xbs = device_get_softc(dev); + devpath_sbuf = sbuf_new_auto(); + sbuf_printf(devpath_sbuf, "%s/%s/%s", xbs->xbs_node, type, id); + sbuf_finish(devpath_sbuf); + devpath = sbuf_data(devpath_sbuf); + + ivars = malloc(sizeof(*ivars), M_XENBUS, M_ZERO|M_WAITOK); + error = ENXIO; + + if (xs_exists(XST_NIL, devpath, "") != 0) { + device_t child; + enum xenbus_state state; + char *statepath; + + child = xenbusb_device_exists(dev, devpath); + if (child != NULL) { + /* + * We are already tracking this node + */ + error = 0; + goto out; + } + + state = xenbus_read_driver_state(devpath); + if (state != XenbusStateInitialising) { + /* + * Device is not new, so ignore it. 
This can + * happen if a device is going away after + * switching to Closed. + */ + printf("xenbusb_add_device: Device %s ignored. " + "State %d\n", devpath, state); + error = 0; + goto out; + } + + sx_init(&ivars->xd_lock, "xdlock"); + ivars->xd_flags = XDF_CONNECTING; + ivars->xd_node = strdup(devpath, M_XENBUS); + ivars->xd_type = strdup(type, M_XENBUS); + ivars->xd_devid = strdup(id, M_XENBUS); + ivars->xd_state = XenbusStateInitialising; - TASK_INIT(&sc->xs_probechildren, 0, xenbus_probe_children_cb, dev); + error = XENBUSB_GET_OTHEREND_NODE(dev, ivars); + if (error) { + printf("xenbus_update_device: %s no otherend id\n", + devpath); + goto out; + } + + statepath = malloc(strlen(ivars->xd_otherend_path) + + strlen("/state") + 1, M_XENBUS, M_WAITOK); + sprintf(statepath, "%s/state", ivars->xd_otherend_path); + + ivars->xd_otherend_watch.node = statepath; + ivars->xd_otherend_watch.callback = xenbusb_otherend_changed; + + mtx_lock(&xbs->xbs_lock); + xbs->xbs_connecting_children++; + mtx_unlock(&xbs->xbs_lock); + + child = device_add_child(dev, NULL, -1); + ivars->xd_dev = child; + device_set_ivars(child, ivars); + } - register_xenbus_watch(&sc->xs_devicewatch); +out: + sbuf_delete(devpath_sbuf); + if (error != 0) + xenbusb_free_child_ivars(ivars); - config_intrhook_disestablish(&sc->xs_attachcb); + return (error); } -static int -xenbus_attach(device_t dev) +int +xenbusb_attach(device_t dev, char *bus_node, u_int id_components) { - struct xenbus_softc *sc = device_get_softc(dev); + struct xenbusb_softc *xbs; - sc->xs_attachcb.ich_func = xenbus_attach_deferred; - sc->xs_attachcb.ich_arg = dev; - config_intrhook_establish(&sc->xs_attachcb); + xbs = device_get_softc(dev); + mtx_init(&xbs->xbs_lock, "xenbusb softc lock", NULL, MTX_DEF); + xbs->xbs_node = bus_node; + xbs->xbs_id_components = id_components; + xbs->xbs_dev = dev; - return (0); -} + /* + * Since XenBus busses are attached to the XenStore, and + * the XenStore does not probe children until after interrupt 
+ * services are available, this config hook is used solely + * to ensure that the remainder of the boot process (e.g. + * mount root) is deferred until child devices are adequately + * probed. We unblock the boot process as soon as the + * connecting child count in our softc goes to 0. + */ + xbs->xbs_attach_ch.ich_func = xenbusb_nop_confighook_cb; + xbs->xbs_attach_ch.ich_arg = dev; + config_intrhook_establish(&xbs->xbs_attach_ch); + xbs->xbs_flags |= XBS_ATTACH_CH_ACTIVE; + xbs->xbs_connecting_children = 1; -static int -xenbus_suspend(device_t dev) -{ - int error; + /* + * The subtree for this bus type may not yet exist + * causing initial enumeration to fail. We still + * want to return success from our attach though + * so that we are ready to handle devices for this + * bus when they are dynamically attached to us + * by a Xen management action. + */ + (void)xenbusb_enumerate_bus(xbs); + xenbusb_probe_children(dev); - DPRINTK(""); + xbs->xbs_device_watch.node = bus_node; + xbs->xbs_device_watch.callback = xenbusb_devices_changed; - error = bus_generic_suspend(dev); - if (error) - return (error); + TASK_INIT(&xbs->xbs_probe_children, 0, xenbusb_probe_children_cb, dev); + + xs_register_watch(&xbs->xbs_device_watch); - xs_suspend(); + xenbusb_release_confighook(xbs); return (0); } -static int -xenbus_resume(device_t dev) +int +xenbusb_resume(device_t dev) { device_t *kids; struct xenbus_device_ivars *ivars; int i, count, error; char *statepath; - xb_init_comms(); - xs_resume(); - /* * We must re-examine each device and find the new path for * its backend. @@ -427,32 +622,26 @@ ivars = device_get_ivars(kids[i]); - unregister_xenbus_watch( - &ivars->xd_otherend_watch); + xs_unregister_watch(&ivars->xd_otherend_watch); ivars->xd_state = XenbusStateInitialising; /* * Find the new backend details and * re-register our watch. 
*/ - free(ivars->xd_otherend_path, M_DEVBUF); - error = xenbus_gather(XBT_NIL, ivars->xd_node, - "backend-id", "%i", &ivars->xd_otherend_id, - "backend", NULL, &ivars->xd_otherend_path, - NULL); + error = XENBUSB_GET_OTHEREND_NODE(dev, ivars); if (error) return (error); DEVICE_RESUME(kids[i]); statepath = malloc(strlen(ivars->xd_otherend_path) - + strlen("/state") + 1, M_DEVBUF, M_WAITOK); + + strlen("/state") + 1, M_XENBUS, M_WAITOK); sprintf(statepath, "%s/state", ivars->xd_otherend_path); - free(ivars->xd_otherend_watch.node, M_DEVBUF); + free(ivars->xd_otherend_watch.node, M_XENBUS); ivars->xd_otherend_watch.node = statepath; - register_xenbus_watch( - &ivars->xd_otherend_watch); + xs_register_watch(&ivars->xd_otherend_watch); #if 0 /* @@ -475,8 +664,8 @@ return (0); } -static int -xenbus_print_child(device_t dev, device_t child) +int +xenbusb_print_child(device_t dev, device_t child) { struct xenbus_device_ivars *ivars = device_get_ivars(child); int retval = 0; @@ -488,9 +677,8 @@ return (retval); } -static int -xenbus_read_ivar(device_t dev, device_t child, int index, - uintptr_t * result) +int +xenbusb_read_ivar(device_t dev, device_t child, int index, uintptr_t *result) { struct xenbus_device_ivars *ivars = device_get_ivars(child); @@ -519,38 +707,62 @@ return (ENOENT); } -static int -xenbus_write_ivar(device_t dev, device_t child, int index, uintptr_t value) +int +xenbusb_write_ivar(device_t dev, device_t child, int index, uintptr_t value) { struct xenbus_device_ivars *ivars = device_get_ivars(child); enum xenbus_state newstate; int currstate; - int error; switch (index) { case XENBUS_IVAR_STATE: + { + int error; + newstate = (enum xenbus_state) value; sx_xlock(&ivars->xd_lock); - if (ivars->xd_state == newstate) + if (ivars->xd_state == newstate) { + error = 0; goto out; + } - error = xenbus_scanf(XBT_NIL, ivars->xd_node, "state", + error = xs_scanf(XST_NIL, ivars->xd_node, "state", NULL, "%d", &currstate); if (error) goto out; - error = 
xenbus_printf(XBT_NIL, ivars->xd_node, "state", - "%d", newstate); + do { + error = xs_printf(XST_NIL, ivars->xd_node, "state", + "%d", newstate); + } while (error == EAGAIN); if (error) { - if (newstate != XenbusStateClosing) /* Avoid looping */ - xenbus_dev_fatal(dev, error, "writing new state"); + /* + * Avoid looping through xenbus_dev_fatal() + * which calls xenbus_write_ivar to set the + * state to closing. + */ + if (newstate != XenbusStateClosing) + xenbus_dev_fatal(dev, error, + "writing new state"); goto out; } ivars->xd_state = newstate; + + if ((ivars->xd_flags & XDF_CONNECTING) != 0 + && (newstate == XenbusStateClosed + || newstate == XenbusStateConnected)) { + struct xenbusb_softc *xbs; + + ivars->xd_flags &= ~XDF_CONNECTING; + xbs = device_get_softc(dev); + xenbusb_release_confighook(xbs); + } + wakeup(&ivars->xd_state); out: sx_xunlock(&ivars->xd_lock); - return (0); + return (error); + } case XENBUS_IVAR_NODE: case XENBUS_IVAR_TYPE: @@ -564,39 +776,3 @@ return (ENOENT); } - -SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD, NULL, "Xen"); -SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xen_store_evtchn, 0, ""); -SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, ""); - -static device_method_t xenbus_methods[] = { - /* Device interface */ - DEVMETHOD(device_identify, xenbus_identify), - DEVMETHOD(device_probe, xenbus_probe), - DEVMETHOD(device_attach, xenbus_attach), - DEVMETHOD(device_detach, bus_generic_detach), - DEVMETHOD(device_shutdown, bus_generic_shutdown), - DEVMETHOD(device_suspend, xenbus_suspend), - DEVMETHOD(device_resume, xenbus_resume), - - /* Bus interface */ - DEVMETHOD(bus_print_child, xenbus_print_child), - DEVMETHOD(bus_read_ivar, xenbus_read_ivar), - DEVMETHOD(bus_write_ivar, xenbus_write_ivar), - - { 0, 0 } -}; - -static char driver_name[] = "xenbus"; -static driver_t xenbus_driver = { - driver_name, - xenbus_methods, - sizeof(struct xenbus_softc), -}; -devclass_t xenbus_devclass; - -#ifdef XENHVM 
-DRIVER_MODULE(xenbus, xenpci, xenbus_driver, xenbus_devclass, 0, 0); -#else -DRIVER_MODULE(xenbus, nexus, xenbus_driver, xenbus_devclass, 0, 0); -#endif diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/xenbus/xenbusb.c.orig head.xen/sys/xen/xenbus/xenbusb.c.orig --- head.moves/sys/xen/xenbus/xenbusb.c.orig 1969-12-31 17:00:00.000000000 -0700 +++ head.xen/sys/xen/xenbus/xenbusb.c.orig 2010-09-16 17:07:40.945712835 -0600 @@ -0,0 +1,602 @@ +/****************************************************************************** + * Talks to Xen Store to figure out what devices we have. + * + * Copyright (C) 2008 Doug Rabson + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 Mike Wray, Hewlett-Packard + * Copyright (C) 2005 XenSource Ltd + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#if 0 +#define DPRINTK(fmt, args...) \ + printf("xenbus_probe (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) +#else +#define DPRINTK(fmt, args...) ((void)0) +#endif + +#include +__FBSDID("$FreeBSD: head/sys/xen/xenbus/xenbusb.c -1 $"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +struct xenbus_softc { + struct xenbus_watch xs_devicewatch; + struct task xs_probechildren; + struct intr_config_hook xs_attachcb; + device_t xs_dev; +}; + +struct xenbus_device_ivars { + struct xenbus_watch xd_otherend_watch; /* must be first */ + struct sx xd_lock; + device_t xd_dev; + char *xd_node; /* node name in xenstore */ + char *xd_type; /* xen device type */ + enum xenbus_state xd_state; + int xd_otherend_id; + char *xd_otherend_path; +}; + +/* Simplified asprintf. */ +char * +kasprintf(const char *fmt, ...) +{ + va_list ap; + unsigned int len; + char *p, dummy[1]; + + va_start(ap, fmt); + /* FIXME: vsnprintf has a bug, NULL should work */ + len = vsnprintf(dummy, 0, fmt, ap); + va_end(ap); + + p = malloc(len + 1, M_DEVBUF, M_WAITOK); + va_start(ap, fmt); + vsprintf(p, fmt, ap); + va_end(ap); + return p; +} + +static void +xenbus_identify(driver_t *driver, device_t parent) +{ + + BUS_ADD_CHILD(parent, 0, "xenbus", 0); +} + +static int +xenbus_probe(device_t dev) +{ + int err = 0; + + DPRINTK(""); + + /* Initialize the interface to xenstore. 
*/ + err = xs_init(); + if (err) { + log(LOG_WARNING, + "XENBUS: Error initializing xenstore comms: %i\n", err); + return (ENXIO); + } + err = gnttab_init(); + if (err) { + log(LOG_WARNING, + "XENBUS: Error initializing grant table: %i\n", err); + return (ENXIO); + } + device_set_desc(dev, "Xen Devices"); + + return (0); +} + +static enum xenbus_state +xenbus_otherend_state(struct xenbus_device_ivars *ivars) +{ + + return (xenbus_read_driver_state(ivars->xd_otherend_path)); +} + +static void +xenbus_backend_changed(struct xenbus_watch *watch, const char **vec, + unsigned int len) +{ + struct xenbus_device_ivars *ivars; + device_t dev; + enum xenbus_state newstate; + + ivars = (struct xenbus_device_ivars *) watch; + dev = ivars->xd_dev; + + if (!ivars->xd_otherend_path + || strncmp(ivars->xd_otherend_path, vec[XS_WATCH_PATH], + strlen(ivars->xd_otherend_path))) + return; + + newstate = xenbus_otherend_state(ivars); + XENBUS_BACKEND_CHANGED(dev, newstate); +} + +static int +xenbus_device_exists(device_t dev, const char *node) +{ + device_t *kids; + struct xenbus_device_ivars *ivars; + int i, count, result; + + if (device_get_children(dev, &kids, &count)) + return (FALSE); + + result = FALSE; + for (i = 0; i < count; i++) { + ivars = device_get_ivars(kids[i]); + if (!strcmp(ivars->xd_node, node)) { + result = TRUE; + break; + } + } + free(kids, M_TEMP); + + return (result); +} + +static int +xenbus_add_device(device_t dev, const char *bus, + const char *type, const char *id) +{ + device_t child; + struct xenbus_device_ivars *ivars; + enum xenbus_state state; + char *statepath; + int error; + + ivars = malloc(sizeof(struct xenbus_device_ivars), + M_DEVBUF, M_ZERO|M_WAITOK); + ivars->xd_node = kasprintf("%s/%s/%s", bus, type, id); + + if (xenbus_device_exists(dev, ivars->xd_node)) { + /* + * We are already tracking this node + */ + free(ivars->xd_node, M_DEVBUF); + free(ivars, M_DEVBUF); + return (0); + } + + state = xenbus_read_driver_state(ivars->xd_node); + + if 
(state != XenbusStateInitialising) { + /* + * Device is not new, so ignore it. This can + * happen if a device is going away after + * switching to Closed. + */ + free(ivars->xd_node, M_DEVBUF); + free(ivars, M_DEVBUF); + return (0); + } + + /* + * Find the backend details + */ + error = xenbus_gather(XBT_NIL, ivars->xd_node, + "backend-id", "%i", &ivars->xd_otherend_id, + "backend", NULL, &ivars->xd_otherend_path, + NULL); + if (error) + return (error); + + sx_init(&ivars->xd_lock, "xdlock"); + ivars->xd_type = strdup(type, M_DEVBUF); + ivars->xd_state = XenbusStateInitialising; + + statepath = malloc(strlen(ivars->xd_otherend_path) + + strlen("/state") + 1, M_DEVBUF, M_WAITOK); + sprintf(statepath, "%s/state", ivars->xd_otherend_path); + + ivars->xd_otherend_watch.node = statepath; + ivars->xd_otherend_watch.callback = xenbus_backend_changed; + + child = device_add_child(dev, NULL, -1); + ivars->xd_dev = child; + device_set_ivars(child, ivars); + + return (0); +} + +static int +xenbus_enumerate_type(device_t dev, const char *bus, const char *type) +{ + char **dir; + unsigned int i, count; + int error; + + error = xenbus_directory(XBT_NIL, bus, type, &count, &dir); + if (error) + return (error); + for (i = 0; i < count; i++) + xenbus_add_device(dev, bus, type, dir[i]); + + free(dir, M_DEVBUF); + + return (0); +} + +static int +xenbus_enumerate_bus(device_t dev, const char *bus) +{ + char **dir; + unsigned int i, count; + int error; + + error = xenbus_directory(XBT_NIL, bus, "", &count, &dir); + if (error) + return (error); + for (i = 0; i < count; i++) { + xenbus_enumerate_type(dev, bus, dir[i]); + } + free(dir, M_DEVBUF); + + return (0); +} + +static int +xenbus_probe_children(device_t dev) +{ + device_t *kids; + struct xenbus_device_ivars *ivars; + int i, count; + + /* + * Probe any new devices and register watches for any that + * attach successfully. 
Since part of the protocol which + * establishes a connection with the other end is interrupt + * driven, we sleep until the device reaches a stable state + * (closed or connected). + */ + if (device_get_children(dev, &kids, &count) == 0) { + for (i = 0; i < count; i++) { + if (device_get_state(kids[i]) != DS_NOTPRESENT) + continue; + + if (device_probe_and_attach(kids[i])) + continue; + ivars = device_get_ivars(kids[i]); + register_xenbus_watch( + &ivars->xd_otherend_watch); + sx_xlock(&ivars->xd_lock); + while (ivars->xd_state != XenbusStateClosed + && ivars->xd_state != XenbusStateConnected) + sx_sleep(&ivars->xd_state, &ivars->xd_lock, + 0, "xdattach", 0); + sx_xunlock(&ivars->xd_lock); + } + free(kids, M_TEMP); + } + + return (0); +} + +static void +xenbus_probe_children_cb(void *arg, int pending) +{ + device_t dev = (device_t) arg; + + xenbus_probe_children(dev); +} + +static void +xenbus_devices_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + struct xenbus_softc *sc = (struct xenbus_softc *) watch; + device_t dev = sc->xs_dev; + char *node, *bus, *type, *id, *p; + + node = strdup(vec[XS_WATCH_PATH], M_DEVBUF); + p = strchr(node, '/'); + if (!p) + goto out; + bus = node; + *p = 0; + type = p + 1; + + p = strchr(type, '/'); + if (!p) + goto out; + *p = 0; + id = p + 1; + + p = strchr(id, '/'); + if (p) + *p = 0; + + xenbus_add_device(dev, bus, type, id); + taskqueue_enqueue(taskqueue_thread, &sc->xs_probechildren); +out: + free(node, M_DEVBUF); +} + +static void +xenbus_attach_deferred(void *arg) +{ + device_t dev = (device_t) arg; + struct xenbus_softc *sc = device_get_softc(dev); + int error; + + error = xenbus_enumerate_bus(dev, "device"); + if (error) + return; + xenbus_probe_children(dev); + + sc->xs_dev = dev; + sc->xs_devicewatch.node = "device"; + sc->xs_devicewatch.callback = xenbus_devices_changed; + + TASK_INIT(&sc->xs_probechildren, 0, xenbus_probe_children_cb, dev); + + register_xenbus_watch(&sc->xs_devicewatch); + + 
config_intrhook_disestablish(&sc->xs_attachcb); +} + +static int +xenbus_attach(device_t dev) +{ + struct xenbus_softc *sc = device_get_softc(dev); + + sc->xs_attachcb.ich_func = xenbus_attach_deferred; + sc->xs_attachcb.ich_arg = dev; + config_intrhook_establish(&sc->xs_attachcb); + + return (0); +} + +static int +xenbus_suspend(device_t dev) +{ + int error; + + DPRINTK(""); + + error = bus_generic_suspend(dev); + if (error) + return (error); + + xs_suspend(); + + return (0); +} + +static int +xenbus_resume(device_t dev) +{ + device_t *kids; + struct xenbus_device_ivars *ivars; + int i, count, error; + char *statepath; + + xb_init_comms(); + xs_resume(); + + /* + * We must re-examine each device and find the new path for + * its backend. + */ + if (device_get_children(dev, &kids, &count) == 0) { + for (i = 0; i < count; i++) { + if (device_get_state(kids[i]) == DS_NOTPRESENT) + continue; + + ivars = device_get_ivars(kids[i]); + + unregister_xenbus_watch( + &ivars->xd_otherend_watch); + ivars->xd_state = XenbusStateInitialising; + + /* + * Find the new backend details and + * re-register our watch. + */ + free(ivars->xd_otherend_path, M_DEVBUF); + error = xenbus_gather(XBT_NIL, ivars->xd_node, + "backend-id", "%i", &ivars->xd_otherend_id, + "backend", NULL, &ivars->xd_otherend_path, + NULL); + if (error) + return (error); + + DEVICE_RESUME(kids[i]); + + statepath = malloc(strlen(ivars->xd_otherend_path) + + strlen("/state") + 1, M_DEVBUF, M_WAITOK); + sprintf(statepath, "%s/state", ivars->xd_otherend_path); + + free(ivars->xd_otherend_watch.node, M_DEVBUF); + ivars->xd_otherend_watch.node = statepath; + register_xenbus_watch( + &ivars->xd_otherend_watch); + +#if 0 + /* + * Can't do this yet since we are running in + * the xenwatch thread and if we sleep here, + * we will stop delivering watch notifications + * and the device will never come back online. 
+ */ + sx_xlock(&ivars->xd_lock); + while (ivars->xd_state != XenbusStateClosed + && ivars->xd_state != XenbusStateConnected) + sx_sleep(&ivars->xd_state, &ivars->xd_lock, + 0, "xdresume", 0); + sx_xunlock(&ivars->xd_lock); +#endif + } + free(kids, M_TEMP); + } + + return (0); +} + +static int +xenbus_print_child(device_t dev, device_t child) +{ + struct xenbus_device_ivars *ivars = device_get_ivars(child); + int retval = 0; + + retval += bus_print_child_header(dev, child); + retval += printf(" at %s", ivars->xd_node); + retval += bus_print_child_footer(dev, child); + + return (retval); +} + +static int +xenbus_read_ivar(device_t dev, device_t child, int index, + uintptr_t * result) +{ + struct xenbus_device_ivars *ivars = device_get_ivars(child); + + switch (index) { + case XENBUS_IVAR_NODE: + *result = (uintptr_t) ivars->xd_node; + return (0); + + case XENBUS_IVAR_TYPE: + *result = (uintptr_t) ivars->xd_type; + return (0); + + case XENBUS_IVAR_STATE: + *result = (uintptr_t) ivars->xd_state; + return (0); + + case XENBUS_IVAR_OTHEREND_ID: + *result = (uintptr_t) ivars->xd_otherend_id; + return (0); + + case XENBUS_IVAR_OTHEREND_PATH: + *result = (uintptr_t) ivars->xd_otherend_path; + return (0); + } + + return (ENOENT); +} + +static int +xenbus_write_ivar(device_t dev, device_t child, int index, uintptr_t value) +{ + struct xenbus_device_ivars *ivars = device_get_ivars(child); + enum xenbus_state newstate; + int currstate; + int error; + + switch (index) { + case XENBUS_IVAR_STATE: + newstate = (enum xenbus_state) value; + sx_xlock(&ivars->xd_lock); + if (ivars->xd_state == newstate) + goto out; + + error = xenbus_scanf(XBT_NIL, ivars->xd_node, "state", + NULL, "%d", &currstate); + if (error) + goto out; + + error = xenbus_printf(XBT_NIL, ivars->xd_node, "state", + "%d", newstate); + if (error) { + if (newstate != XenbusStateClosing) /* Avoid looping */ + xenbus_dev_fatal(dev, error, "writing new state"); + goto out; + } + ivars->xd_state = newstate; + 
wakeup(&ivars->xd_state); + out: + sx_xunlock(&ivars->xd_lock); + return (0); + + case XENBUS_IVAR_NODE: + case XENBUS_IVAR_TYPE: + case XENBUS_IVAR_OTHEREND_ID: + case XENBUS_IVAR_OTHEREND_PATH: + /* + * These variables are read-only. + */ + return (EINVAL); + } + + return (ENOENT); +} + +SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD, NULL, "Xen"); +SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xen_store_evtchn, 0, ""); +SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, ""); + +static device_method_t xenbus_methods[] = { + /* Device interface */ + DEVMETHOD(device_identify, xenbus_identify), + DEVMETHOD(device_probe, xenbus_probe), + DEVMETHOD(device_attach, xenbus_attach), + DEVMETHOD(device_detach, bus_generic_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, xenbus_suspend), + DEVMETHOD(device_resume, xenbus_resume), + + /* Bus interface */ + DEVMETHOD(bus_print_child, xenbus_print_child), + DEVMETHOD(bus_read_ivar, xenbus_read_ivar), + DEVMETHOD(bus_write_ivar, xenbus_write_ivar), + + { 0, 0 } +}; + +static char driver_name[] = "xenbus"; +static driver_t xenbus_driver = { + driver_name, + xenbus_methods, + sizeof(struct xenbus_softc), +}; +devclass_t xenbus_devclass; + +#ifdef XENHVM +DRIVER_MODULE(xenbus, xenpci, xenbus_driver, xenbus_devclass, 0, 0); +#else +DRIVER_MODULE(xenbus, nexus, xenbus_driver, xenbus_devclass, 0, 0); +#endif diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/xenbus/xenbusb.h head.xen/sys/xen/xenbus/xenbusb.h --- head.moves/sys/xen/xenbus/xenbusb.h 2010-09-17 07:59:40.076350806 -0600 +++ head.xen/sys/xen/xenbus/xenbusb.h 2010-09-16 17:23:24.811402749 -0600 @@ -1,602 +1,275 @@ -/****************************************************************************** - * Talks to Xen Store to figure out what devices we have. +/*- + * Core definitions and data structures shareable across OS platforms. * + * Copyright (c) 2010 Justin T. 
Gibbs, Spectra Logic Corporation * Copyright (C) 2008 Doug Rabson - * Copyright (C) 2005 Rusty Russell, IBM Corporation - * Copyright (C) 2005 Mike Wray, Hewlett-Packard - * Copyright (C) 2005 XenSource Ltd - * - * This file may be distributed separately from the Linux kernel, or - * incorporated into other software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#if 0 -#define DPRINTK(fmt, args...) \ - printf("xenbus_probe (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) -#else -#define DPRINTK(fmt, args...) 
((void)0) -#endif - -#include -__FBSDID("$FreeBSD: head/sys/xen/xenbus/xenbusb.h -1 $"); - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -struct xenbus_softc { - struct xenbus_watch xs_devicewatch; - struct task xs_probechildren; - struct intr_config_hook xs_attachcb; - device_t xs_dev; + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. 
+ * + * $FreeBSD$ + */ +#ifndef _XEN_XENBUS_XENBUSB_H +#define _XEN_XENBUS_XENBUSB_H + +/** + * \file xenbusb.h + * + * Datastructures and function declarations for use in implementing + * bus attachements (e.g. frontend and backend device busses) for XenBus. + */ +#include "xenbusb_if.h" + +/** + * Enumeration of state flag values for the xbs_flags field of + * the xenbusb_softc structure. + */ +typedef enum { + /** */ + XBS_ATTACH_CH_ACTIVE = 0x01 +} xenbusb_softc_flag; + +/** + * \brief Container for all state needed to manage a Xenbus Bus + * attachment. + */ +struct xenbusb_softc { + /** + * XenStore watch used to monitor the subtree of the + * XenStore where devices for this bus attachment arrive + * and depart. + * + * \note This field must be the first in the softc structure + * so that a simple cast can be used to retrieve the + * softc from within a XenStore watch event callback. + */ + struct xs_watch xbs_device_watch; + + /** Mutex used to protect fields of the xenbusb_softc. */ + struct mtx xbs_lock; + + /** State flags. */ + xenbusb_softc_flag xbs_flags; + + /** + * A dedicated task for processing child arrival and + * departure events. + */ + struct task xbs_probe_children; + + /** + * Config Hook used to block boot processing until + * XenBus devices complete their connection processing + * with other VMs. + */ + struct intr_config_hook xbs_attach_ch; + + /** + * The number of children for this bus that are still + * in the connecting (to other VMs) state. This variable + * is used to determine when to release xbs_attach_ch. + */ + u_int xbs_connecting_children; + + /** The NewBus device_t for this bus attachment. */ + device_t xbs_dev; + + /** + * The VM relative path to the XenStore subtree this + * bus attachment manages. + */ + const char *xbs_node; + + /** + * The number of path components (strings separated by the '/' + * character) that make up the device ID on this bus. 
+ */ + u_int xbs_id_components; }; +/** + * Enumeration of state flag values for the xbs_flags field of + * the xenbusb_softc structure. + */ +typedef enum { + + /** + * This device is contributing to the xbs_connecting_children + * count of its parent bus. + */ + XDF_CONNECTING = 0x01 +} xenbus_dev_flag; + +/** Instance variables for devices on a XenBus bus. */ struct xenbus_device_ivars { - struct xenbus_watch xd_otherend_watch; /* must be first */ - struct sx xd_lock; - device_t xd_dev; - char *xd_node; /* node name in xenstore */ - char *xd_type; /* xen device type */ - enum xenbus_state xd_state; - int xd_otherend_id; - char *xd_otherend_path; + /** + * XenStore watch used to monitor the subtree of the + * XenStore where information about the otherend of + * the split Xen device this device instance represents. + * + * \note This field must be the first in the instance + * variable structure so that a simple cast can be + * used to retrieve ivar data from within a XenStore + * watch event callback. + */ + struct xs_watch xd_otherend_watch; + + /** Sleepable lock used to protect instance data. */ + struct sx xd_lock; + + /** State flags. */ + xenbus_dev_flag xd_flags; + + /** The NewBus device_t for this XenBus device instance. */ + device_t xd_dev; + + /** + * The VM relative path to the XenStore subtree representing + * this VMs half of this device. + */ + char *xd_node; + + /** XenBus device type ("vbd", "vif", etc.). */ + char *xd_type; + + /** XenBus device identifier (eg. "832", "6/768") */ + char *xd_devid; + + /** + * Cached version of /state node in the XenStore. + */ + enum xenbus_state xd_state; + + /** The VM identifier of the other end of this split device. */ + int xd_otherend_id; + + /** + * The path to the subtree of the XenStore where information + * about the otherend of this split device instance. + */ + char *xd_otherend_path; }; -/* Simplified asprintf. */ -char * -kasprintf(const char *fmt, ...) 
-{ - va_list ap; - unsigned int len; - char *p, dummy[1]; - - va_start(ap, fmt); - /* FIXME: vsnprintf has a bug, NULL should work */ - len = vsnprintf(dummy, 0, fmt, ap); - va_end(ap); - - p = malloc(len + 1, M_DEVBUF, M_WAITOK); - va_start(ap, fmt); - vsprintf(p, fmt, ap); - va_end(ap); - return p; -} - -static void -xenbus_identify(driver_t *driver, device_t parent) -{ - - BUS_ADD_CHILD(parent, 0, "xenbus", 0); -} - -static int -xenbus_probe(device_t dev) -{ - int err = 0; - - DPRINTK(""); - - /* Initialize the interface to xenstore. */ - err = xs_init(); - if (err) { - log(LOG_WARNING, - "XENBUS: Error initializing xenstore comms: %i\n", err); - return (ENXIO); - } - err = gnttab_init(); - if (err) { - log(LOG_WARNING, - "XENBUS: Error initializing grant table: %i\n", err); - return (ENXIO); - } - device_set_desc(dev, "Xen Devices"); - - return (0); -} - -static enum xenbus_state -xenbus_otherend_state(struct xenbus_device_ivars *ivars) -{ - - return (xenbus_read_driver_state(ivars->xd_otherend_path)); -} - -static void -xenbus_backend_changed(struct xenbus_watch *watch, const char **vec, - unsigned int len) -{ - struct xenbus_device_ivars *ivars; - device_t dev; - enum xenbus_state newstate; - - ivars = (struct xenbus_device_ivars *) watch; - dev = ivars->xd_dev; - - if (!ivars->xd_otherend_path - || strncmp(ivars->xd_otherend_path, vec[XS_WATCH_PATH], - strlen(ivars->xd_otherend_path))) - return; - - newstate = xenbus_otherend_state(ivars); - XENBUS_BACKEND_CHANGED(dev, newstate); -} - -static int -xenbus_device_exists(device_t dev, const char *node) -{ - device_t *kids; - struct xenbus_device_ivars *ivars; - int i, count, result; - - if (device_get_children(dev, &kids, &count)) - return (FALSE); - - result = FALSE; - for (i = 0; i < count; i++) { - ivars = device_get_ivars(kids[i]); - if (!strcmp(ivars->xd_node, node)) { - result = TRUE; - break; - } - } - free(kids, M_TEMP); - - return (result); -} - -static int -xenbus_add_device(device_t dev, const char 
*bus, - const char *type, const char *id) -{ - device_t child; - struct xenbus_device_ivars *ivars; - enum xenbus_state state; - char *statepath; - int error; - - ivars = malloc(sizeof(struct xenbus_device_ivars), - M_DEVBUF, M_ZERO|M_WAITOK); - ivars->xd_node = kasprintf("%s/%s/%s", bus, type, id); - - if (xenbus_device_exists(dev, ivars->xd_node)) { - /* - * We are already tracking this node - */ - free(ivars->xd_node, M_DEVBUF); - free(ivars, M_DEVBUF); - return (0); - } - - state = xenbus_read_driver_state(ivars->xd_node); - - if (state != XenbusStateInitialising) { - /* - * Device is not new, so ignore it. This can - * happen if a device is going away after - * switching to Closed. - */ - free(ivars->xd_node, M_DEVBUF); - free(ivars, M_DEVBUF); - return (0); - } - - /* - * Find the backend details - */ - error = xenbus_gather(XBT_NIL, ivars->xd_node, - "backend-id", "%i", &ivars->xd_otherend_id, - "backend", NULL, &ivars->xd_otherend_path, - NULL); - if (error) - return (error); - - sx_init(&ivars->xd_lock, "xdlock"); - ivars->xd_type = strdup(type, M_DEVBUF); - ivars->xd_state = XenbusStateInitialising; - - statepath = malloc(strlen(ivars->xd_otherend_path) - + strlen("/state") + 1, M_DEVBUF, M_WAITOK); - sprintf(statepath, "%s/state", ivars->xd_otherend_path); - - ivars->xd_otherend_watch.node = statepath; - ivars->xd_otherend_watch.callback = xenbus_backend_changed; - - child = device_add_child(dev, NULL, -1); - ivars->xd_dev = child; - device_set_ivars(child, ivars); - - return (0); -} - -static int -xenbus_enumerate_type(device_t dev, const char *bus, const char *type) -{ - char **dir; - unsigned int i, count; - int error; - - error = xenbus_directory(XBT_NIL, bus, type, &count, &dir); - if (error) - return (error); - for (i = 0; i < count; i++) - xenbus_add_device(dev, bus, type, dir[i]); - - free(dir, M_DEVBUF); - - return (0); -} - -static int -xenbus_enumerate_bus(device_t dev, const char *bus) -{ - char **dir; - unsigned int i, count; - int error; - 
- error = xenbus_directory(XBT_NIL, bus, "", &count, &dir); - if (error) - return (error); - for (i = 0; i < count; i++) { - xenbus_enumerate_type(dev, bus, dir[i]); - } - free(dir, M_DEVBUF); - - return (0); -} - -static int -xenbus_probe_children(device_t dev) -{ - device_t *kids; - struct xenbus_device_ivars *ivars; - int i, count; - - /* - * Probe any new devices and register watches for any that - * attach successfully. Since part of the protocol which - * establishes a connection with the other end is interrupt - * driven, we sleep until the device reaches a stable state - * (closed or connected). - */ - if (device_get_children(dev, &kids, &count) == 0) { - for (i = 0; i < count; i++) { - if (device_get_state(kids[i]) != DS_NOTPRESENT) - continue; - - if (device_probe_and_attach(kids[i])) - continue; - ivars = device_get_ivars(kids[i]); - register_xenbus_watch( - &ivars->xd_otherend_watch); - sx_xlock(&ivars->xd_lock); - while (ivars->xd_state != XenbusStateClosed - && ivars->xd_state != XenbusStateConnected) - sx_sleep(&ivars->xd_state, &ivars->xd_lock, - 0, "xdattach", 0); - sx_xunlock(&ivars->xd_lock); - } - free(kids, M_TEMP); - } - - return (0); -} - -static void -xenbus_probe_children_cb(void *arg, int pending) -{ - device_t dev = (device_t) arg; - - xenbus_probe_children(dev); -} - -static void -xenbus_devices_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len) -{ - struct xenbus_softc *sc = (struct xenbus_softc *) watch; - device_t dev = sc->xs_dev; - char *node, *bus, *type, *id, *p; - - node = strdup(vec[XS_WATCH_PATH], M_DEVBUF); - p = strchr(node, '/'); - if (!p) - goto out; - bus = node; - *p = 0; - type = p + 1; - - p = strchr(type, '/'); - if (!p) - goto out; - *p = 0; - id = p + 1; - - p = strchr(id, '/'); - if (p) - *p = 0; - - xenbus_add_device(dev, bus, type, id); - taskqueue_enqueue(taskqueue_thread, &sc->xs_probechildren); -out: - free(node, M_DEVBUF); -} - -static void -xenbus_attach_deferred(void *arg) -{ - 
device_t dev = (device_t) arg; - struct xenbus_softc *sc = device_get_softc(dev); - int error; - - error = xenbus_enumerate_bus(dev, "device"); - if (error) - return; - xenbus_probe_children(dev); - - sc->xs_dev = dev; - sc->xs_devicewatch.node = "device"; - sc->xs_devicewatch.callback = xenbus_devices_changed; - - TASK_INIT(&sc->xs_probechildren, 0, xenbus_probe_children_cb, dev); - - register_xenbus_watch(&sc->xs_devicewatch); - - config_intrhook_disestablish(&sc->xs_attachcb); -} - -static int -xenbus_attach(device_t dev) -{ - struct xenbus_softc *sc = device_get_softc(dev); - - sc->xs_attachcb.ich_func = xenbus_attach_deferred; - sc->xs_attachcb.ich_arg = dev; - config_intrhook_establish(&sc->xs_attachcb); - - return (0); -} - -static int -xenbus_suspend(device_t dev) -{ - int error; - - DPRINTK(""); - - error = bus_generic_suspend(dev); - if (error) - return (error); - - xs_suspend(); - - return (0); -} - -static int -xenbus_resume(device_t dev) -{ - device_t *kids; - struct xenbus_device_ivars *ivars; - int i, count, error; - char *statepath; - - xb_init_comms(); - xs_resume(); - - /* - * We must re-examine each device and find the new path for - * its backend. - */ - if (device_get_children(dev, &kids, &count) == 0) { - for (i = 0; i < count; i++) { - if (device_get_state(kids[i]) == DS_NOTPRESENT) - continue; - - ivars = device_get_ivars(kids[i]); - - unregister_xenbus_watch( - &ivars->xd_otherend_watch); - ivars->xd_state = XenbusStateInitialising; - - /* - * Find the new backend details and - * re-register our watch. 
- */ - free(ivars->xd_otherend_path, M_DEVBUF); - error = xenbus_gather(XBT_NIL, ivars->xd_node, - "backend-id", "%i", &ivars->xd_otherend_id, - "backend", NULL, &ivars->xd_otherend_path, - NULL); - if (error) - return (error); - - DEVICE_RESUME(kids[i]); - - statepath = malloc(strlen(ivars->xd_otherend_path) - + strlen("/state") + 1, M_DEVBUF, M_WAITOK); - sprintf(statepath, "%s/state", ivars->xd_otherend_path); - - free(ivars->xd_otherend_watch.node, M_DEVBUF); - ivars->xd_otherend_watch.node = statepath; - register_xenbus_watch( - &ivars->xd_otherend_watch); - -#if 0 - /* - * Can't do this yet since we are running in - * the xenwatch thread and if we sleep here, - * we will stop delivering watch notifications - * and the device will never come back online. - */ - sx_xlock(&ivars->xd_lock); - while (ivars->xd_state != XenbusStateClosed - && ivars->xd_state != XenbusStateConnected) - sx_sleep(&ivars->xd_state, &ivars->xd_lock, - 0, "xdresume", 0); - sx_xunlock(&ivars->xd_lock); -#endif - } - free(kids, M_TEMP); - } - - return (0); -} - -static int -xenbus_print_child(device_t dev, device_t child) -{ - struct xenbus_device_ivars *ivars = device_get_ivars(child); - int retval = 0; - - retval += bus_print_child_header(dev, child); - retval += printf(" at %s", ivars->xd_node); - retval += bus_print_child_footer(dev, child); - - return (retval); -} - -static int -xenbus_read_ivar(device_t dev, device_t child, int index, - uintptr_t * result) -{ - struct xenbus_device_ivars *ivars = device_get_ivars(child); - - switch (index) { - case XENBUS_IVAR_NODE: - *result = (uintptr_t) ivars->xd_node; - return (0); - - case XENBUS_IVAR_TYPE: - *result = (uintptr_t) ivars->xd_type; - return (0); - - case XENBUS_IVAR_STATE: - *result = (uintptr_t) ivars->xd_state; - return (0); - - case XENBUS_IVAR_OTHEREND_ID: - *result = (uintptr_t) ivars->xd_otherend_id; - return (0); - - case XENBUS_IVAR_OTHEREND_PATH: - *result = (uintptr_t) ivars->xd_otherend_path; - return (0); - } - - 
return (ENOENT); -} - -static int -xenbus_write_ivar(device_t dev, device_t child, int index, uintptr_t value) -{ - struct xenbus_device_ivars *ivars = device_get_ivars(child); - enum xenbus_state newstate; - int currstate; - int error; - - switch (index) { - case XENBUS_IVAR_STATE: - newstate = (enum xenbus_state) value; - sx_xlock(&ivars->xd_lock); - if (ivars->xd_state == newstate) - goto out; - - error = xenbus_scanf(XBT_NIL, ivars->xd_node, "state", - NULL, "%d", &currstate); - if (error) - goto out; - - error = xenbus_printf(XBT_NIL, ivars->xd_node, "state", - "%d", newstate); - if (error) { - if (newstate != XenbusStateClosing) /* Avoid looping */ - xenbus_dev_fatal(dev, error, "writing new state"); - goto out; - } - ivars->xd_state = newstate; - wakeup(&ivars->xd_state); - out: - sx_xunlock(&ivars->xd_lock); - return (0); - - case XENBUS_IVAR_NODE: - case XENBUS_IVAR_TYPE: - case XENBUS_IVAR_OTHEREND_ID: - case XENBUS_IVAR_OTHEREND_PATH: - /* - * These variables are read-only. - */ - return (EINVAL); - } - - return (ENOENT); -} - -SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD, NULL, "Xen"); -SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xen_store_evtchn, 0, ""); -SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, ""); - -static device_method_t xenbus_methods[] = { - /* Device interface */ - DEVMETHOD(device_identify, xenbus_identify), - DEVMETHOD(device_probe, xenbus_probe), - DEVMETHOD(device_attach, xenbus_attach), - DEVMETHOD(device_detach, bus_generic_detach), - DEVMETHOD(device_shutdown, bus_generic_shutdown), - DEVMETHOD(device_suspend, xenbus_suspend), - DEVMETHOD(device_resume, xenbus_resume), - - /* Bus interface */ - DEVMETHOD(bus_print_child, xenbus_print_child), - DEVMETHOD(bus_read_ivar, xenbus_read_ivar), - DEVMETHOD(bus_write_ivar, xenbus_write_ivar), - - { 0, 0 } -}; - -static char driver_name[] = "xenbus"; -static driver_t xenbus_driver = { - driver_name, - xenbus_methods, - sizeof(struct xenbus_softc), 
-}; -devclass_t xenbus_devclass; - -#ifdef XENHVM -DRIVER_MODULE(xenbus, xenpci, xenbus_driver, xenbus_devclass, 0, 0); -#else -DRIVER_MODULE(xenbus, nexus, xenbus_driver, xenbus_devclass, 0, 0); -#endif +/** + * \brief Identify instances of this device type in the system. + * + * \param driver The driver performing this identify action. + * \param parent The NewBus parent device for any devices this method adds. + */ +void xenbusb_identify(driver_t *driver __unused, device_t parent); + +/** + * \brief Perform common XenBus bus attach processing. + * + * \param dev The NewBus device representing this XenBus bus. + * \param bus_node The XenStore path to the XenStore subtree for + * this XenBus bus. + * \param id_components The number of '/' separated path components that + * make up a unique device ID on this XenBus bus. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + * + * Intiailizes the softc for this bus, installs an interrupt driven + * configuration hook to block boot processing until XenBus devices fully + * configure, performs an initial probe/attach of the bus, and registers + * a XenStore watch so we are notified when the bus topology changes. + */ +int xenbusb_attach(device_t dev, char *bus_node, u_int id_components); + +/** + * \brief Perform common XenBus bus resume handling. + * + * \param dev The NewBus device representing this XenBus bus. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + */ +int xenbusb_resume(device_t dev); + +/** + * \brief Pretty-prints information about a child of a XenBus bus. + * + * \param dev The NewBus device representing this XenBus bus. + * \param child The NewBus device representing a child of dev%'s XenBus bus. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. 
+ */ +int xenbusb_print_child(device_t dev, device_t child); + +/** + * \brief Common XenBus child instance variable read access method. + * + * \param dev The NewBus device representing this XenBus bus. + * \param child The NewBus device representing a child of dev%'s XenBus bus. + * \param index The index of the instance variable to access. + * \param result The value of the instance variable accessed. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + */ +int xenbusb_read_ivar(device_t dev, device_t child, int index, + uintptr_t *result); + +/** + * \brief Common XenBus child instance variable write access method. + * + * \param dev The NewBus device representing this XenBus bus. + * \param child The NewBus device representing a child of dev%'s XenBus bus. + * \param index The index of the instance variable to access. + * \param value The new value to set in the instance variable accessed. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + */ +int xenbusb_write_ivar(device_t dev, device_t child, int index, + uintptr_t value); + +/** + * \brief Attempt to add a XenBus device instance to this XenBus bus. + * + * \param dev The NewBus device representing this XenBus bus. + * \param type The device type being added (e.g. "vbd", "vif"). + * \param id The device ID for this device. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. Failure indicates that either the + * path to this device no longer exists or insufficient + * information exists in the XenStore to create a new + * device. + * + * If successful, this routine will add a device_t with instance + * variable storage to the NewBus device topology. Probe/Attach + * processing is not performed by this routine, but must be scheduled + * via the xbs_probe_children task. 
This separation of responsibilities + * is required to avoid hanging up the XenStore event delivery thread + * with our probe/attach work in the event a device is added via + * a callback from the XenStore. + */ +int xenbusb_add_device(device_t dev, const char *type, const char *id); + +#endif /* _XEN_XENBUS_XENBUSB_H */ diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/xenbus/xenbusb.h.orig head.xen/sys/xen/xenbus/xenbusb.h.orig --- head.moves/sys/xen/xenbus/xenbusb.h.orig 1969-12-31 17:00:00.000000000 -0700 +++ head.xen/sys/xen/xenbus/xenbusb.h.orig 2010-09-16 17:13:36.826411487 -0600 @@ -0,0 +1,602 @@ +/****************************************************************************** + * Talks to Xen Store to figure out what devices we have. + * + * Copyright (C) 2008 Doug Rabson + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 Mike Wray, Hewlett-Packard + * Copyright (C) 2005 XenSource Ltd + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#if 0 +#define DPRINTK(fmt, args...) \ + printf("xenbus_probe (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) +#else +#define DPRINTK(fmt, args...) ((void)0) +#endif + +#include +__FBSDID("$FreeBSD: head/sys/xen/xenbus/xenbusb.h -1 $"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +struct xenbus_softc { + struct xenbus_watch xs_devicewatch; + struct task xs_probechildren; + struct intr_config_hook xs_attachcb; + device_t xs_dev; +}; + +struct xenbus_device_ivars { + struct xenbus_watch xd_otherend_watch; /* must be first */ + struct sx xd_lock; + device_t xd_dev; + char *xd_node; /* node name in xenstore */ + char *xd_type; /* xen device type */ + enum xenbus_state xd_state; + int xd_otherend_id; + char *xd_otherend_path; +}; + +/* Simplified asprintf. */ +char * +kasprintf(const char *fmt, ...) +{ + va_list ap; + unsigned int len; + char *p, dummy[1]; + + va_start(ap, fmt); + /* FIXME: vsnprintf has a bug, NULL should work */ + len = vsnprintf(dummy, 0, fmt, ap); + va_end(ap); + + p = malloc(len + 1, M_DEVBUF, M_WAITOK); + va_start(ap, fmt); + vsprintf(p, fmt, ap); + va_end(ap); + return p; +} + +static void +xenbus_identify(driver_t *driver, device_t parent) +{ + + BUS_ADD_CHILD(parent, 0, "xenbus", 0); +} + +static int +xenbus_probe(device_t dev) +{ + int err = 0; + + DPRINTK(""); + + /* Initialize the interface to xenstore. 
*/ + err = xs_init(); + if (err) { + log(LOG_WARNING, + "XENBUS: Error initializing xenstore comms: %i\n", err); + return (ENXIO); + } + err = gnttab_init(); + if (err) { + log(LOG_WARNING, + "XENBUS: Error initializing grant table: %i\n", err); + return (ENXIO); + } + device_set_desc(dev, "Xen Devices"); + + return (0); +} + +static enum xenbus_state +xenbus_otherend_state(struct xenbus_device_ivars *ivars) +{ + + return (xenbus_read_driver_state(ivars->xd_otherend_path)); +} + +static void +xenbus_backend_changed(struct xenbus_watch *watch, const char **vec, + unsigned int len) +{ + struct xenbus_device_ivars *ivars; + device_t dev; + enum xenbus_state newstate; + + ivars = (struct xenbus_device_ivars *) watch; + dev = ivars->xd_dev; + + if (!ivars->xd_otherend_path + || strncmp(ivars->xd_otherend_path, vec[XS_WATCH_PATH], + strlen(ivars->xd_otherend_path))) + return; + + newstate = xenbus_otherend_state(ivars); + XENBUS_BACKEND_CHANGED(dev, newstate); +} + +static int +xenbus_device_exists(device_t dev, const char *node) +{ + device_t *kids; + struct xenbus_device_ivars *ivars; + int i, count, result; + + if (device_get_children(dev, &kids, &count)) + return (FALSE); + + result = FALSE; + for (i = 0; i < count; i++) { + ivars = device_get_ivars(kids[i]); + if (!strcmp(ivars->xd_node, node)) { + result = TRUE; + break; + } + } + free(kids, M_TEMP); + + return (result); +} + +static int +xenbus_add_device(device_t dev, const char *bus, + const char *type, const char *id) +{ + device_t child; + struct xenbus_device_ivars *ivars; + enum xenbus_state state; + char *statepath; + int error; + + ivars = malloc(sizeof(struct xenbus_device_ivars), + M_DEVBUF, M_ZERO|M_WAITOK); + ivars->xd_node = kasprintf("%s/%s/%s", bus, type, id); + + if (xenbus_device_exists(dev, ivars->xd_node)) { + /* + * We are already tracking this node + */ + free(ivars->xd_node, M_DEVBUF); + free(ivars, M_DEVBUF); + return (0); + } + + state = xenbus_read_driver_state(ivars->xd_node); + + if 
(state != XenbusStateInitialising) { + /* + * Device is not new, so ignore it. This can + * happen if a device is going away after + * switching to Closed. + */ + free(ivars->xd_node, M_DEVBUF); + free(ivars, M_DEVBUF); + return (0); + } + + /* + * Find the backend details + */ + error = xenbus_gather(XBT_NIL, ivars->xd_node, + "backend-id", "%i", &ivars->xd_otherend_id, + "backend", NULL, &ivars->xd_otherend_path, + NULL); + if (error) + return (error); + + sx_init(&ivars->xd_lock, "xdlock"); + ivars->xd_type = strdup(type, M_DEVBUF); + ivars->xd_state = XenbusStateInitialising; + + statepath = malloc(strlen(ivars->xd_otherend_path) + + strlen("/state") + 1, M_DEVBUF, M_WAITOK); + sprintf(statepath, "%s/state", ivars->xd_otherend_path); + + ivars->xd_otherend_watch.node = statepath; + ivars->xd_otherend_watch.callback = xenbus_backend_changed; + + child = device_add_child(dev, NULL, -1); + ivars->xd_dev = child; + device_set_ivars(child, ivars); + + return (0); +} + +static int +xenbus_enumerate_type(device_t dev, const char *bus, const char *type) +{ + char **dir; + unsigned int i, count; + int error; + + error = xenbus_directory(XBT_NIL, bus, type, &count, &dir); + if (error) + return (error); + for (i = 0; i < count; i++) + xenbus_add_device(dev, bus, type, dir[i]); + + free(dir, M_DEVBUF); + + return (0); +} + +static int +xenbus_enumerate_bus(device_t dev, const char *bus) +{ + char **dir; + unsigned int i, count; + int error; + + error = xenbus_directory(XBT_NIL, bus, "", &count, &dir); + if (error) + return (error); + for (i = 0; i < count; i++) { + xenbus_enumerate_type(dev, bus, dir[i]); + } + free(dir, M_DEVBUF); + + return (0); +} + +static int +xenbus_probe_children(device_t dev) +{ + device_t *kids; + struct xenbus_device_ivars *ivars; + int i, count; + + /* + * Probe any new devices and register watches for any that + * attach successfully. 
Since part of the protocol which + * establishes a connection with the other end is interrupt + * driven, we sleep until the device reaches a stable state + * (closed or connected). + */ + if (device_get_children(dev, &kids, &count) == 0) { + for (i = 0; i < count; i++) { + if (device_get_state(kids[i]) != DS_NOTPRESENT) + continue; + + if (device_probe_and_attach(kids[i])) + continue; + ivars = device_get_ivars(kids[i]); + register_xenbus_watch( + &ivars->xd_otherend_watch); + sx_xlock(&ivars->xd_lock); + while (ivars->xd_state != XenbusStateClosed + && ivars->xd_state != XenbusStateConnected) + sx_sleep(&ivars->xd_state, &ivars->xd_lock, + 0, "xdattach", 0); + sx_xunlock(&ivars->xd_lock); + } + free(kids, M_TEMP); + } + + return (0); +} + +static void +xenbus_probe_children_cb(void *arg, int pending) +{ + device_t dev = (device_t) arg; + + xenbus_probe_children(dev); +} + +static void +xenbus_devices_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + struct xenbus_softc *sc = (struct xenbus_softc *) watch; + device_t dev = sc->xs_dev; + char *node, *bus, *type, *id, *p; + + node = strdup(vec[XS_WATCH_PATH], M_DEVBUF); + p = strchr(node, '/'); + if (!p) + goto out; + bus = node; + *p = 0; + type = p + 1; + + p = strchr(type, '/'); + if (!p) + goto out; + *p = 0; + id = p + 1; + + p = strchr(id, '/'); + if (p) + *p = 0; + + xenbus_add_device(dev, bus, type, id); + taskqueue_enqueue(taskqueue_thread, &sc->xs_probechildren); +out: + free(node, M_DEVBUF); +} + +static void +xenbus_attach_deferred(void *arg) +{ + device_t dev = (device_t) arg; + struct xenbus_softc *sc = device_get_softc(dev); + int error; + + error = xenbus_enumerate_bus(dev, "device"); + if (error) + return; + xenbus_probe_children(dev); + + sc->xs_dev = dev; + sc->xs_devicewatch.node = "device"; + sc->xs_devicewatch.callback = xenbus_devices_changed; + + TASK_INIT(&sc->xs_probechildren, 0, xenbus_probe_children_cb, dev); + + register_xenbus_watch(&sc->xs_devicewatch); + + 
config_intrhook_disestablish(&sc->xs_attachcb); +} + +static int +xenbus_attach(device_t dev) +{ + struct xenbus_softc *sc = device_get_softc(dev); + + sc->xs_attachcb.ich_func = xenbus_attach_deferred; + sc->xs_attachcb.ich_arg = dev; + config_intrhook_establish(&sc->xs_attachcb); + + return (0); +} + +static int +xenbus_suspend(device_t dev) +{ + int error; + + DPRINTK(""); + + error = bus_generic_suspend(dev); + if (error) + return (error); + + xs_suspend(); + + return (0); +} + +static int +xenbus_resume(device_t dev) +{ + device_t *kids; + struct xenbus_device_ivars *ivars; + int i, count, error; + char *statepath; + + xb_init_comms(); + xs_resume(); + + /* + * We must re-examine each device and find the new path for + * its backend. + */ + if (device_get_children(dev, &kids, &count) == 0) { + for (i = 0; i < count; i++) { + if (device_get_state(kids[i]) == DS_NOTPRESENT) + continue; + + ivars = device_get_ivars(kids[i]); + + unregister_xenbus_watch( + &ivars->xd_otherend_watch); + ivars->xd_state = XenbusStateInitialising; + + /* + * Find the new backend details and + * re-register our watch. + */ + free(ivars->xd_otherend_path, M_DEVBUF); + error = xenbus_gather(XBT_NIL, ivars->xd_node, + "backend-id", "%i", &ivars->xd_otherend_id, + "backend", NULL, &ivars->xd_otherend_path, + NULL); + if (error) + return (error); + + DEVICE_RESUME(kids[i]); + + statepath = malloc(strlen(ivars->xd_otherend_path) + + strlen("/state") + 1, M_DEVBUF, M_WAITOK); + sprintf(statepath, "%s/state", ivars->xd_otherend_path); + + free(ivars->xd_otherend_watch.node, M_DEVBUF); + ivars->xd_otherend_watch.node = statepath; + register_xenbus_watch( + &ivars->xd_otherend_watch); + +#if 0 + /* + * Can't do this yet since we are running in + * the xenwatch thread and if we sleep here, + * we will stop delivering watch notifications + * and the device will never come back online. 
+ */ + sx_xlock(&ivars->xd_lock); + while (ivars->xd_state != XenbusStateClosed + && ivars->xd_state != XenbusStateConnected) + sx_sleep(&ivars->xd_state, &ivars->xd_lock, + 0, "xdresume", 0); + sx_xunlock(&ivars->xd_lock); +#endif + } + free(kids, M_TEMP); + } + + return (0); +} + +static int +xenbus_print_child(device_t dev, device_t child) +{ + struct xenbus_device_ivars *ivars = device_get_ivars(child); + int retval = 0; + + retval += bus_print_child_header(dev, child); + retval += printf(" at %s", ivars->xd_node); + retval += bus_print_child_footer(dev, child); + + return (retval); +} + +static int +xenbus_read_ivar(device_t dev, device_t child, int index, + uintptr_t * result) +{ + struct xenbus_device_ivars *ivars = device_get_ivars(child); + + switch (index) { + case XENBUS_IVAR_NODE: + *result = (uintptr_t) ivars->xd_node; + return (0); + + case XENBUS_IVAR_TYPE: + *result = (uintptr_t) ivars->xd_type; + return (0); + + case XENBUS_IVAR_STATE: + *result = (uintptr_t) ivars->xd_state; + return (0); + + case XENBUS_IVAR_OTHEREND_ID: + *result = (uintptr_t) ivars->xd_otherend_id; + return (0); + + case XENBUS_IVAR_OTHEREND_PATH: + *result = (uintptr_t) ivars->xd_otherend_path; + return (0); + } + + return (ENOENT); +} + +static int +xenbus_write_ivar(device_t dev, device_t child, int index, uintptr_t value) +{ + struct xenbus_device_ivars *ivars = device_get_ivars(child); + enum xenbus_state newstate; + int currstate; + int error; + + switch (index) { + case XENBUS_IVAR_STATE: + newstate = (enum xenbus_state) value; + sx_xlock(&ivars->xd_lock); + if (ivars->xd_state == newstate) + goto out; + + error = xenbus_scanf(XBT_NIL, ivars->xd_node, "state", + NULL, "%d", &currstate); + if (error) + goto out; + + error = xenbus_printf(XBT_NIL, ivars->xd_node, "state", + "%d", newstate); + if (error) { + if (newstate != XenbusStateClosing) /* Avoid looping */ + xenbus_dev_fatal(dev, error, "writing new state"); + goto out; + } + ivars->xd_state = newstate; + 
wakeup(&ivars->xd_state); + out: + sx_xunlock(&ivars->xd_lock); + return (0); + + case XENBUS_IVAR_NODE: + case XENBUS_IVAR_TYPE: + case XENBUS_IVAR_OTHEREND_ID: + case XENBUS_IVAR_OTHEREND_PATH: + /* + * These variables are read-only. + */ + return (EINVAL); + } + + return (ENOENT); +} + +SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD, NULL, "Xen"); +SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xen_store_evtchn, 0, ""); +SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, ""); + +static device_method_t xenbus_methods[] = { + /* Device interface */ + DEVMETHOD(device_identify, xenbus_identify), + DEVMETHOD(device_probe, xenbus_probe), + DEVMETHOD(device_attach, xenbus_attach), + DEVMETHOD(device_detach, bus_generic_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, xenbus_suspend), + DEVMETHOD(device_resume, xenbus_resume), + + /* Bus interface */ + DEVMETHOD(bus_print_child, xenbus_print_child), + DEVMETHOD(bus_read_ivar, xenbus_read_ivar), + DEVMETHOD(bus_write_ivar, xenbus_write_ivar), + + { 0, 0 } +}; + +static char driver_name[] = "xenbus"; +static driver_t xenbus_driver = { + driver_name, + xenbus_methods, + sizeof(struct xenbus_softc), +}; +devclass_t xenbus_devclass; + +#ifdef XENHVM +DRIVER_MODULE(xenbus, xenpci, xenbus_driver, xenbus_devclass, 0, 0); +#else +DRIVER_MODULE(xenbus, nexus, xenbus_driver, xenbus_devclass, 0, 0); +#endif diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/xenbus/xenbusb_back.c head.xen/sys/xen/xenbus/xenbusb_back.c --- head.moves/sys/xen/xenbus/xenbusb_back.c 1969-12-31 17:00:00.000000000 -0700 +++ head.xen/sys/xen/xenbus/xenbusb_back.c 2010-09-16 17:18:05.744769421 -0600 @@ -0,0 +1,295 @@ +/****************************************************************************** + * Talks to Xen Store to figure out what devices we have. 
+ * + * Copyright (C) 2009, 2010 Justin Gibbs, Spectra Logic Corporation + * Copyright (C) 2008 Doug Rabson + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 Mike Wray, Hewlett-Packard + * Copyright (C) 2005 XenSource Ltd + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * \file xenbusb_back.c + * + * XenBus management of the NewBus bus containing the backend instances of + * Xen split devices. 
+ */ +#include +__FBSDID("$FreeBSD: head/sys/xen/xenbus/xenbus_probe.c 201758 2010-01-07 21:01:37Z mbr $"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + + +/*------------------ Private Device Attachment Functions --------------------*/ +/** + * \brief Probe for the existance of the XenBus back bus. + * + * \param dev NewBus device_t for this XenBus back bus instance. + * + * \return Always returns 0 indicating success. + */ +static int +xenbusb_back_probe(device_t dev) +{ + device_set_desc(dev, "Xen Backend Devices"); + + return (0); +} + +/** + * \brief Attach the XenBus back bus. + * + * \param dev NewBus device_t for this XenBus back bus instance. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + */ +static int +xenbusb_back_attach(device_t dev) +{ + struct xenbusb_softc *xbs; + int error; + + xbs = device_get_softc(dev); + error = xenbusb_attach(dev, "backend", /*id_components*/2); + + /* + * Backend devices operate to serve other domains, + * so there is no need to hold up boot processing + * while connections to foreign domains are made. + */ + mtx_lock(&xbs->xbs_lock); + if ((xbs->xbs_flags & XBS_ATTACH_CH_ACTIVE) != 0) { + xbs->xbs_flags &= ~XBS_ATTACH_CH_ACTIVE; + mtx_unlock(&xbs->xbs_lock); + config_intrhook_disestablish(&xbs->xbs_attach_ch); + } else { + mtx_unlock(&xbs->xbs_lock); + } + + return (error); +} + +/** + * \brief Enumerate all devices of the given type on this bus. + * + * \param dev NewBus device_t for this XenBus backend bus instance. + * \param type String indicating the device sub-tree (e.g. "vfb", "vif") + * to enumerate. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + * + * Devices that are found are entered into the NewBus hierarchy via + * xenbusb_add_device(). 
xenbusb_add_device() ignores duplicate detects + * and ignores duplicate devices, so it can be called unconditionally + * for any device found in the XenStore. + * + * The backend XenStore hierarchy has the following format: + * + * backend/// + * + */ +static int +xenbusb_back_enumerate_type(device_t dev, const char *type) +{ + struct xenbusb_softc *xbs; + const char **vms; + u_int vm_idx; + u_int vm_count; + int error; + + xbs = device_get_softc(dev); + error = xs_directory(XST_NIL, xbs->xbs_node, type, &vm_count, &vms); + if (error) + return (error); + for (vm_idx = 0; vm_idx < vm_count; vm_idx++) { + struct sbuf *vm_path; + const char *vm; + const char **devs; + u_int dev_idx; + u_int dev_count; + + vm = vms[vm_idx]; + + vm_path = xs_join(type, vm); + error = xs_directory(XST_NIL, xbs->xbs_node, sbuf_data(vm_path), + &dev_count, &devs); + sbuf_delete(vm_path); + if (error) + break; + + for (dev_idx = 0; dev_idx < dev_count; dev_idx++) { + const char *dev_num; + struct sbuf *id; + + dev_num = devs[dev_idx]; + id = xs_join(vm, dev_num); + xenbusb_add_device(dev, type, sbuf_data(id)); + sbuf_delete(id); + } + free(devs, M_XENSTORE); + } + + free(vms, M_XENSTORE); + + return (0); +} + +/** + * \brief Determine and store the XenStore path for the other end of + * a split device whose local end is represented by ivars. + * + * \param dev NewBus device_t for this XenBus backend bus instance. + * \param ivars Instance variables from the XenBus child device for + * which to perform this function. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + * + * If successful, the xd_otherend_path field of the child's instance + * variables will be updated. 
+ * + */ +static int +xenbusb_back_get_otherend_node(device_t dev, struct xenbus_device_ivars *ivars) +{ + char *otherend_path; + int error; + + if (ivars->xd_otherend_path != NULL) { + free(ivars->xd_otherend_path, M_XENBUS); + ivars->xd_otherend_path = NULL; + } + + error = xs_gather(XST_NIL, ivars->xd_node, + "frontend-id", "%i", &ivars->xd_otherend_id, + "frontend", NULL, &otherend_path, + NULL); + + if (error == 0) { + ivars->xd_otherend_path = strdup(otherend_path, M_XENBUS); + free(otherend_path, M_XENSTORE); + } + return (error); +} + +/** + * \brief Backend XenBus child instance variable write access method. + * + * \param dev The NewBus device representing this XenBus bus. + * \param child The NewBus device representing a child of dev%'s XenBus bus. + * \param index The index of the instance variable to access. + * \param value The new value to set in the instance variable accessed. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + * + * Xenbus_back overrides this method so that it can trap state transitions + * of local backend devices and clean up their XenStore entries as necessary + * during device instance teardown. + */ +static int +xenbusb_back_write_ivar(device_t dev, device_t child, int index, + uintptr_t value) +{ + int error; + + error = xenbusb_write_ivar(dev, child, index, value); + + if (index == XENBUS_IVAR_STATE + && (enum xenbus_state)value == XenbusStateClosed + && xenbus_dev_is_online(child) == 0) { + + /* + * Cleanup the hotplug entry in the XenStore if + * present. The control domain expects any userland + * component associated with this device to destroy + * this node in order to signify it is safe to + * teardown the device. However, not all backends + * rely on userland components, and those that + * do should either use a communication channel + * other than the XenStore, or ensure the hotplug + * data is already cleaned up. 
+ * + * This removal ensures that no matter what path + * is taken to mark a back-end closed, the control + * domain will understand that it is closed. + */ + xs_rm(XST_NIL, xenbus_get_node(child), "hotplug-status"); + } + + return (error); +} + +/*-------------------- Private Device Attachment Data -----------------------*/ +static device_method_t xenbusb_back_methods[] = { + /* Device interface */ + DEVMETHOD(device_identify, xenbusb_identify), + DEVMETHOD(device_probe, xenbusb_back_probe), + DEVMETHOD(device_attach, xenbusb_back_attach), + DEVMETHOD(device_detach, bus_generic_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, bus_generic_suspend), + DEVMETHOD(device_resume, bus_generic_resume), + + /* Bus Interface */ + DEVMETHOD(bus_print_child, xenbusb_print_child), + DEVMETHOD(bus_read_ivar, xenbusb_read_ivar), + DEVMETHOD(bus_write_ivar, xenbusb_back_write_ivar), + DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), + DEVMETHOD(bus_release_resource, bus_generic_release_resource), + DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), + DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), + + /* XenBus Bus Interface */ + DEVMETHOD(xenbusb_enumerate_type, xenbusb_back_enumerate_type), + DEVMETHOD(xenbusb_get_otherend_node, xenbusb_back_get_otherend_node), + { 0, 0 } +}; + +DEFINE_CLASS_0(xenbusb_back, xenbusb_back_driver, xenbusb_back_methods, + sizeof(struct xenbusb_softc)); +devclass_t xenbusb_back_devclass; + +DRIVER_MODULE(xenbusb_back, xenstore, xenbusb_back_driver, + xenbusb_back_devclass, 0, 0); diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/xenbus/xenbusb_front.c head.xen/sys/xen/xenbus/xenbusb_front.c --- head.moves/sys/xen/xenbus/xenbusb_front.c 1969-12-31 17:00:00.000000000 -0700 +++ head.xen/sys/xen/xenbus/xenbusb_front.c 2010-09-16 17:18:05.716656401 -0600 @@ -0,0 +1,195 @@ +/****************************************************************************** + * 
Talks to Xen Store to figure out what devices we have. + * + * Copyright (C) 2009, 2010 Justin Gibbs, Spectra Logic Corporation + * Copyright (C) 2008 Doug Rabson + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 Mike Wray, Hewlett-Packard + * Copyright (C) 2005 XenSource Ltd + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * \file xenbusb_front.c + * + * XenBus management of the NewBus bus containing the frontend instances of + * Xen split devices. 
+ */ +#include +__FBSDID("$FreeBSD: head/sys/xen/xenbus/xenbus_probe.c 201758 2010-01-07 21:01:37Z mbr $"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + + +/*------------------ Private Device Attachment Functions --------------------*/ +/** + * \brief Probe for the existence of the XenBus front bus. + * + * \param dev NewBus device_t for this XenBus front bus instance. + * + * \return Always returns 0 indicating success. + */ +static int +xenbusb_front_probe(device_t dev) +{ + device_set_desc(dev, "Xen Frontend Devices"); + + return (0); +} + +/** + * \brief Attach the XenBus front bus. + * + * \param dev NewBus device_t for this XenBus front bus instance. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + */ +static int +xenbusb_front_attach(device_t dev) +{ + return (xenbusb_attach(dev, "device", /*id_components*/1)); +} + +/** + * \brief Enumerate all devices of the given type on this bus. + * + * \param dev NewBus device_t for this XenBus front bus instance. + * \param type String indicating the device sub-tree (e.g. "vfb", "vif") + * to enumerate. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + * + * Devices that are found are entered into the NewBus hierarchy via + * xenbusb_add_device(). xenbusb_add_device() ignores duplicate detects + * and ignores duplicate devices, so it can be called unconditionally + * for any device found in the XenStore. 
+ */ +static int +xenbusb_front_enumerate_type(device_t dev, const char *type) +{ + struct xenbusb_softc *xbs; + const char **dir; + unsigned int i, count; + int error; + + xbs = device_get_softc(dev); + error = xs_directory(XST_NIL, xbs->xbs_node, type, &count, &dir); + if (error) + return (error); + for (i = 0; i < count; i++) + xenbusb_add_device(dev, type, dir[i]); + + free(dir, M_XENSTORE); + + return (0); +} + +/** + * \brief Determine and store the XenStore path for the other end of + * a split device whose local end is represented by ivars. + * + * If successful, the xd_otherend_path field of the child's instance + * variables will be updated. + * + * \param dev NewBus device_t for this XenBus front bus instance. + * \param ivars Instance variables from the XenBus child device for + * which to perform this function. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + */ +static int +xenbusb_front_get_otherend_node(device_t dev, struct xenbus_device_ivars *ivars) +{ + char *otherend_path; + int error; + + if (ivars->xd_otherend_path != NULL) { + free(ivars->xd_otherend_path, M_XENBUS); + ivars->xd_otherend_path = NULL; + } + + error = xs_gather(XST_NIL, ivars->xd_node, + "backend-id", "%i", &ivars->xd_otherend_id, + "backend", NULL, &otherend_path, + NULL); + + if (error == 0) { + ivars->xd_otherend_path = strdup(otherend_path, M_XENBUS); + free(otherend_path, M_XENSTORE); + } + return (error); +} + +/*-------------------- Private Device Attachment Data -----------------------*/ +static device_method_t xenbusb_front_methods[] = { + /* Device interface */ + DEVMETHOD(device_identify, xenbusb_identify), + DEVMETHOD(device_probe, xenbusb_front_probe), + DEVMETHOD(device_attach, xenbusb_front_attach), + DEVMETHOD(device_detach, bus_generic_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, bus_generic_suspend), + DEVMETHOD(device_resume, bus_generic_resume), + + /* Bus Interface */ + 
DEVMETHOD(bus_print_child, xenbusb_print_child), + DEVMETHOD(bus_read_ivar, xenbusb_read_ivar), + DEVMETHOD(bus_write_ivar, xenbusb_write_ivar), + DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), + DEVMETHOD(bus_release_resource, bus_generic_release_resource), + DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), + DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), + + /* XenBus Bus Interface */ + DEVMETHOD(xenbusb_enumerate_type, xenbusb_front_enumerate_type), + DEVMETHOD(xenbusb_get_otherend_node, xenbusb_front_get_otherend_node), + { 0, 0 } +}; + +DEFINE_CLASS_0(xenbusb_front, xenbusb_front_driver, xenbusb_front_methods, + sizeof(struct xenbusb_softc)); +devclass_t xenbusb_front_devclass; + +DRIVER_MODULE(xenbusb_front, xenstore, xenbusb_front_driver, + xenbusb_front_devclass, 0, 0); diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/xenbus/xenbusb_if.m head.xen/sys/xen/xenbus/xenbusb_if.m --- head.moves/sys/xen/xenbus/xenbusb_if.m 1969-12-31 17:00:00.000000000 -0700 +++ head.xen/sys/xen/xenbus/xenbusb_if.m 2010-09-16 17:02:09.178190751 -0600 @@ -0,0 +1,78 @@ +#- +# Copyright (c) 2010 Justin T. Gibbs, Spectra Logic Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions, and the following disclaimer, +# without modification. +# 2. Redistributions in binary form must reproduce at minimum a disclaimer +# substantially similar to the "NO WARRANTY" disclaimer below +# ("Disclaimer") and any redistribution must be conditioned upon +# including a substantially similar Disclaimer requirement for further +# binary redistribution. 
+# +# NO WARRANTY +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGES. +# +# $FreeBSD$ +# + +#include + +HEADER { +struct xenbus_device_ivars; +} + +INTERFACE xenbusb; + +/** + * \brief Enumerate all devices of the given type on this bus. + * + * \param _dev NewBus device_t for this XenBus (front/back) bus instance. + * \param _type String indicating the device sub-tree (e.g. "vfb", "vif") + * to enumerate. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + * + * Devices that are found should be entered into the NewBus hierarchy via + * xenbusb_add_device(). xenbusb_add_device() ignores duplicate detects + * and ignores duplicate devices, so it can be called unconditionally + * for any device found in the XenStore. + */ +METHOD int enumerate_type { + device_t _dev; + const char *_type; +}; + +/** + * \brief Determine and store the XenStore path for the other end of + * a split device whose local end is represented by ivars. + * + * If successful, the xd_otherend_path field of the child's instance + * variables must be updated. + * + * \param _dev NewBus device_t for this XenBus (front/back) bus instance. + * \param _ivars Instance variables from the XenBus child device for + * which to perform this function. 
+ * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + */ +METHOD int get_otherend_node { + device_t _dev; + struct xenbus_device_ivars *_ivars; +} diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/xenbus/xenbusvar.h head.xen/sys/xen/xenbus/xenbusvar.h --- head.moves/sys/xen/xenbus/xenbusvar.h 2010-09-16 16:29:32.800221000 -0600 +++ head.xen/sys/xen/xenbus/xenbusvar.h 2010-09-16 16:49:09.741897066 -0600 @@ -1,8 +1,4 @@ /****************************************************************************** - * xenbus.h - * - * Talks to Xen Store to figure out what devices we have. - * * Copyright (C) 2005 Rusty Russell, IBM Corporation * Copyright (C) 2005 XenSource Ltd. * @@ -30,46 +26,69 @@ * $FreeBSD: head/sys/xen/xenbus/xenbusvar.h 186557 2008-12-29 06:31:03Z kmacy $ */ +/** + * \file xenbusvar.h + * + * \brief Data structures and function declarations for use by device + * drivers operating on the XenBus. + */ + #ifndef _XEN_XENBUS_XENBUSVAR_H #define _XEN_XENBUS_XENBUSVAR_H #include #include #include +#include +#include + +#include #include + +#include #include #include +#include + #include "xenbus_if.h" +/* XenBus allocations including XenStore data returned to clients. */ +MALLOC_DECLARE(M_XENBUS); + enum { - /* + /** * Path of this device node. */ XENBUS_IVAR_NODE, - /* + /** * The device type (e.g. vif, vbd). */ XENBUS_IVAR_TYPE, - /* + /** + * The device identifier (e.g. "832"). + */ + XENBUS_IVAR_DEVID, + + /** * The state of this device (not the otherend's state). */ XENBUS_IVAR_STATE, - /* + /** * Domain ID of the other end device. */ XENBUS_IVAR_OTHEREND_ID, - /* + /** * Path of the other end device. 
*/ XENBUS_IVAR_OTHEREND_PATH }; -/* +/** * Simplified accessors for xenbus devices */ #define XENBUS_ACCESSOR(var, ivar, type) \ @@ -77,183 +96,189 @@ XENBUS_ACCESSOR(node, NODE, const char *) XENBUS_ACCESSOR(type, TYPE, const char *) +XENBUS_ACCESSOR(devid, DEVID, const char *) XENBUS_ACCESSOR(state, STATE, enum xenbus_state) XENBUS_ACCESSOR(otherend_id, OTHEREND_ID, int) XENBUS_ACCESSOR(otherend_path, OTHEREND_PATH, const char *) -/* Register callback to watch this node. */ -struct xenbus_watch -{ - LIST_ENTRY(xenbus_watch) list; - - /* Path being watched. */ - char *node; - - /* Callback (executed in a process context with no locks held). */ - void (*callback)(struct xenbus_watch *, - const char **vec, unsigned int len); -}; - -typedef int (*xenstore_event_handler_t)(void *); - -struct xenbus_transaction -{ - uint32_t id; -}; - -#define XBT_NIL ((struct xenbus_transaction) { 0 }) +/** + * Return the state of a XenBus device. + * + * \param path The root XenStore path for the device. + * + * \return The current state of the device or XenbusStateClosed if no + * state can be read. + */ +XenbusState xenbus_read_driver_state(const char *path); -int xenbus_directory(struct xenbus_transaction t, const char *dir, - const char *node, unsigned int *num, char ***result); -int xenbus_read(struct xenbus_transaction t, const char *dir, - const char *node, unsigned int *len, void **result); -int xenbus_write(struct xenbus_transaction t, const char *dir, - const char *node, const char *string); -int xenbus_mkdir(struct xenbus_transaction t, const char *dir, - const char *node); -int xenbus_exists(struct xenbus_transaction t, const char *dir, - const char *node); -int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node); -int xenbus_transaction_start(struct xenbus_transaction *t); -int xenbus_transaction_end(struct xenbus_transaction t, int abort); - -/* - * Single read and scanf: returns errno or zero. 
If scancountp is - * non-null, then number of items scanned is returned in *scanncountp. - */ -int xenbus_scanf(struct xenbus_transaction t, - const char *dir, const char *node, int *scancountp, const char *fmt, ...) - __attribute__((format(scanf, 5, 6))); - -/* Single printf and write: returns errno or 0. */ -int xenbus_printf(struct xenbus_transaction t, - const char *dir, const char *node, const char *fmt, ...) - __attribute__((format(printf, 4, 5))); - -/* - * Generic read function: NULL-terminated triples of name, - * sprintf-style type string, and pointer. Returns 0 or errno. - */ -int xenbus_gather(struct xenbus_transaction t, const char *dir, ...); - -/* notifer routines for when the xenstore comes up */ -int register_xenstore_notifier(xenstore_event_handler_t func, void *arg, int priority); -#if 0 -void unregister_xenstore_notifier(); -#endif -int register_xenbus_watch(struct xenbus_watch *watch); -void unregister_xenbus_watch(struct xenbus_watch *watch); -void xs_suspend(void); -void xs_resume(void); - -/* Used by xenbus_dev to borrow kernel's store connection. */ -int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void **result); - -#if 0 - -#define XENBUS_IS_ERR_READ(str) ({ \ - if (!IS_ERR(str) && strlen(str) == 0) { \ - free(str, M_DEVBUF); \ - str = ERR_PTR(-ERANGE); \ - } \ - IS_ERR(str); \ -}) - -#endif - -#define XENBUS_EXIST_ERR(err) ((err) == ENOENT || (err) == ERANGE) - - -/** - * Register a watch on the given path, using the given xenbus_watch structure - * for storage, and the given callback function as the callback. Return 0 on - * success, or errno on error. On success, the given path will be saved as - * watch->node, and remains the caller's to free. On error, watch->node will - * be NULL, the device will switch to XenbusStateClosing, and the error will - * be saved in the store. +/** + * Initialize and register a watch on the given path (client suplied storage). + * + * \param dev The XenBus device requesting the watch service. 
+ * \param path The XenStore path of the object to be watched. The + * storage for this string must be stable for the lifetime + * of the watch. + * \param watch The watch object to use for this request. This object + * must be stable for the lifetime of the watch. + * \param callback The function to call when XenStore objects at or below + * path are modified. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + * + * \note On error, the device 'dev' will be switched to the XenbusStateClosing + * state and the returned error is saved in the per-device error node + * for dev in the XenStore. */ int xenbus_watch_path(device_t dev, char *path, - struct xenbus_watch *watch, - void (*callback)(struct xenbus_watch *, - const char **, unsigned int)); - + struct xs_watch *watch, + xs_watch_cb_t *callback); /** - * Register a watch on the given path/path2, using the given xenbus_watch - * structure for storage, and the given callback function as the callback. - * Return 0 on success, or errno on error. On success, the watched path - * (path/path2) will be saved as watch->node, and becomes the caller's to - * kfree(). On error, watch->node will be NULL, so the caller has nothing to - * free, the device will switch to XenbusStateClosing, and the error will be - * saved in the store. + * Initialize and register a watch at path/path2 in the XenStore. + * + * \param dev The XenBus device requesting the watch service. + * \param path The base XenStore path of the object to be watched. + * \param path2 The tail XenStore path of the object to be watched. + * \param watch The watch object to use for this request. This object + * must be stable for the lifetime of the watch. + * \param callback The function to call when XenStore objects at or below + * path are modified. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. 
+ * + * \note On error, \a dev will be switched to the XenbusStateClosing + * state and the returned error is saved in the per-device error node + * for \a dev in the XenStore. + * + * Similar to xenbus_watch_path, however the storage for the path to the + * watched object is allocated from the heap and filled with "path '/' path2". + * Should a call to this function succeed, it is the callers responsibility + * to free watch->node using the M_XENBUS malloc type. */ int xenbus_watch_path2(device_t dev, const char *path, - const char *path2, struct xenbus_watch *watch, - void (*callback)(struct xenbus_watch *, - const char **, unsigned int)); - + const char *path2, struct xs_watch *watch, + xs_watch_cb_t *callback); /** - * Advertise in the store a change of the given driver to the given new_state. - * which case this is performed inside its own transaction. Return 0 on - * success, or errno on error. On error, the device will switch to - * XenbusStateClosing, and the error will be saved in the store. + * Grant access to the given ring_mfn to the peer of the given device. + * + * \param dev The device granting access to the ring page. + * \param ring_mfn The guest machine page number of the page to grant + * peer access rights. + * \param refp[out] The grant reference for the page. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + * + * A successful call to xenbus_grant_ring should be paired with a call + * to gnttab_end_foreign_access() when foregn access to this page is no + * longer requried. + * + * \note On error, \a dev will be switched to the XenbusStateClosing + * state and the returned error is saved in the per-device error node + * for \a dev in the XenStore. */ -int xenbus_switch_state(device_t dev, - XenbusState new_state); - +int xenbus_grant_ring(device_t dev, unsigned long ring_mfn, grant_ref_t *refp); /** - * Grant access to the given ring_mfn to the peer of the given device. 
- * Return 0 on success, or errno on error. On error, the device will - * switch to XenbusStateClosing, and the error will be saved in the - * store. The grant ring reference is returned in *refp. + * Allocate an event channel for the given XenBus device. + * + * \param dev The device for which to allocate the event channel. + * \param port[out] The port identifier for the allocated event channel. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + * + * A successfully allocated event channel should be free'd using + * xenbus_free_evtchn(). + * + * \note On error, \a dev will be switched to the XenbusStateClosing + * state and the returned error is saved in the per-device error node + * for \a dev in the XenStore. */ -int xenbus_grant_ring(device_t dev, unsigned long ring_mfn, int *refp); - +int xenbus_alloc_evtchn(device_t dev, evtchn_port_t *port); /** - * Allocate an event channel for the given xenbus_device, assigning the newly - * created local port to *port. Return 0 on success, or errno on error. On - * error, the device will switch to XenbusStateClosing, and the error will be - * saved in the store. + * Free an existing event channel. + * + * \param dev The device which allocated this event channel. + * \param port The port identifier for the event channel to free. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + * + * \note On error, \a dev will be switched to the XenbusStateClosing + * state and the returned error is saved in the per-device error node + * for \a dev in the XenStore. */ -int xenbus_alloc_evtchn(device_t dev, int *port); - +int xenbus_free_evtchn(device_t dev, evtchn_port_t port); /** - * Free an existing event channel. Returns 0 on success or errno on error. + * Record the given errno, along with the given, printf-style, formatted + * message in dev's device specific error node in the XenStore. + * + * \param dev The device which encountered the error. 
+ * \param err The errno value corresponding to the error. + * \param fmt Printf format string followed by a variable number of + * printf arguments. */ -int xenbus_free_evtchn(device_t dev, int port); - +void xenbus_dev_error(device_t dev, int err, const char *fmt, ...) + __attribute__((format(printf, 3, 4))); /** - * Return the state of the driver rooted at the given store path, or - * XenbusStateClosed if no state can be read. + * va_list version of xenbus_dev_error(). + * + * \param dev The device which encountered the error. + * \param err The errno value corresponding to the error. + * \param fmt Printf format string. + * \param ap Va_list of printf arguments. */ -XenbusState xenbus_read_driver_state(const char *path); - +void xenbus_dev_verror(device_t dev, int err, const char *fmt, va_list ap) + __attribute__((format(printf, 3, 0))); -/*** - * Report the given negative errno into the store, along with the given - * formatted message. +/** + * Equivalent to xenbus_dev_error(), followed by + * xenbus_set_state(dev, XenbusStateClosing). + * + * \param dev The device which encountered the error. + * \param err The errno value corresponding to the error. + * \param fmt Printf format string followed by a variable number of + * printf arguments. */ -void xenbus_dev_error(device_t dev, int err, const char *fmt, - ...); - +void xenbus_dev_fatal(device_t dev, int err, const char *fmt, ...) + __attribute__((format(printf, 3, 4))); -/*** - * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by - * xenbus_switch_state(dev, NULL, XenbusStateClosing) to schedule an orderly - * closedown of this driver and its peer. +/** + * va_list version of xenbus_dev_fatal(). + * + * \param dev The device which encountered the error. + * \param err The errno value corresponding to the error. + * \param fmt Printf format string. + * \param ap Va_list of printf arguments. 
 */ -void xenbus_dev_fatal(device_t dev, int err, const char *fmt, - ...); - -int xenbus_dev_init(void); +void xenbus_dev_vfatal(device_t dev, int err, const char *fmt, va_list) + __attribute__((format(printf, 3, 0))); +/** + * Convert a member of the xenbus_state enum into an ASCII string. + * + * \param state The XenBus state to lookup. + * + * \return A string representing state or, for unrecognized states, + * the string "Unknown". + */ const char *xenbus_strstate(enum xenbus_state state); + +/** + * Return the value of a XenBus device's "online" node within the XenStore. + * + * \param dev The XenBus device to query. + * + * \return The value of the "online" node for the device. If the node + * does not exist, 0 (offline) is returned. + */ int xenbus_dev_is_online(device_t dev); -int xenbus_frontend_closed(device_t dev); #endif /* _XEN_XENBUS_XENBUSVAR_H */ diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/xenstore/xenstore.c head.xen/sys/xen/xenstore/xenstore.c --- head.moves/sys/xen/xenstore/xenstore.c 2010-09-17 07:58:17.995181057 -0600 +++ head.xen/sys/xen/xenstore/xenstore.c 2010-09-16 17:23:24.826464043 -0600 @@ -1,24 +1,24 @@ /****************************************************************************** - * xenbus_xs.c + * xenstore.c - - * This is the kernel equivalent of the "xs" library. We don't need everything - * and we use xenbus_comms for communication. + * Low-level kernel interface to the XenStore. 
* * Copyright (C) 2005 Rusty Russell, IBM Corporation - * + * Copyright (C) 2009,2010 Justin Gibbs, Spectra Logic Corporation + * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: - * + * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -30,12 +30,13 @@ #include -__FBSDID("$FreeBSD: head/sys/xen/xenstore/xenstore.c -1 $"); +__FBSDID("$FreeBSD$"); #include -#include +#include #include #include +#include #include #include #include @@ -43,25 +44,83 @@ #include #include #include +#include +#include +#include #include #include -#include #include -#include -#include +#include +#include +#include +#include + #include +#include +#include + #include #include -static int xs_process_msg(enum xsd_sockmsg_type *type); +/** + * \file xenstore.c + * \brief XenStore interface + * + * The XenStore interface is a simple storage system that is a means of + * communicating state and configuration data between the Xen Domain 0 + * and the various guest domains. All configuration data other than + * a small amount of essential information required during the early + * boot process of launching a Xen aware guest, is managed using the + * XenStore. 
+ * + * The XenStore is ASCII string based, and has a structure and semantics + * similar to a filesystem. There are files and directories, the directories + * able to contain files or other directories. The depth of the hierarchy + * is only limited by the XenStore's maximum path length. + * + * The communication channel between the XenStore service and other + * domains is via two, guest specific, ring buffers in a shared memory + * area. One ring buffer is used for communicating in each direction. + * The grant table references for this shared memory are given to the + * guest either via the xen_start_info structure for a fully para- + * virtualized guest, or via HVM hypercalls for a hardware virtualized + * guest. + * + * The XenStore communication relies on an event channel and thus + * interrupts. For this reason, the attachment of the XenStore + * relies on an interrupt driven configuration hook to hold off + * boot processing until communication with the XenStore service + * can be established. + * + * Several Xen services depend on the XenStore, most notably the + * XenBus used to discover and manage Xen devices. These services + * are implemented as NewBus child attachments to a bus exported + * by this XenStore driver. + */ + +static struct xs_watch *find_watch(const char *token); + +MALLOC_DEFINE(M_XENSTORE, "xenstore", "XenStore data and results"); -int xenwatch_running = 0; -int xenbus_running = 0; -int xen_store_evtchn; +/** + * Pointer to shared memory communication structures allowing us + * to communicate with the XenStore service. + * + * When operating in full PV mode, this pointer is set early in kernel + * startup from within xen_machdep.c. In HVM mode, we use hypercalls + * to get the guest frame number for the shared page and then map it + * into kva. See xs_init() for details. 
+ */ +struct xenstore_domain_interface *xen_store; + +/*-------------------------- Private Data Structures ------------------------*/ +/** + * Structure capturing messages received from the XenStore service. + */ struct xs_stored_msg { TAILQ_ENTRY(xs_stored_msg) list; @@ -75,474 +134,1379 @@ /* Queued watch events. */ struct { - struct xenbus_watch *handle; - char **vec; - unsigned int vec_size; + struct xs_watch *handle; + const char **vec; + u_int vec_size; } watch; } u; }; +TAILQ_HEAD(xs_stored_msg_list, xs_stored_msg); -struct xs_handle { - /* A list of replies. Currently only one will ever be outstanding. */ - TAILQ_HEAD(xs_handle_list, xs_stored_msg) reply_list; - struct mtx reply_lock; - int reply_waitq; - - /* One request at a time. */ +/** + * Container for all XenStore related state. + */ +struct xs_softc { + /** Newbus device for the XenStore. */ + device_t xs_dev; + + /** + * Lock serializing access to ring producer/consumer + * indexes. Use of this lock guarantees that wakeups + * of blocking readers/writers are not missed due to + * races with the XenStore service. + */ + struct mtx ring_lock; + + /* + * Mutex used to insure exclusive access to the outgoing + * communication ring. We use a lock type that can be + * held while sleeping so that xs_write() can block waiting + * for space in the ring to free up, without allowing another + * writer to come in and corrupt a partial message write. + */ struct sx request_mutex; - /* Protect transactions against save/restore. */ + /** + * A list of replies to our requests. + * + * The reply list is filled by xs_rcv_thread(). It + * is consumed by the context that issued the request + * to which a reply is made. The requester blocks in + * xs_read_reply(). + * + * /note Only one requesting context can be active at a time. + * This is guaranteed by the request_mutex and insures + * that the requester sees replies matching the order + * of its requests. 
+	 */
+	struct xs_stored_msg_list reply_list;
+
+	/** Lock protecting the reply list. */
+	struct mtx reply_lock;
+
+	/**
+	 * List of registered watches.
+	 */
+	struct xs_watch_list registered_watches;
+
+	/** Lock protecting the registered watches list. */
+	struct mtx registered_watches_lock;
+
+	/**
+	 * List of pending watch callback events.
+	 */
+	struct xs_stored_msg_list watch_events;
+
+	/** Lock protecting the watch callback list. */
+	struct mtx watch_events_lock;
+
+	/**
+	 * Sleepable lock used to prevent VM suspension while a
+	 * xenstore transaction is outstanding.
+	 *
+	 * Each active transaction holds a shared lock on the
+	 * suspend mutex. Our suspend method blocks waiting
+	 * to acquire an exclusive lock. This guarantees that
+	 * suspend processing will only proceed once all active
+	 * transactions have been retired.
+	 */
 	struct sx suspend_mutex;
+
+	/**
+	 * The process id of the xenwatch thread.
+	 */
+	pid_t xenwatch_pid;
+
+	/**
+	 * Sleepable mutex used to gate the execution of XenStore
+	 * watch event callbacks.
+	 *
+	 * xenwatch_thread holds an exclusive lock on this mutex
+	 * while delivering event callbacks, and xenstore_unregister_watch()
+	 * uses an exclusive lock of this mutex to guarantee that no
+	 * callbacks of the just unregistered watch are pending
+	 * before returning to its caller.
+	 */
+	struct sx xenwatch_mutex;
+
+#ifdef XENHVM
+	/**
+	 * The HVM guest pseudo-physical frame number. This is Xen's mapping
+	 * of the true machine frame number into our "physical address space".
+	 */
+	unsigned long gpfn;
+#endif
+
+	/**
+	 * The event channel for communicating with the
+	 * XenStore service.
+	 */
+	int evtchn;
+
+	/** Interrupt number for our event channel. */
+	u_int irq;
+
+	/**
+	 * Interrupt driven config hook allowing us to defer
+	 * attaching children until interrupts (and thus communication
+	 * with the XenStore service) are available.
+ */ + struct intr_config_hook xs_attachcb; }; -static struct xs_handle xs_state; +/*-------------------------------- Global Data ------------------------------*/ +static struct xs_softc xs; + +/*------------------------- Private Utility Functions -----------------------*/ + +/** + * Count and optionally record pointers to a number of NUL terminated + * strings in a buffer. + * + * \param strings A pointer to a contiguous buffer of NUL terminated strings. + * \param dest An array to store pointers to each string found in strings. + * \param len The length of the buffer pointed to by strings. + * + * \return A count of the number of strings found. + */ +static u_int +extract_strings(const char *strings, const char **dest, u_int len) +{ + u_int num; + const char *p; + + for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1) { + if (dest != NULL) + *dest++ = p; + num++; + } + + return (num); +} + +/** + * Convert a contiguous buffer containing a series of NUL terminated + * strings into an array of pointers to strings. + * + * The returned pointer references the array of string pointers which + * is followed by the storage for the string data. It is the client's + * responsibility to free this storage. + * + * The storage addressed by strings is free'd prior to split returning. + * + * \param strings A pointer to a contiguous buffer of NUL terminated strings. + * \param len The length of the buffer pointed to by strings. + * \param num The number of strings found and returned in the strings + * array. + * + * \return An array of pointers to the strings found in the input buffer. + */ +static const char ** +split(char *strings, u_int len, u_int *num) +{ + const char **ret; + + /* Protect against unterminated buffers. */ + strings[len - 1] = '\0'; + + /* Count the strings. */ + *num = extract_strings(strings, /*dest*/NULL, len); + + /* Transfer to one big alloc for easy freeing by the caller. 
*/ + ret = malloc(*num * sizeof(char *) + len, M_XENSTORE, M_WAITOK); + memcpy(&ret[*num], strings, len); + free(strings, M_XENSTORE); + + /* Extract pointers to newly allocated array. */ + strings = (char *)&ret[*num]; + (void)extract_strings(strings, /*dest*/ret, len); + + return (ret); +} + +/*------------------------- Public Utility Functions -------------------------*/ +/*------- API comments for these methods can be found in xenstorevar.h -------*/ +struct sbuf * +xs_join(const char *dir, const char *name) +{ + struct sbuf *sb; + + sb = sbuf_new_auto(); + sbuf_cat(sb, dir); + if (name[0] != '\0') { + sbuf_putc(sb, '/'); + sbuf_cat(sb, name); + } + sbuf_finish(sb); + + return (sb); +} + +/*-------------------- Low Level Communication Management --------------------*/ +/** + * Interrupt handler for the XenStore event channel. + * + * XenStore reads and writes block on "xen_store" for buffer + * space. Wakeup any blocking operations when the XenStore + * service has modified the queues. + */ +static void +xs_intr(void * arg __unused /*__attribute__((unused))*/) +{ + + /* + * Hold ring lock across wakeup so that clients + * cannot miss a wakeup. + */ + mtx_lock(&xs.ring_lock); + wakeup(xen_store); + mtx_unlock(&xs.ring_lock); +} + +/** + * Verify that the indexes for a ring are valid. + * + * The difference between the producer and consumer cannot + * exceed the size of the ring. + * + * \param cons The consumer index for the ring to test. + * \param prod The producer index for the ring to test. + * + * \retval 1 If indexes are in range. + * \retval 0 If the indexes are out of range. + */ +static int +xs_check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod) +{ + + return ((prod - cons) <= XENSTORE_RING_SIZE); +} + +/** + * Return a pointer to, and the length of, the contiguous + * free region available for output in a ring buffer. + * + * \param cons The consumer index for the ring. + * \param prod The producer index for the ring. 
+ * \param buf The base address of the ring's storage. + * \param len The amount of contiguous storage available. + * + * \return A pointer to the start location of the free region. + */ +static void * +xs_get_output_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod, + char *buf, uint32_t *len) +{ + + *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod); + if ((XENSTORE_RING_SIZE - (prod - cons)) < *len) + *len = XENSTORE_RING_SIZE - (prod - cons); + return (buf + MASK_XENSTORE_IDX(prod)); +} + +/** + * Return a pointer to, and the length of, the contiguous + * data available to read from a ring buffer. + * + * \param cons The consumer index for the ring. + * \param prod The producer index for the ring. + * \param buf The base address of the ring's storage. + * \param len The amount of contiguous data available to read. + * + * \return A pointer to the start location of the available data. + */ +static const void * +xs_get_input_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod, + const char *buf, uint32_t *len) +{ + + *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons); + if ((prod - cons) < *len) + *len = prod - cons; + return (buf + MASK_XENSTORE_IDX(cons)); +} + +/** + * Transmit data to the XenStore service. + * + * \param tdata A pointer to the contiguous data to send. + * \param len The amount of data to send. + * + * \return On success 0, otherwise an errno value indicating the + * cause of failure. + * + * \invariant Called from thread context. + * \invariant The buffer pointed to by tdata is at least len bytes + * in length. + * \invariant xs.request_mutex exclusively locked. + */ +static int +xs_write_store(const void *tdata, unsigned len) +{ + XENSTORE_RING_IDX cons, prod; + const char *data = (const char *)tdata; + int error; + + sx_assert(&xs.request_mutex, SX_XLOCKED); + while (len != 0) { + void *dst; + u_int avail; + + /* Hold lock so we can't miss wakeups should we block. 
*/ + mtx_lock(&xs.ring_lock); + cons = xen_store->req_cons; + prod = xen_store->req_prod; + if ((prod - cons) == XENSTORE_RING_SIZE) { + /* + * Output ring is full. Wait for a ring event. + * + * Note that the events from both queues + * are combined, so being woken does not + * guarantee that data exist in the read + * ring. + * + * To simplify error recovery and the retry, + * we specify PDROP so our lock is *not* held + * when msleep returns. + */ + error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP, + "xbwrite", /*timeout*/0); + if (error && error != EWOULDBLOCK) + return (error); + + /* Try again. */ + continue; + } + mtx_unlock(&xs.ring_lock); + + /* Verify queue sanity. */ + if (!xs_check_indexes(cons, prod)) { + xen_store->req_cons = xen_store->req_prod = 0; + return (EIO); + } + + dst = xs_get_output_chunk(cons, prod, xen_store->req, &avail); + if (avail > len) + avail = len; + + memcpy(dst, data, avail); + data += avail; + len -= avail; + + /* + * The store to the producer index, which indicates + * to the other side that new data has arrived, must + * be visible only after our copy of the data into the + * ring has completed. + */ + wmb(); + xen_store->req_prod += avail; + + /* + * notify_remote_via_evtchn implies mb(). The other side + * will see the change to req_prod at the time of the + * interrupt. + */ + notify_remote_via_evtchn(xs.evtchn); + } + + return (0); +} + +/** + * Receive data from the XenStore service. + * + * \param tdata A pointer to the contiguous buffer to receive the data. + * \param len The amount of data to receive. + * + * \return On success 0, otherwise an errno value indicating the + * cause of failure. + * + * \invariant Called from thread context. + * \invariant The buffer pointed to by tdata is at least len bytes + * in length. + * + * \note xs_read does not perform any internal locking to guarantee + * serial access to the incoming ring buffer. However, there + * is only one context processing reads: xs_rcv_thread(). 
+ */ +static int +xs_read_store(void *tdata, unsigned len) +{ + XENSTORE_RING_IDX cons, prod; + char *data = (char *)tdata; + int error; + + while (len != 0) { + u_int avail; + const char *src; + + /* Hold lock so we can't miss wakeups should we block. */ + mtx_lock(&xs.ring_lock); + cons = xen_store->rsp_cons; + prod = xen_store->rsp_prod; + if (cons == prod) { + /* + * Nothing to read. Wait for a ring event. + * + * Note that the events from both queues + * are combined, so being woken does not + * guarantee that data exist in the read + * ring. + * + * To simplify error recovery and the retry, + * we specify PDROP so our lock is *not* held + * when msleep returns. + */ + error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP, + "xbread", /*timout*/0); + if (error && error != EWOULDBLOCK) + return (error); + continue; + } + mtx_unlock(&xs.ring_lock); + + /* Verify queue sanity. */ + if (!xs_check_indexes(cons, prod)) { + xen_store->rsp_cons = xen_store->rsp_prod = 0; + return (EIO); + } + + src = xs_get_input_chunk(cons, prod, xen_store->rsp, &avail); + if (avail > len) + avail = len; + + /* + * Insure the data we read is related to the indexes + * we read above. + */ + rmb(); + + memcpy(data, src, avail); + data += avail; + len -= avail; + + /* + * Insure that the producer of this ring does not see + * the ring space as free until after we have copied it + * out. + */ + mb(); + xen_store->rsp_cons += avail; + + /* + * notify_remote_via_evtchn implies mb(). The producer + * will see the updated consumer index when the event + * is delivered. + */ + notify_remote_via_evtchn(xs.evtchn); + } + + return (0); +} + +/*----------------------- Received Message Processing ------------------------*/ +/** + * Block reading the next message from the XenStore service and + * process the result. + * + * \param type The returned type of the XenStore message received. + * + * \return 0 on success. Otherwise an errno value indicating the + * type of failure encountered. 
+ */ +static int +xs_process_msg(enum xsd_sockmsg_type *type) +{ + struct xs_stored_msg *msg; + char *body; + int error; + + msg = malloc(sizeof(*msg), M_XENSTORE, M_WAITOK); + error = xs_read_store(&msg->hdr, sizeof(msg->hdr)); + if (error) { + free(msg, M_XENSTORE); + return (error); + } + + body = malloc(msg->hdr.len + 1, M_XENSTORE, M_WAITOK); + error = xs_read_store(body, msg->hdr.len); + if (error) { + free(body, M_XENSTORE); + free(msg, M_XENSTORE); + return (error); + } + body[msg->hdr.len] = '\0'; + + *type = msg->hdr.type; + if (msg->hdr.type == XS_WATCH_EVENT) { + msg->u.watch.vec = split(body, msg->hdr.len, + &msg->u.watch.vec_size); + + mtx_lock(&xs.registered_watches_lock); + msg->u.watch.handle = find_watch( + msg->u.watch.vec[XS_WATCH_TOKEN]); + if (msg->u.watch.handle != NULL) { + mtx_lock(&xs.watch_events_lock); + TAILQ_INSERT_TAIL(&xs.watch_events, msg, list); + wakeup(&xs.watch_events); + mtx_unlock(&xs.watch_events_lock); + } else { + free(msg->u.watch.vec, M_XENSTORE); + free(msg, M_XENSTORE); + } + mtx_unlock(&xs.registered_watches_lock); + } else { + msg->u.reply.body = body; + mtx_lock(&xs.reply_lock); + TAILQ_INSERT_TAIL(&xs.reply_list, msg, list); + wakeup(&xs.reply_list); + mtx_unlock(&xs.reply_lock); + } + + return (0); +} + +/** + * Thread body of the XenStore receive thread. + * + * This thread blocks waiting for data from the XenStore service + * and processes and received messages. + */ +static void +xs_rcv_thread(void *arg __unused) +{ + int error; + enum xsd_sockmsg_type type; + + for (;;) { + error = xs_process_msg(&type); + if (error) + printf("XENSTORE error %d while reading message\n", + error); + } +} + +/*---------------- XenStore Message Request/Reply Processing -----------------*/ +/** + * Filter invoked before transmitting any message to the XenStore service. + * + * The role of the filter may expand, but currently serves to manage + * the interactions of messages with transaction state. 
+ *
+ * \param request_msg_type The message type for the request.
+ */
+static inline void
+xs_request_filter(uint32_t request_msg_type)
+{
+	if (request_msg_type == XS_TRANSACTION_START)
+		sx_slock(&xs.suspend_mutex);
+}
+
+/**
+ * Filter invoked after transmitting any message to the XenStore service.
+ *
+ * The role of the filter may expand, but currently serves to manage
+ * the interactions of messages with transaction state.
+ *
+ * \param request_msg_type The message type for the original request.
+ * \param reply_msg_type The message type for any received reply.
+ * \param request_reply_error The error status from the attempt to send
+ * the request or retrieve the reply.
+ */
+static inline void
+xs_reply_filter(uint32_t request_msg_type,
+    uint32_t reply_msg_type, int request_reply_error)
+{
+	/*
+	 * The count of transactions drops if we attempted
+	 * to end a transaction (even if that attempt fails
+	 * in error), we receive a transaction end acknowledgement
+	 * or if our attempt to begin a transaction fails.
+	 */
+	if (request_msg_type == XS_TRANSACTION_END
+	 || (request_reply_error == 0 && reply_msg_type == XS_TRANSACTION_END)
+	 || (request_msg_type == XS_TRANSACTION_START
+	  && (request_reply_error != 0 || reply_msg_type == XS_ERROR)))
+		sx_sunlock(&xs.suspend_mutex);
-
-/* List of registered watches, and a lock to protect it. */
-static LIST_HEAD(watch_list_head, xenbus_watch) watches;
-static struct mtx watches_lock;
-/* List of pending watch callback events, and a lock to protect it. */
-static TAILQ_HEAD(event_list_head, xs_stored_msg) watch_events;
-static struct mtx watch_events_lock;
-
-/*
- * Details of the xenwatch callback kernel thread. The thread waits on the
- * watch_events_waitq for work to do (queued on watch_events list). When it
- * wakes up it acquires the xenwatch_mutex before reading the list and
- * carrying out work.
- */ -static pid_t xenwatch_pid; -struct sx xenwatch_mutex; -static int watch_events_waitq; +} #define xsd_error_count (sizeof(xsd_errors) / sizeof(xsd_errors[0])) +/** + * Convert a XenStore error string into an errno number. + * + * \param errorstring The error string to convert. + * + * \return The errno best matching the input string. + * + * \note Unknown error strings are converted to EINVAL. + */ static int xs_get_error(const char *errorstring) { - unsigned int i; + u_int i; for (i = 0; i < xsd_error_count; i++) { if (!strcmp(errorstring, xsd_errors[i].errstring)) return (xsd_errors[i].errnum); } - log(LOG_WARNING, "XENBUS xen store gave: unknown error %s", + log(LOG_WARNING, "XENSTORE xen store gave: unknown error %s", errorstring); return (EINVAL); } -extern void kdb_backtrace(void); - +/** + * Block waiting for a reply to a message request. + * + * \param type The returned type of the reply. + * \param len The returned body length of the reply. + * \param result The returned body of the reply. + * + * \return 0 on success. Otherwise an errno indicating the + * cause of failure. 
+ */ static int -xs_read_reply(enum xsd_sockmsg_type *type, unsigned int *len, void **result) +xs_read_reply(enum xsd_sockmsg_type *type, u_int *len, void **result) { struct xs_stored_msg *msg; char *body; int error; - mtx_lock(&xs_state.reply_lock); - - while (TAILQ_EMPTY(&xs_state.reply_list)) { - while (TAILQ_EMPTY(&xs_state.reply_list)) { - error = mtx_sleep(&xs_state.reply_waitq, - &xs_state.reply_lock, - PCATCH, "xswait", hz/10); - if (error && error != EWOULDBLOCK) { - mtx_unlock(&xs_state.reply_lock); - return (error); - } + mtx_lock(&xs.reply_lock); + while (TAILQ_EMPTY(&xs.reply_list)) { + error = mtx_sleep(&xs.reply_list, &xs.reply_lock, + PCATCH, "xswait", hz/10); + if (error && error != EWOULDBLOCK) { + mtx_unlock(&xs.reply_lock); + return (error); } } - - msg = TAILQ_FIRST(&xs_state.reply_list); - TAILQ_REMOVE(&xs_state.reply_list, msg, list); - - mtx_unlock(&xs_state.reply_lock); + msg = TAILQ_FIRST(&xs.reply_list); + TAILQ_REMOVE(&xs.reply_list, msg, list); + mtx_unlock(&xs.reply_lock); *type = msg->hdr.type; if (len) *len = msg->hdr.len; body = msg->u.reply.body; - free(msg, M_DEVBUF); + free(msg, M_XENSTORE); *result = body; return (0); } -#if 0 -/* Emergency write. UNUSED*/ -void xenbus_debug_write(const char *str, unsigned int count) -{ - struct xsd_sockmsg msg = { 0 }; - - msg.type = XS_DEBUG; - msg.len = sizeof("print") + count + 1; - - sx_xlock(&xs_state.request_mutex); - xb_write(&msg, sizeof(msg)); - xb_write("print", sizeof("print")); - xb_write(str, count); - xb_write("", 1); - sx_xunlock(&xs_state.request_mutex); -} - -#endif - +/** + * Pass-thru interface for XenStore access by userland processes + * via the XenStore device. + * + * Reply type and length data are returned by overwriting these + * fields in the passed in request message. + * + * \param msg A properly formatted message to transmit to + * the XenStore service. + * \param result The returned body of the reply. + * + * \return 0 on success. 
Otherwise an errno indicating the cause + * of failure. + * + * \note The returned result is provided in malloced storage and thus + * must be free'd by the caller with 'free(result, M_XENSTORE); + */ int -xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void **result) +xs_dev_request_and_reply(struct xsd_sockmsg *msg, void **result) { - struct xsd_sockmsg req_msg = *msg; + uint32_t request_type; int error; - if (req_msg.type == XS_TRANSACTION_START) - sx_slock(&xs_state.suspend_mutex); - - sx_xlock(&xs_state.request_mutex); + request_type = msg->type; + xs_request_filter(request_type); - error = xb_write(msg, sizeof(*msg) + msg->len, - &xs_state.request_mutex.lock_object); - if (error) { - msg->type = XS_ERROR; - } else { + sx_xlock(&xs.request_mutex); + if ((error = xs_write_store(msg, sizeof(*msg) + msg->len)) == 0) error = xs_read_reply(&msg->type, &msg->len, result); - } + sx_xunlock(&xs.request_mutex); - sx_xunlock(&xs_state.request_mutex); - - if ((msg->type == XS_TRANSACTION_END) || - ((req_msg.type == XS_TRANSACTION_START) && - (msg->type == XS_ERROR))) - sx_sunlock(&xs_state.suspend_mutex); + xs_reply_filter(request_type, msg->type, error); return (error); } -/* - * Send message to xs. The reply is returned in *result and should be - * fred with free(*result, M_DEVBUF). Return zero on success or an - * error code on failure. +/** + * Send a message with an optionally muti-part body to the XenStore service. + * + * \param t The transaction to use for this request. + * \param request_type The type of message to send. + * \param iovec Pointers to the body sections of the request. + * \param num_vecs The number of body sections in the request. + * \param len The returned length of the reply. + * \param result The returned body of the reply. + * + * \return 0 on success. Otherwise an errno indicating + * the cause of failure. 
+ * + * \note The returned result is provided in malloced storage and thus + * must be free'd by the caller with 'free(*result, M_XENSTORE); */ static int -xs_talkv(struct xenbus_transaction t, enum xsd_sockmsg_type type, - const struct iovec *iovec, unsigned int num_vecs, - unsigned int *len, void **result) +xs_talkv(struct xs_transaction t, enum xsd_sockmsg_type request_type, + const struct iovec *iovec, u_int num_vecs, u_int *len, void **result) { struct xsd_sockmsg msg; void *ret = NULL; - unsigned int i; + u_int i; int error; msg.tx_id = t.id; msg.req_id = 0; - msg.type = type; + msg.type = request_type; msg.len = 0; for (i = 0; i < num_vecs; i++) msg.len += iovec[i].iov_len; - sx_xlock(&xs_state.request_mutex); + xs_request_filter(request_type); + + sx_xlock(&xs.request_mutex); + error = xs_write_store(&msg, sizeof(msg)); + if (error) { + printf("xs_talkv failed %d\n", error); + goto error_lock_held; + } + + for (i = 0; i < num_vecs; i++) { + error = xs_write_store(iovec[i].iov_base, iovec[i].iov_len); + if (error) { + printf("xs_talkv failed %d\n", error); + goto error_lock_held; + } + } + + error = xs_read_reply(&msg.type, len, &ret); + +error_lock_held: + sx_xunlock(&xs.request_mutex); + xs_reply_filter(request_type, msg.type, error); + if (error) + return (error); + + if (msg.type == XS_ERROR) { + error = xs_get_error(ret); + free(ret, M_XENSTORE); + return (error); + } + + /* Reply is either error or an echo of our request message type. */ + KASSERT(msg.type == request_type, ("bad xenstore message type")); + + if (result) + *result = ret; + else + free(ret, M_XENSTORE); + + return (0); +} + +/** + * Wrapper for xs_talkv allowing easy transmission of a message with + * a single, contiguous, message body. + * + * \param t The transaction to use for this request. + * \param request_type The type of message to send. + * \param body The body of the request. + * \param len The returned length of the reply. + * \param result The returned body of the reply. 
+ *
+ * \return 0 on success. Otherwise an errno indicating
+ * the cause of failure.
+ *
+ * \note The returned result is provided in malloced storage and thus
+ * must be free'd by the caller with 'free(*result, M_XENSTORE)'.
+ */
+static int
+xs_single(struct xs_transaction t, enum xsd_sockmsg_type request_type,
+    const char *body, u_int *len, void **result)
+{
+	struct iovec iovec;
+
+	iovec.iov_base = (void *)(uintptr_t)body;
+	iovec.iov_len = strlen(body) + 1;
+
+	return (xs_talkv(t, request_type, &iovec, 1, len, result));
+}
+
+/*------------------------- XenStore Watch Support ---------------------------*/
+/**
+ * Transmit a watch request to the XenStore service.
+ *
+ * \param path The path in the XenStore to watch.
+ * \param token A unique identifier for this watch.
+ *
+ * \return 0 on success. Otherwise an errno indicating the
+ * cause of failure.
+ */
+static int
+xs_watch(const char *path, const char *token)
+{
+	struct iovec iov[2];
+
+	iov[0].iov_base = (void *)(uintptr_t) path;
+	iov[0].iov_len = strlen(path) + 1;
+	iov[1].iov_base = (void *)(uintptr_t) token;
+	iov[1].iov_len = strlen(token) + 1;
+
+	return (xs_talkv(XST_NIL, XS_WATCH, iov, 2, NULL, NULL));
+}
+
+/**
+ * Transmit an unwatch request to the XenStore service.
+ *
+ * \param path The path in the XenStore to watch.
+ * \param token A unique identifier for this watch.
+ *
+ * \return 0 on success. Otherwise an errno indicating the
+ * cause of failure.
+ */
+static int
+xs_unwatch(const char *path, const char *token)
+{
+	struct iovec iov[2];
+
+	iov[0].iov_base = (void *)(uintptr_t) path;
+	iov[0].iov_len = strlen(path) + 1;
+	iov[1].iov_base = (void *)(uintptr_t) token;
+	iov[1].iov_len = strlen(token) + 1;
+
+	return (xs_talkv(XST_NIL, XS_UNWATCH, iov, 2, NULL, NULL));
+}
+
+/**
+ * Convert from watch token (unique identifier) to the associated
+ * internal tracking structure for this watch.
+ *
+ * \param token The unique identifier for the watch to find.
+ * + * \return A pointer to the found watch structure or NULL. + */ +static struct xs_watch * +find_watch(const char *token) +{ + struct xs_watch *i, *cmp; + + cmp = (void *)strtoul(token, NULL, 16); + + LIST_FOREACH(i, &xs.registered_watches, list) + if (i == cmp) + return (i); + + return (NULL); +} + +/** + * Thread body of the XenStore watch event dispatch thread. + */ +static void +xenwatch_thread(void *unused) +{ + struct xs_stored_msg *msg; + + for (;;) { + + mtx_lock(&xs.watch_events_lock); + while (TAILQ_EMPTY(&xs.watch_events)) + mtx_sleep(&xs.watch_events, + &xs.watch_events_lock, + PWAIT | PCATCH, "waitev", hz/10); + + mtx_unlock(&xs.watch_events_lock); + sx_xlock(&xs.xenwatch_mutex); + + mtx_lock(&xs.watch_events_lock); + msg = TAILQ_FIRST(&xs.watch_events); + if (msg) + TAILQ_REMOVE(&xs.watch_events, msg, list); + mtx_unlock(&xs.watch_events_lock); + + if (msg != NULL) { + /* + * XXX There are messages coming in with a NULL + * XXX callback. This deserves further investigation; + * XXX the workaround here simply prevents the kernel + * XXX from panic'ing on startup. + */ + if (msg->u.watch.handle->callback != NULL) + msg->u.watch.handle->callback( + msg->u.watch.handle, + (const char **)msg->u.watch.vec, + msg->u.watch.vec_size); + free(msg->u.watch.vec, M_XENSTORE); + free(msg, M_XENSTORE); + } + + sx_xunlock(&xs.xenwatch_mutex); + } +} + +/*----------- XenStore Configuration, Initialization, and Control ------------*/ +/** + * Setup communication channels with the XenStore service. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. 
+ */ +static int +xs_init_comms(void) +{ + int error; + + if (xen_store->rsp_prod != xen_store->rsp_cons) { + log(LOG_WARNING, "XENSTORE response ring is not quiescent " + "(%08x:%08x): fixing up\n", + xen_store->rsp_cons, xen_store->rsp_prod); + xen_store->rsp_cons = xen_store->rsp_prod; + } + + if (xs.irq) + unbind_from_irqhandler(xs.irq); + + error = bind_caller_port_to_irqhandler(xs.evtchn, "xenstore", + xs_intr, NULL, INTR_TYPE_NET, &xs.irq); + if (error) { + log(LOG_WARNING, "XENSTORE request irq failed %i\n", error); + return (error); + } + + return (0); +} + +/*------------------ Private Device Attachment Functions --------------------*/ +static void +xs_identify(driver_t *driver, device_t parent) +{ + + BUS_ADD_CHILD(parent, 0, "xenstore", 0); +} + +/** + * Probe for the existance of the XenStore. + * + * \param dev + */ +static int +xs_probe(device_t dev) +{ + /* + * We are either operating within a PV kernel or being probed + * as the child of the successfully attached xenpci device. + * Thus we are in a Xen environment and there will be a XenStore. + * Uncontitionally return success. + */ + device_set_desc(dev, "XenStore"); +printf("xs_probe: Probe retuns 0\n"); + return (0); +} + +static void +xs_attach_deferred(void *arg) +{ + xs_dev_init(); + + bus_generic_probe(xs.xs_dev); + bus_generic_attach(xs.xs_dev); + + config_intrhook_disestablish(&xs.xs_attachcb); +} + +/** + * Attach to the XenStore. + * + * This routine also prepares for the probe/attach of drivers that rely + * on the XenStore. + */ +static int +xs_attach(device_t dev) +{ + int error; - error = xb_write(&msg, sizeof(msg), - &xs_state.request_mutex.lock_object); - if (error) { - sx_xunlock(&xs_state.request_mutex); - printf("xs_talkv failed %d\n", error); - return (error); + /* Allow us to get device_t from softc and vice-versa. */ + xs.xs_dev = dev; + device_set_softc(dev, &xs); + + /* + * This seems to be a layering violation. 
The XenStore is just + * one of many clients of the Grant Table facility. It happens + * to be the first and a gating consumer to all other devices, + * so this does work. A better place would be in the PV support + * code for fully PV kernels and the xenpci driver for HVM kernels. + */ + error = gnttab_init(); + if (error != 0) { + log(LOG_WARNING, + "XENSTORE: Error initializing grant tables: %d\n", error); + return (ENXIO); } - for (i = 0; i < num_vecs; i++) { - error = xb_write(iovec[i].iov_base, iovec[i].iov_len, - &xs_state.request_mutex.lock_object); - if (error) { - sx_xunlock(&xs_state.request_mutex); - printf("xs_talkv failed %d\n", error); - return (error); - } - } + /* Initialize the interface to xenstore. */ + struct proc *p; - error = xs_read_reply(&msg.type, len, &ret); +#ifdef XENHVM + xs.evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN); + xs.gpfn = hvm_get_parameter(HVM_PARAM_STORE_PFN); + xen_store = pmap_mapdev(xs.gpfn * PAGE_SIZE, PAGE_SIZE); +#else + xs.evtchn = xen_start_info->store_evtchn; +#endif + + TAILQ_INIT(&xs.reply_list); + TAILQ_INIT(&xs.watch_events); - sx_xunlock(&xs_state.request_mutex); + mtx_init(&xs.ring_lock, "ring lock", NULL, MTX_DEF); + mtx_init(&xs.reply_lock, "reply lock", NULL, MTX_DEF); + sx_init(&xs.xenwatch_mutex, "xenwatch"); + sx_init(&xs.request_mutex, "xenstore request"); + sx_init(&xs.suspend_mutex, "xenstore suspend"); + mtx_init(&xs.registered_watches_lock, "watches", NULL, MTX_DEF); + mtx_init(&xs.watch_events_lock, "watch events", NULL, MTX_DEF); + xs.irq = 0; + /* Initialize the shared memory rings to talk to xenstored */ + error = xs_init_comms(); if (error) return (error); - if (msg.type == XS_ERROR) { - error = xs_get_error(ret); - free(ret, M_DEVBUF); + error = kproc_create(xenwatch_thread, NULL, &p, RFHIGHPID, + 0, "xenwatch"); + if (error) return (error); - } + xs.xenwatch_pid = p->p_pid; -#if 0 - if ((xenwatch_running == 0) && (xenwatch_inline == 0)) { - xenwatch_inline = 1; - while 
(!TAILQ_EMPTY(&watch_events) - && xenwatch_running == 0) { - - struct xs_stored_msg *wmsg = TAILQ_FIRST(&watch_events); - TAILQ_REMOVE(&watch_events, wmsg, list); - - wmsg->u.watch.handle->callback( - wmsg->u.watch.handle, - (const char **)wmsg->u.watch.vec, - wmsg->u.watch.vec_size); - free(wmsg->u.watch.vec, M_DEVBUF); - free(wmsg, M_DEVBUF); - } - xenwatch_inline = 0; - } -#endif - KASSERT(msg.type == type, ("bad xenstore message type")); + error = kproc_create(xs_rcv_thread, NULL, NULL, + RFHIGHPID, 0, "xenstore_rcv"); - if (result) - *result = ret; - else - free(ret, M_DEVBUF); + xs.xs_attachcb.ich_func = xs_attach_deferred; + xs.xs_attachcb.ich_arg = NULL; + config_intrhook_establish(&xs.xs_attachcb); - return (0); + return (error); } -/* Simplified version of xs_talkv: single message. */ +/** + * Prepare for suspension of this VM by halting XenStore access after + * all transactions and individual requests have completed. + */ static int -xs_single(struct xenbus_transaction t, enum xsd_sockmsg_type type, - const char *string, unsigned int *len, void **result) +xs_suspend(device_t dev __unused) { - struct iovec iovec; - iovec.iov_base = (void *)(uintptr_t) string; - iovec.iov_len = strlen(string) + 1; + sx_xlock(&xs.suspend_mutex); + sx_xlock(&xs.request_mutex); - return (xs_talkv(t, type, &iovec, 1, len, result)); + return (0); } -static unsigned int -count_strings(const char *strings, unsigned int len) +/** + * Resume XenStore operations after this VM is resumed. + */ +static int +xs_resume(device_t dev __unused) { - unsigned int num; - const char *p; - - for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1) - num++; - - return num; -} + struct xs_watch *watch; + char token[sizeof(watch) * 2 + 1]; -/* Return the path to dir with /name appended. Buffer must be kfree()'ed. 
*/ -static char * -join(const char *dir, const char *name) -{ - char *buffer; + xs_init_comms(); - buffer = malloc(strlen(dir) + strlen("/") + strlen(name) + 1, - M_DEVBUF, M_WAITOK); + sx_xunlock(&xs.request_mutex); - strcpy(buffer, dir); - if (strcmp(name, "")) { - strcat(buffer, "/"); - strcat(buffer, name); + /* + * No need for registered_watches_lock: the suspend_mutex + * is sufficient. + */ + LIST_FOREACH(watch, &xs.registered_watches, list) { + sprintf(token, "%lX", (long)watch); + xs_watch(watch->node, token); } - return (buffer); -} - -static char ** -split(char *strings, unsigned int len, unsigned int *num) -{ - char *p, **ret; - - /* Count the strings. */ - *num = count_strings(strings, len) + 1; + sx_xunlock(&xs.suspend_mutex); - /* Transfer to one big alloc for easy freeing. */ - ret = malloc(*num * sizeof(char *) + len, M_DEVBUF, M_WAITOK); - memcpy(&ret[*num], strings, len); - free(strings, M_DEVBUF); + return (0); +} - strings = (char *)&ret[*num]; - for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1) - ret[(*num)++] = p; +/*-------------------- Private Device Attachment Data -----------------------*/ +static device_method_t xenstore_methods[] = { + /* Device interface */ + DEVMETHOD(device_identify, xs_identify), + DEVMETHOD(device_probe, xs_probe), + DEVMETHOD(device_attach, xs_attach), + DEVMETHOD(device_detach, bus_generic_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, xs_suspend), + DEVMETHOD(device_resume, xs_resume), + + /* Bus interface */ + DEVMETHOD(bus_add_child, bus_generic_add_child), + DEVMETHOD(bus_print_child, bus_generic_print_child), + DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), + DEVMETHOD(bus_release_resource, bus_generic_release_resource), + DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), + DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), + + { 0, 0 } +}; + +DEFINE_CLASS_0(xenstore, xenstore_driver, xenstore_methods, 0); 
+static devclass_t xenstore_devclass; + +#ifdef XENHVM +DRIVER_MODULE(xenstore, xenpci, xenstore_driver, xenstore_devclass, 0, 0); +#else +DRIVER_MODULE(xenstore, nexus, xenstore_driver, xenstore_devclass, 0, 0); +#endif - ret[*num] = strings + len; - - return ret; -} +/*------------------------------- Sysctl Data --------------------------------*/ +/* XXX Shouldn't the node be somewhere else? */ +SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD, NULL, "Xen"); +SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xs.evtchn, 0, ""); +SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, ""); -/* - * Return the contents of a directory in *result which should be freed - * with free(*result, M_DEVBUF). - */ +/*-------------------------------- Public API --------------------------------*/ +/*------- API comments for these methods can be found in xenstorevar.h -------*/ int -xenbus_directory(struct xenbus_transaction t, const char *dir, - const char *node, unsigned int *num, char ***result) +xs_directory(struct xs_transaction t, const char *dir, const char *node, + u_int *num, const char ***result) { - char *strings, *path; - unsigned int len = 0; + struct sbuf *path; + char *strings; + u_int len = 0; int error; - path = join(dir, node); - error = xs_single(t, XS_DIRECTORY, path, &len, (void **) &strings); - free(path, M_DEVBUF); + path = xs_join(dir, node); + error = xs_single(t, XS_DIRECTORY, sbuf_data(path), &len, + (void **)&strings); + sbuf_delete(path); if (error) return (error); *result = split(strings, len, num); + return (0); } -/* - * Check if a path exists. Return 1 if it does. 
- */ int -xenbus_exists(struct xenbus_transaction t, const char *dir, const char *node) +xs_exists(struct xs_transaction t, const char *dir, const char *node) { - char **d; + const char **d; int error, dir_n; - error = xenbus_directory(t, dir, node, &dir_n, &d); + error = xs_directory(t, dir, node, &dir_n, &d); if (error) return (0); - free(d, M_DEVBUF); + free(d, M_XENSTORE); return (1); } -/* - * Get the value of a single file. Returns the contents in *result - * which should be freed with free(*result, M_DEVBUF) after use. - * The length of the value in bytes is returned in *len. - */ int -xenbus_read(struct xenbus_transaction t, const char *dir, const char *node, - unsigned int *len, void **result) +xs_read(struct xs_transaction t, const char *dir, const char *node, + u_int *len, void **result) { - char *path; + struct sbuf *path; void *ret; int error; - path = join(dir, node); - error = xs_single(t, XS_READ, path, len, &ret); - free(path, M_DEVBUF); + path = xs_join(dir, node); + error = xs_single(t, XS_READ, sbuf_data(path), len, &ret); + sbuf_delete(path); if (error) return (error); *result = ret; return (0); } -/* - * Write the value of a single file. Returns error on failure. - */ int -xenbus_write(struct xenbus_transaction t, const char *dir, const char *node, +xs_write(struct xs_transaction t, const char *dir, const char *node, const char *string) { - char *path; + struct sbuf *path; struct iovec iovec[2]; int error; - path = join(dir, node); + path = xs_join(dir, node); - iovec[0].iov_base = (void *)(uintptr_t) path; - iovec[0].iov_len = strlen(path) + 1; + iovec[0].iov_base = (void *)(uintptr_t) sbuf_data(path); + iovec[0].iov_len = sbuf_len(path) + 1; iovec[1].iov_base = (void *)(uintptr_t) string; iovec[1].iov_len = strlen(string); error = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL); - free(path, M_DEVBUF); + sbuf_delete(path); return (error); } -/* - * Create a new directory. 
- */ int -xenbus_mkdir(struct xenbus_transaction t, const char *dir, const char *node) +xs_mkdir(struct xs_transaction t, const char *dir, const char *node) { - char *path; + struct sbuf *path; int ret; - path = join(dir, node); - ret = xs_single(t, XS_MKDIR, path, NULL, NULL); - free(path, M_DEVBUF); + path = xs_join(dir, node); + ret = xs_single(t, XS_MKDIR, sbuf_data(path), NULL, NULL); + sbuf_delete(path); return (ret); } -/* - * Destroy a file or directory (directories must be empty). - */ int -xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node) +xs_rm(struct xs_transaction t, const char *dir, const char *node) { - char *path; + struct sbuf *path; int ret; - path = join(dir, node); - ret = xs_single(t, XS_RM, path, NULL, NULL); - free(path, M_DEVBUF); + path = xs_join(dir, node); + ret = xs_single(t, XS_RM, sbuf_data(path), NULL, NULL); + sbuf_delete(path); return (ret); } -/* - * Start a transaction: changes by others will not be seen during this - * transaction, and changes will not be visible to others until end. 
- */ int -xenbus_transaction_start(struct xenbus_transaction *t) +xs_rm_tree(struct xs_transaction xbt, const char *base, const char *node) { - char *id_str; + struct xs_transaction local_xbt; + struct sbuf *root_path_sbuf; + struct sbuf *cur_path_sbuf; + char *root_path; + char *cur_path; + const char **dir; int error; + int empty; - sx_slock(&xs_state.suspend_mutex); - error = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL, - (void **) &id_str); - if (error) { - sx_sunlock(&xs_state.suspend_mutex); - return (error); +retry: + root_path_sbuf = xs_join(base, node); + cur_path_sbuf = xs_join(base, node); + root_path = sbuf_data(root_path_sbuf); + cur_path = sbuf_data(cur_path_sbuf); + dir = NULL; + local_xbt.id = 0; + + if (xbt.id == 0) { + error = xs_transaction_start(&local_xbt); + if (error != 0) + goto out; + xbt = local_xbt; } - t->id = strtoul(id_str, NULL, 0); - free(id_str, M_DEVBUF); + empty = 0; + while (1) { + u_int count; + u_int i; - return (0); + error = xs_directory(xbt, cur_path, "", &count, &dir); + if (error) + goto out; + + for (i = 0; i < count; i++) { + error = xs_rm(xbt, cur_path, dir[i]); + if (error == ENOTEMPTY) { + struct sbuf *push_dir; + + /* + * Descend to clear out this sub directory. + * We'll return to cur_dir once push_dir + * is empty. + */ + push_dir = xs_join(cur_path, dir[i]); + sbuf_delete(cur_path_sbuf); + cur_path_sbuf = push_dir; + cur_path = sbuf_data(cur_path_sbuf); + break; + } else if (error != 0) { + goto out; + } + } + + free(dir, M_XENSTORE); + dir = NULL; + + if (i == count) { + char *last_slash; + + /* Directory is empty. It is now safe to remove. */ + error = xs_rm(xbt, cur_path, ""); + if (error != 0) + goto out; + + if (!strcmp(cur_path, root_path)) + break; + + /* Return to processing the parent directory. 
*/ + last_slash = strrchr(cur_path, '/'); + KASSERT(last_slash != NULL, + ("xs_rm_tree: mangled path %s", cur_path)); + *last_slash = '\0'; + } + } + +out: + sbuf_delete(cur_path_sbuf); + sbuf_delete(root_path_sbuf); + if (dir != NULL) + free(dir, M_XENSTORE); + + if (local_xbt.id != 0) { + int terror; + + terror = xs_transaction_end(local_xbt, /*abort*/error != 0); + xbt.id = 0; + if (terror == EAGAIN && error == 0) + goto retry; + } + return (error); } -/* - * End a transaction. If abandon is true, transaction is discarded - * instead of committed. - */ -int xenbus_transaction_end(struct xenbus_transaction t, int abort) +int +xs_transaction_start(struct xs_transaction *t) { - char abortstr[2]; + char *id_str; int error; + error = xs_single(XST_NIL, XS_TRANSACTION_START, "", NULL, + (void **)&id_str); + if (error == 0) { + t->id = strtoul(id_str, NULL, 0); + free(id_str, M_XENSTORE); + } + return (error); +} + +int +xs_transaction_end(struct xs_transaction t, int abort) +{ + char abortstr[2]; + if (abort) strcpy(abortstr, "F"); else strcpy(abortstr, "T"); - error = xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL); - - sx_sunlock(&xs_state.suspend_mutex); - - return (error); + return (xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL)); } -/* Single read and scanf: returns zero or errno. */ int -xenbus_scanf(struct xenbus_transaction t, - const char *dir, const char *node, int *scancountp, const char *fmt, ...) +xs_scanf(struct xs_transaction t, const char *dir, const char *node, + int *scancountp, const char *fmt, ...) { va_list ap; int error, ns; char *val; - error = xenbus_read(t, dir, node, NULL, (void **) &val); + error = xs_read(t, dir, node, NULL, (void **) &val); if (error) return (error); va_start(ap, fmt); ns = vsscanf(val, fmt, ap); va_end(ap); - free(val, M_DEVBUF); + free(val, M_XENSTORE); /* Distinctive errno. */ if (ns == 0) return (ERANGE); @@ -551,41 +1515,43 @@ return (0); } -/* Single printf and write: returns zero or errno. 
*/ int -xenbus_printf(struct xenbus_transaction t, - const char *dir, const char *node, const char *fmt, ...) +xs_vprintf(struct xs_transaction t, + const char *dir, const char *node, const char *fmt, va_list ap) { - va_list ap; - int error, ret; -#define PRINTF_BUFFER_SIZE 4096 - char *printf_buffer; + struct sbuf *sb; + int error; - printf_buffer = malloc(PRINTF_BUFFER_SIZE, M_DEVBUF, M_WAITOK); + sb = sbuf_new_auto(); + sbuf_vprintf(sb, fmt, ap); + sbuf_finish(sb); + error = xs_write(t, dir, node, sbuf_data(sb)); + sbuf_delete(sb); - va_start(ap, fmt); - ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap); - va_end(ap); + return (error); +} - KASSERT(ret <= PRINTF_BUFFER_SIZE-1, ("xenbus_printf: message too large")); - error = xenbus_write(t, dir, node, printf_buffer); +int +xs_printf(struct xs_transaction t, const char *dir, const char *node, + const char *fmt, ...) +{ + va_list ap; + int error; - free(printf_buffer, M_DEVBUF); + va_start(ap, fmt); + error = xs_vprintf(t, dir, node, fmt, ap); + va_end(ap); return (error); } -/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */ int -xenbus_gather(struct xenbus_transaction t, const char *dir, ...) +xs_gather(struct xs_transaction t, const char *dir, ...) 
{ va_list ap; const char *name; - int error, i; + int error; - for (i = 0; i < 10000; i++) - HYPERVISOR_yield(); - va_start(ap, dir); error = 0; while (error == 0 && (name = va_arg(ap, char *)) != NULL) { @@ -593,14 +1559,14 @@ void *result = va_arg(ap, void *); char *p; - error = xenbus_read(t, dir, name, NULL, (void **) &p); + error = xs_read(t, dir, name, NULL, (void **) &p); if (error) break; if (fmt) { if (sscanf(p, fmt, result) == 0) error = EINVAL; - free(p, M_DEVBUF); + free(p, M_XENSTORE); } else *(char **)result = p; } @@ -609,49 +1575,8 @@ return (error); } -static int -xs_watch(const char *path, const char *token) -{ - struct iovec iov[2]; - - iov[0].iov_base = (void *)(uintptr_t) path; - iov[0].iov_len = strlen(path) + 1; - iov[1].iov_base = (void *)(uintptr_t) token; - iov[1].iov_len = strlen(token) + 1; - - return (xs_talkv(XBT_NIL, XS_WATCH, iov, 2, NULL, NULL)); -} - -static int -xs_unwatch(const char *path, const char *token) -{ - struct iovec iov[2]; - - iov[0].iov_base = (void *)(uintptr_t) path; - iov[0].iov_len = strlen(path) + 1; - iov[1].iov_base = (void *)(uintptr_t) token; - iov[1].iov_len = strlen(token) + 1; - - return (xs_talkv(XBT_NIL, XS_UNWATCH, iov, 2, NULL, NULL)); -} - -static struct xenbus_watch * -find_watch(const char *token) -{ - struct xenbus_watch *i, *cmp; - - cmp = (void *)strtoul(token, NULL, 16); - - LIST_FOREACH(i, &watches, list) - if (i == cmp) - return (i); - - return (NULL); -} - -/* Register callback to watch this node. */ int -register_xenbus_watch(struct xenbus_watch *watch) +xs_register_watch(struct xs_watch *watch) { /* Pointer in ascii is the token. 
*/ char token[sizeof(watch) * 2 + 1]; @@ -659,277 +1584,71 @@ sprintf(token, "%lX", (long)watch); - sx_slock(&xs_state.suspend_mutex); + sx_slock(&xs.suspend_mutex); - mtx_lock(&watches_lock); + mtx_lock(&xs.registered_watches_lock); KASSERT(find_watch(token) == NULL, ("watch already registered")); - LIST_INSERT_HEAD(&watches, watch, list); - mtx_unlock(&watches_lock); + LIST_INSERT_HEAD(&xs.registered_watches, watch, list); + mtx_unlock(&xs.registered_watches_lock); error = xs_watch(watch->node, token); - + /* Ignore errors due to multiple registration. */ - if (error == EEXIST) { - mtx_lock(&watches_lock); + if (error == EEXIST) + error = 0; + + if (error != 0) { + mtx_lock(&xs.registered_watches_lock); LIST_REMOVE(watch, list); - mtx_unlock(&watches_lock); + mtx_unlock(&xs.registered_watches_lock); } - sx_sunlock(&xs_state.suspend_mutex); + sx_sunlock(&xs.suspend_mutex); return (error); } void -unregister_xenbus_watch(struct xenbus_watch *watch) +xs_unregister_watch(struct xs_watch *watch) { struct xs_stored_msg *msg, *tmp; char token[sizeof(watch) * 2 + 1]; int error; sprintf(token, "%lX", (long)watch); - - sx_slock(&xs_state.suspend_mutex); - mtx_lock(&watches_lock); - KASSERT(find_watch(token), ("watch not registered")); + sx_slock(&xs.suspend_mutex); + + mtx_lock(&xs.registered_watches_lock); + if (find_watch(token) == NULL) { + mtx_unlock(&xs.registered_watches_lock); + sx_sunlock(&xs.suspend_mutex); + return; + } LIST_REMOVE(watch, list); - mtx_unlock(&watches_lock); + mtx_unlock(&xs.registered_watches_lock); error = xs_unwatch(watch->node, token); if (error) - log(LOG_WARNING, "XENBUS Failed to release watch %s: %i\n", + log(LOG_WARNING, "XENSTORE Failed to release watch %s: %i\n", watch->node, error); - sx_sunlock(&xs_state.suspend_mutex); + sx_sunlock(&xs.suspend_mutex); /* Cancel pending watch events. 
*/ - mtx_lock(&watch_events_lock); - TAILQ_FOREACH_SAFE(msg, &watch_events, list, tmp) { + mtx_lock(&xs.watch_events_lock); + TAILQ_FOREACH_SAFE(msg, &xs.watch_events, list, tmp) { if (msg->u.watch.handle != watch) continue; - TAILQ_REMOVE(&watch_events, msg, list); - free(msg->u.watch.vec, M_DEVBUF); - free(msg, M_DEVBUF); + TAILQ_REMOVE(&xs.watch_events, msg, list); + free(msg->u.watch.vec, M_XENSTORE); + free(msg, M_XENSTORE); } - mtx_unlock(&watch_events_lock); + mtx_unlock(&xs.watch_events_lock); /* Flush any currently-executing callback, unless we are it. :-) */ - if (curproc->p_pid != xenwatch_pid) { - sx_xlock(&xenwatch_mutex); - sx_xunlock(&xenwatch_mutex); - } -} - -void -xs_suspend(void) -{ - - sx_xlock(&xs_state.suspend_mutex); - sx_xlock(&xs_state.request_mutex); -} - -void -xs_resume(void) -{ - struct xenbus_watch *watch; - char token[sizeof(watch) * 2 + 1]; - - sx_xunlock(&xs_state.request_mutex); - - /* No need for watches_lock: the suspend_mutex is sufficient. */ - LIST_FOREACH(watch, &watches, list) { - sprintf(token, "%lX", (long)watch); - xs_watch(watch->node, token); - } - - sx_xunlock(&xs_state.suspend_mutex); -} - -static void -xenwatch_thread(void *unused) -{ - struct xs_stored_msg *msg; - - for (;;) { - - mtx_lock(&watch_events_lock); - while (TAILQ_EMPTY(&watch_events)) - mtx_sleep(&watch_events_waitq, - &watch_events_lock, - PWAIT | PCATCH, "waitev", hz/10); - - mtx_unlock(&watch_events_lock); - sx_xlock(&xenwatch_mutex); - - mtx_lock(&watch_events_lock); - msg = TAILQ_FIRST(&watch_events); - if (msg) - TAILQ_REMOVE(&watch_events, msg, list); - mtx_unlock(&watch_events_lock); - - if (msg != NULL) { - /* - * XXX There are messages coming in with a NULL callback. - * XXX This deserves further investigation; the workaround - * XXX here simply prevents the kernel from panic'ing - * XXX on startup. 
- */ - if (msg->u.watch.handle->callback != NULL) - msg->u.watch.handle->callback( - msg->u.watch.handle, - (const char **)msg->u.watch.vec, - msg->u.watch.vec_size); - free(msg->u.watch.vec, M_DEVBUF); - free(msg, M_DEVBUF); - } - - sx_xunlock(&xenwatch_mutex); - } -} - -static int -xs_process_msg(enum xsd_sockmsg_type *type) -{ - struct xs_stored_msg *msg; - char *body; - int error; - - msg = malloc(sizeof(*msg), M_DEVBUF, M_WAITOK); - mtx_lock(&xs_state.reply_lock); - error = xb_read(&msg->hdr, sizeof(msg->hdr), - &xs_state.reply_lock.lock_object); - mtx_unlock(&xs_state.reply_lock); - if (error) { - free(msg, M_DEVBUF); - return (error); - } - - body = malloc(msg->hdr.len + 1, M_DEVBUF, M_WAITOK); - mtx_lock(&xs_state.reply_lock); - error = xb_read(body, msg->hdr.len, - &xs_state.reply_lock.lock_object); - mtx_unlock(&xs_state.reply_lock); - if (error) { - free(body, M_DEVBUF); - free(msg, M_DEVBUF); - return (error); - } - body[msg->hdr.len] = '\0'; - - *type = msg->hdr.type; - if (msg->hdr.type == XS_WATCH_EVENT) { - msg->u.watch.vec = split(body, msg->hdr.len, - &msg->u.watch.vec_size); - - mtx_lock(&watches_lock); - msg->u.watch.handle = find_watch( - msg->u.watch.vec[XS_WATCH_TOKEN]); - if (msg->u.watch.handle != NULL) { - mtx_lock(&watch_events_lock); - TAILQ_INSERT_TAIL(&watch_events, msg, list); - wakeup(&watch_events_waitq); - mtx_unlock(&watch_events_lock); - } else { - free(msg->u.watch.vec, M_DEVBUF); - free(msg, M_DEVBUF); - } - mtx_unlock(&watches_lock); - } else { - msg->u.reply.body = body; - mtx_lock(&xs_state.reply_lock); - TAILQ_INSERT_TAIL(&xs_state.reply_list, msg, list); - wakeup(&xs_state.reply_waitq); - mtx_unlock(&xs_state.reply_lock); - } - - return 0; -} - -static void -xenbus_thread(void *unused) -{ - int error; - enum xsd_sockmsg_type type; - xenbus_running = 1; - - for (;;) { - error = xs_process_msg(&type); - if (error) - printf("XENBUS error %d while reading message\n", - error); - } -} - -#ifdef XENHVM -static unsigned long 
xen_store_mfn; -char *xen_store; - -static inline unsigned long -hvm_get_parameter(int index) -{ - struct xen_hvm_param xhv; - int error; - - xhv.domid = DOMID_SELF; - xhv.index = index; - error = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); - if (error) { - printf("hvm_get_parameter: failed to get %d, error %d\n", - index, error); - return (0); + if (curproc->p_pid != xs.xenwatch_pid) { + sx_xlock(&xs.xenwatch_mutex); + sx_xunlock(&xs.xenwatch_mutex); } - return (xhv.value); -} - -#endif - -int -xs_init(void) -{ - int error; - struct proc *p; - -#ifdef XENHVM - xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN); - xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN); - xen_store = pmap_mapdev(xen_store_mfn * PAGE_SIZE, PAGE_SIZE); -#else - xen_store_evtchn = xen_start_info->store_evtchn; -#endif - - TAILQ_INIT(&xs_state.reply_list); - TAILQ_INIT(&watch_events); - sx_init(&xenwatch_mutex, "xenwatch"); - - - mtx_init(&xs_state.reply_lock, "state reply", NULL, MTX_DEF); - sx_init(&xs_state.request_mutex, "xenstore request"); - sx_init(&xs_state.suspend_mutex, "xenstore suspend"); - - -#if 0 - mtx_init(&xs_state.suspend_mutex, "xenstore suspend", NULL, MTX_DEF); - sema_init(&xs_state.request_mutex, 1, "xenstore request"); - sema_init(&xenwatch_mutex, 1, "xenwatch"); -#endif - mtx_init(&watches_lock, "watches", NULL, MTX_DEF); - mtx_init(&watch_events_lock, "watch events", NULL, MTX_DEF); - - /* Initialize the shared memory rings to talk to xenstored */ - error = xb_init_comms(); - if (error) - return (error); - - xenwatch_running = 1; - error = kproc_create(xenwatch_thread, NULL, &p, - RFHIGHPID, 0, "xenwatch"); - if (error) - return (error); - xenwatch_pid = p->p_pid; - - error = kproc_create(xenbus_thread, NULL, NULL, - RFHIGHPID, 0, "xenbus"); - - return (error); } diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/xenstore/xenstore.c.orig head.xen/sys/xen/xenstore/xenstore.c.orig --- head.moves/sys/xen/xenstore/xenstore.c.orig 1969-12-31 
17:00:00.000000000 -0700 +++ head.xen/sys/xen/xenstore/xenstore.c.orig 2010-09-16 17:09:51.106482082 -0600 @@ -0,0 +1,935 @@ +/****************************************************************************** + * xenbus_xs.c + * + * This is the kernel equivalent of the "xs" library. We don't need everything + * and we use xenbus_comms for communication. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + + +#include +__FBSDID("$FreeBSD: head/sys/xen/xenstore/xenstore.c -1 $"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include + +static int xs_process_msg(enum xsd_sockmsg_type *type); + +int xenwatch_running = 0; +int xenbus_running = 0; +int xen_store_evtchn; + +struct xs_stored_msg { + TAILQ_ENTRY(xs_stored_msg) list; + + struct xsd_sockmsg hdr; + + union { + /* Queued replies. */ + struct { + char *body; + } reply; + + /* Queued watch events. */ + struct { + struct xenbus_watch *handle; + char **vec; + unsigned int vec_size; + } watch; + } u; +}; + +struct xs_handle { + /* A list of replies. Currently only one will ever be outstanding. */ + TAILQ_HEAD(xs_handle_list, xs_stored_msg) reply_list; + struct mtx reply_lock; + int reply_waitq; + + /* One request at a time. */ + struct sx request_mutex; + + /* Protect transactions against save/restore. */ + struct sx suspend_mutex; +}; + +static struct xs_handle xs_state; + +/* List of registered watches, and a lock to protect it. */ +static LIST_HEAD(watch_list_head, xenbus_watch) watches; +static struct mtx watches_lock; +/* List of pending watch callback events, and a lock to protect it. */ +static TAILQ_HEAD(event_list_head, xs_stored_msg) watch_events; +static struct mtx watch_events_lock; + +/* + * Details of the xenwatch callback kernel thread. The thread waits on the + * watch_events_waitq for work to do (queued on watch_events list). When it + * wakes up it acquires the xenwatch_mutex before reading the list and + * carrying out work. 
+ */ +static pid_t xenwatch_pid; +struct sx xenwatch_mutex; +static int watch_events_waitq; + +#define xsd_error_count (sizeof(xsd_errors) / sizeof(xsd_errors[0])) + +static int +xs_get_error(const char *errorstring) +{ + unsigned int i; + + for (i = 0; i < xsd_error_count; i++) { + if (!strcmp(errorstring, xsd_errors[i].errstring)) + return (xsd_errors[i].errnum); + } + log(LOG_WARNING, "XENBUS xen store gave: unknown error %s", + errorstring); + return (EINVAL); +} + +extern void kdb_backtrace(void); + +static int +xs_read_reply(enum xsd_sockmsg_type *type, unsigned int *len, void **result) +{ + struct xs_stored_msg *msg; + char *body; + int error; + + mtx_lock(&xs_state.reply_lock); + + while (TAILQ_EMPTY(&xs_state.reply_list)) { + while (TAILQ_EMPTY(&xs_state.reply_list)) { + error = mtx_sleep(&xs_state.reply_waitq, + &xs_state.reply_lock, + PCATCH, "xswait", hz/10); + if (error && error != EWOULDBLOCK) { + mtx_unlock(&xs_state.reply_lock); + return (error); + } + } + } + + msg = TAILQ_FIRST(&xs_state.reply_list); + TAILQ_REMOVE(&xs_state.reply_list, msg, list); + + mtx_unlock(&xs_state.reply_lock); + + *type = msg->hdr.type; + if (len) + *len = msg->hdr.len; + body = msg->u.reply.body; + + free(msg, M_DEVBUF); + *result = body; + return (0); +} + +#if 0 +/* Emergency write. 
UNUSED*/ +void xenbus_debug_write(const char *str, unsigned int count) +{ + struct xsd_sockmsg msg = { 0 }; + + msg.type = XS_DEBUG; + msg.len = sizeof("print") + count + 1; + + sx_xlock(&xs_state.request_mutex); + xb_write(&msg, sizeof(msg)); + xb_write("print", sizeof("print")); + xb_write(str, count); + xb_write("", 1); + sx_xunlock(&xs_state.request_mutex); +} + +#endif + +int +xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void **result) +{ + struct xsd_sockmsg req_msg = *msg; + int error; + + if (req_msg.type == XS_TRANSACTION_START) + sx_slock(&xs_state.suspend_mutex); + + sx_xlock(&xs_state.request_mutex); + + error = xb_write(msg, sizeof(*msg) + msg->len, + &xs_state.request_mutex.lock_object); + if (error) { + msg->type = XS_ERROR; + } else { + error = xs_read_reply(&msg->type, &msg->len, result); + } + + sx_xunlock(&xs_state.request_mutex); + + if ((msg->type == XS_TRANSACTION_END) || + ((req_msg.type == XS_TRANSACTION_START) && + (msg->type == XS_ERROR))) + sx_sunlock(&xs_state.suspend_mutex); + + return (error); +} + +/* + * Send message to xs. The reply is returned in *result and should be + * fred with free(*result, M_DEVBUF). Return zero on success or an + * error code on failure. 
+ */ +static int +xs_talkv(struct xenbus_transaction t, enum xsd_sockmsg_type type, + const struct iovec *iovec, unsigned int num_vecs, + unsigned int *len, void **result) +{ + struct xsd_sockmsg msg; + void *ret = NULL; + unsigned int i; + int error; + + msg.tx_id = t.id; + msg.req_id = 0; + msg.type = type; + msg.len = 0; + for (i = 0; i < num_vecs; i++) + msg.len += iovec[i].iov_len; + + sx_xlock(&xs_state.request_mutex); + + error = xb_write(&msg, sizeof(msg), + &xs_state.request_mutex.lock_object); + if (error) { + sx_xunlock(&xs_state.request_mutex); + printf("xs_talkv failed %d\n", error); + return (error); + } + + for (i = 0; i < num_vecs; i++) { + error = xb_write(iovec[i].iov_base, iovec[i].iov_len, + &xs_state.request_mutex.lock_object); + if (error) { + sx_xunlock(&xs_state.request_mutex); + printf("xs_talkv failed %d\n", error); + return (error); + } + } + + error = xs_read_reply(&msg.type, len, &ret); + + sx_xunlock(&xs_state.request_mutex); + + if (error) + return (error); + + if (msg.type == XS_ERROR) { + error = xs_get_error(ret); + free(ret, M_DEVBUF); + return (error); + } + +#if 0 + if ((xenwatch_running == 0) && (xenwatch_inline == 0)) { + xenwatch_inline = 1; + while (!TAILQ_EMPTY(&watch_events) + && xenwatch_running == 0) { + + struct xs_stored_msg *wmsg = TAILQ_FIRST(&watch_events); + TAILQ_REMOVE(&watch_events, wmsg, list); + + wmsg->u.watch.handle->callback( + wmsg->u.watch.handle, + (const char **)wmsg->u.watch.vec, + wmsg->u.watch.vec_size); + free(wmsg->u.watch.vec, M_DEVBUF); + free(wmsg, M_DEVBUF); + } + xenwatch_inline = 0; + } +#endif + KASSERT(msg.type == type, ("bad xenstore message type")); + + if (result) + *result = ret; + else + free(ret, M_DEVBUF); + + return (0); +} + +/* Simplified version of xs_talkv: single message. 
*/ +static int +xs_single(struct xenbus_transaction t, enum xsd_sockmsg_type type, + const char *string, unsigned int *len, void **result) +{ + struct iovec iovec; + + iovec.iov_base = (void *)(uintptr_t) string; + iovec.iov_len = strlen(string) + 1; + + return (xs_talkv(t, type, &iovec, 1, len, result)); +} + +static unsigned int +count_strings(const char *strings, unsigned int len) +{ + unsigned int num; + const char *p; + + for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1) + num++; + + return num; +} + +/* Return the path to dir with /name appended. Buffer must be kfree()'ed. */ +static char * +join(const char *dir, const char *name) +{ + char *buffer; + + buffer = malloc(strlen(dir) + strlen("/") + strlen(name) + 1, + M_DEVBUF, M_WAITOK); + + strcpy(buffer, dir); + if (strcmp(name, "")) { + strcat(buffer, "/"); + strcat(buffer, name); + } + + return (buffer); +} + +static char ** +split(char *strings, unsigned int len, unsigned int *num) +{ + char *p, **ret; + + /* Count the strings. */ + *num = count_strings(strings, len) + 1; + + /* Transfer to one big alloc for easy freeing. */ + ret = malloc(*num * sizeof(char *) + len, M_DEVBUF, M_WAITOK); + memcpy(&ret[*num], strings, len); + free(strings, M_DEVBUF); + + strings = (char *)&ret[*num]; + for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1) + ret[(*num)++] = p; + + ret[*num] = strings + len; + + return ret; +} + +/* + * Return the contents of a directory in *result which should be freed + * with free(*result, M_DEVBUF). + */ +int +xenbus_directory(struct xenbus_transaction t, const char *dir, + const char *node, unsigned int *num, char ***result) +{ + char *strings, *path; + unsigned int len = 0; + int error; + + path = join(dir, node); + error = xs_single(t, XS_DIRECTORY, path, &len, (void **) &strings); + free(path, M_DEVBUF); + if (error) + return (error); + + *result = split(strings, len, num); + return (0); +} + +/* + * Check if a path exists. Return 1 if it does. 
+ */ +int +xenbus_exists(struct xenbus_transaction t, const char *dir, const char *node) +{ + char **d; + int error, dir_n; + + error = xenbus_directory(t, dir, node, &dir_n, &d); + if (error) + return (0); + free(d, M_DEVBUF); + return (1); +} + +/* + * Get the value of a single file. Returns the contents in *result + * which should be freed with free(*result, M_DEVBUF) after use. + * The length of the value in bytes is returned in *len. + */ +int +xenbus_read(struct xenbus_transaction t, const char *dir, const char *node, + unsigned int *len, void **result) +{ + char *path; + void *ret; + int error; + + path = join(dir, node); + error = xs_single(t, XS_READ, path, len, &ret); + free(path, M_DEVBUF); + if (error) + return (error); + *result = ret; + return (0); +} + +/* + * Write the value of a single file. Returns error on failure. + */ +int +xenbus_write(struct xenbus_transaction t, const char *dir, const char *node, + const char *string) +{ + char *path; + struct iovec iovec[2]; + int error; + + path = join(dir, node); + + iovec[0].iov_base = (void *)(uintptr_t) path; + iovec[0].iov_len = strlen(path) + 1; + iovec[1].iov_base = (void *)(uintptr_t) string; + iovec[1].iov_len = strlen(string); + + error = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL); + free(path, M_DEVBUF); + + return (error); +} + +/* + * Create a new directory. + */ +int +xenbus_mkdir(struct xenbus_transaction t, const char *dir, const char *node) +{ + char *path; + int ret; + + path = join(dir, node); + ret = xs_single(t, XS_MKDIR, path, NULL, NULL); + free(path, M_DEVBUF); + + return (ret); +} + +/* + * Destroy a file or directory (directories must be empty). 
+ */ +int +xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node) +{ + char *path; + int ret; + + path = join(dir, node); + ret = xs_single(t, XS_RM, path, NULL, NULL); + free(path, M_DEVBUF); + + return (ret); +} + +/* + * Start a transaction: changes by others will not be seen during this + * transaction, and changes will not be visible to others until end. + */ +int +xenbus_transaction_start(struct xenbus_transaction *t) +{ + char *id_str; + int error; + + sx_slock(&xs_state.suspend_mutex); + error = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL, + (void **) &id_str); + if (error) { + sx_sunlock(&xs_state.suspend_mutex); + return (error); + } + + t->id = strtoul(id_str, NULL, 0); + free(id_str, M_DEVBUF); + + return (0); +} + +/* + * End a transaction. If abandon is true, transaction is discarded + * instead of committed. + */ +int xenbus_transaction_end(struct xenbus_transaction t, int abort) +{ + char abortstr[2]; + int error; + + if (abort) + strcpy(abortstr, "F"); + else + strcpy(abortstr, "T"); + + error = xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL); + + sx_sunlock(&xs_state.suspend_mutex); + + return (error); +} + +/* Single read and scanf: returns zero or errno. */ +int +xenbus_scanf(struct xenbus_transaction t, + const char *dir, const char *node, int *scancountp, const char *fmt, ...) +{ + va_list ap; + int error, ns; + char *val; + + error = xenbus_read(t, dir, node, NULL, (void **) &val); + if (error) + return (error); + + va_start(ap, fmt); + ns = vsscanf(val, fmt, ap); + va_end(ap); + free(val, M_DEVBUF); + /* Distinctive errno. */ + if (ns == 0) + return (ERANGE); + if (scancountp) + *scancountp = ns; + return (0); +} + +/* Single printf and write: returns zero or errno. */ +int +xenbus_printf(struct xenbus_transaction t, + const char *dir, const char *node, const char *fmt, ...) 
+{ + va_list ap; + int error, ret; +#define PRINTF_BUFFER_SIZE 4096 + char *printf_buffer; + + printf_buffer = malloc(PRINTF_BUFFER_SIZE, M_DEVBUF, M_WAITOK); + + va_start(ap, fmt); + ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap); + va_end(ap); + + KASSERT(ret <= PRINTF_BUFFER_SIZE-1, ("xenbus_printf: message too large")); + error = xenbus_write(t, dir, node, printf_buffer); + + free(printf_buffer, M_DEVBUF); + + return (error); +} + +/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */ +int +xenbus_gather(struct xenbus_transaction t, const char *dir, ...) +{ + va_list ap; + const char *name; + int error, i; + + for (i = 0; i < 10000; i++) + HYPERVISOR_yield(); + + va_start(ap, dir); + error = 0; + while (error == 0 && (name = va_arg(ap, char *)) != NULL) { + const char *fmt = va_arg(ap, char *); + void *result = va_arg(ap, void *); + char *p; + + error = xenbus_read(t, dir, name, NULL, (void **) &p); + if (error) + break; + + if (fmt) { + if (sscanf(p, fmt, result) == 0) + error = EINVAL; + free(p, M_DEVBUF); + } else + *(char **)result = p; + } + va_end(ap); + + return (error); +} + +static int +xs_watch(const char *path, const char *token) +{ + struct iovec iov[2]; + + iov[0].iov_base = (void *)(uintptr_t) path; + iov[0].iov_len = strlen(path) + 1; + iov[1].iov_base = (void *)(uintptr_t) token; + iov[1].iov_len = strlen(token) + 1; + + return (xs_talkv(XBT_NIL, XS_WATCH, iov, 2, NULL, NULL)); +} + +static int +xs_unwatch(const char *path, const char *token) +{ + struct iovec iov[2]; + + iov[0].iov_base = (void *)(uintptr_t) path; + iov[0].iov_len = strlen(path) + 1; + iov[1].iov_base = (void *)(uintptr_t) token; + iov[1].iov_len = strlen(token) + 1; + + return (xs_talkv(XBT_NIL, XS_UNWATCH, iov, 2, NULL, NULL)); +} + +static struct xenbus_watch * +find_watch(const char *token) +{ + struct xenbus_watch *i, *cmp; + + cmp = (void *)strtoul(token, NULL, 16); + + LIST_FOREACH(i, &watches, list) + if (i == cmp) + return (i); + + 
return (NULL); +} + +/* Register callback to watch this node. */ +int +register_xenbus_watch(struct xenbus_watch *watch) +{ + /* Pointer in ascii is the token. */ + char token[sizeof(watch) * 2 + 1]; + int error; + + sprintf(token, "%lX", (long)watch); + + sx_slock(&xs_state.suspend_mutex); + + mtx_lock(&watches_lock); + KASSERT(find_watch(token) == NULL, ("watch already registered")); + LIST_INSERT_HEAD(&watches, watch, list); + mtx_unlock(&watches_lock); + + error = xs_watch(watch->node, token); + + /* Ignore errors due to multiple registration. */ + if (error == EEXIST) { + mtx_lock(&watches_lock); + LIST_REMOVE(watch, list); + mtx_unlock(&watches_lock); + } + + sx_sunlock(&xs_state.suspend_mutex); + + return (error); +} + +void +unregister_xenbus_watch(struct xenbus_watch *watch) +{ + struct xs_stored_msg *msg, *tmp; + char token[sizeof(watch) * 2 + 1]; + int error; + + sprintf(token, "%lX", (long)watch); + + sx_slock(&xs_state.suspend_mutex); + + mtx_lock(&watches_lock); + KASSERT(find_watch(token), ("watch not registered")); + LIST_REMOVE(watch, list); + mtx_unlock(&watches_lock); + + error = xs_unwatch(watch->node, token); + if (error) + log(LOG_WARNING, "XENBUS Failed to release watch %s: %i\n", + watch->node, error); + + sx_sunlock(&xs_state.suspend_mutex); + + /* Cancel pending watch events. */ + mtx_lock(&watch_events_lock); + TAILQ_FOREACH_SAFE(msg, &watch_events, list, tmp) { + if (msg->u.watch.handle != watch) + continue; + TAILQ_REMOVE(&watch_events, msg, list); + free(msg->u.watch.vec, M_DEVBUF); + free(msg, M_DEVBUF); + } + mtx_unlock(&watch_events_lock); + + /* Flush any currently-executing callback, unless we are it. 
:-) */ + if (curproc->p_pid != xenwatch_pid) { + sx_xlock(&xenwatch_mutex); + sx_xunlock(&xenwatch_mutex); + } +} + +void +xs_suspend(void) +{ + + sx_xlock(&xs_state.suspend_mutex); + sx_xlock(&xs_state.request_mutex); +} + +void +xs_resume(void) +{ + struct xenbus_watch *watch; + char token[sizeof(watch) * 2 + 1]; + + sx_xunlock(&xs_state.request_mutex); + + /* No need for watches_lock: the suspend_mutex is sufficient. */ + LIST_FOREACH(watch, &watches, list) { + sprintf(token, "%lX", (long)watch); + xs_watch(watch->node, token); + } + + sx_xunlock(&xs_state.suspend_mutex); +} + +static void +xenwatch_thread(void *unused) +{ + struct xs_stored_msg *msg; + + for (;;) { + + mtx_lock(&watch_events_lock); + while (TAILQ_EMPTY(&watch_events)) + mtx_sleep(&watch_events_waitq, + &watch_events_lock, + PWAIT | PCATCH, "waitev", hz/10); + + mtx_unlock(&watch_events_lock); + sx_xlock(&xenwatch_mutex); + + mtx_lock(&watch_events_lock); + msg = TAILQ_FIRST(&watch_events); + if (msg) + TAILQ_REMOVE(&watch_events, msg, list); + mtx_unlock(&watch_events_lock); + + if (msg != NULL) { + /* + * XXX There are messages coming in with a NULL callback. + * XXX This deserves further investigation; the workaround + * XXX here simply prevents the kernel from panic'ing + * XXX on startup. 
+ */ + if (msg->u.watch.handle->callback != NULL) + msg->u.watch.handle->callback( + msg->u.watch.handle, + (const char **)msg->u.watch.vec, + msg->u.watch.vec_size); + free(msg->u.watch.vec, M_DEVBUF); + free(msg, M_DEVBUF); + } + + sx_xunlock(&xenwatch_mutex); + } +} + +static int +xs_process_msg(enum xsd_sockmsg_type *type) +{ + struct xs_stored_msg *msg; + char *body; + int error; + + msg = malloc(sizeof(*msg), M_DEVBUF, M_WAITOK); + mtx_lock(&xs_state.reply_lock); + error = xb_read(&msg->hdr, sizeof(msg->hdr), + &xs_state.reply_lock.lock_object); + mtx_unlock(&xs_state.reply_lock); + if (error) { + free(msg, M_DEVBUF); + return (error); + } + + body = malloc(msg->hdr.len + 1, M_DEVBUF, M_WAITOK); + mtx_lock(&xs_state.reply_lock); + error = xb_read(body, msg->hdr.len, + &xs_state.reply_lock.lock_object); + mtx_unlock(&xs_state.reply_lock); + if (error) { + free(body, M_DEVBUF); + free(msg, M_DEVBUF); + return (error); + } + body[msg->hdr.len] = '\0'; + + *type = msg->hdr.type; + if (msg->hdr.type == XS_WATCH_EVENT) { + msg->u.watch.vec = split(body, msg->hdr.len, + &msg->u.watch.vec_size); + + mtx_lock(&watches_lock); + msg->u.watch.handle = find_watch( + msg->u.watch.vec[XS_WATCH_TOKEN]); + if (msg->u.watch.handle != NULL) { + mtx_lock(&watch_events_lock); + TAILQ_INSERT_TAIL(&watch_events, msg, list); + wakeup(&watch_events_waitq); + mtx_unlock(&watch_events_lock); + } else { + free(msg->u.watch.vec, M_DEVBUF); + free(msg, M_DEVBUF); + } + mtx_unlock(&watches_lock); + } else { + msg->u.reply.body = body; + mtx_lock(&xs_state.reply_lock); + TAILQ_INSERT_TAIL(&xs_state.reply_list, msg, list); + wakeup(&xs_state.reply_waitq); + mtx_unlock(&xs_state.reply_lock); + } + + return 0; +} + +static void +xenbus_thread(void *unused) +{ + int error; + enum xsd_sockmsg_type type; + xenbus_running = 1; + + for (;;) { + error = xs_process_msg(&type); + if (error) + printf("XENBUS error %d while reading message\n", + error); + } +} + +#ifdef XENHVM +static unsigned long 
xen_store_mfn; +char *xen_store; + +static inline unsigned long +hvm_get_parameter(int index) +{ + struct xen_hvm_param xhv; + int error; + + xhv.domid = DOMID_SELF; + xhv.index = index; + error = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); + if (error) { + printf("hvm_get_parameter: failed to get %d, error %d\n", + index, error); + return (0); + } + return (xhv.value); +} + +#endif + +int +xs_init(void) +{ + int error; + struct proc *p; + +#ifdef XENHVM + xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN); + xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN); + xen_store = pmap_mapdev(xen_store_mfn * PAGE_SIZE, PAGE_SIZE); +#else + xen_store_evtchn = xen_start_info->store_evtchn; +#endif + + TAILQ_INIT(&xs_state.reply_list); + TAILQ_INIT(&watch_events); + sx_init(&xenwatch_mutex, "xenwatch"); + + + mtx_init(&xs_state.reply_lock, "state reply", NULL, MTX_DEF); + sx_init(&xs_state.request_mutex, "xenstore request"); + sx_init(&xs_state.suspend_mutex, "xenstore suspend"); + + +#if 0 + mtx_init(&xs_state.suspend_mutex, "xenstore suspend", NULL, MTX_DEF); + sema_init(&xs_state.request_mutex, 1, "xenstore request"); + sema_init(&xenwatch_mutex, 1, "xenwatch"); +#endif + mtx_init(&watches_lock, "watches", NULL, MTX_DEF); + mtx_init(&watch_events_lock, "watch events", NULL, MTX_DEF); + + /* Initialize the shared memory rings to talk to xenstored */ + error = xb_init_comms(); + if (error) + return (error); + + xenwatch_running = 1; + error = kproc_create(xenwatch_thread, NULL, &p, + RFHIGHPID, 0, "xenwatch"); + if (error) + return (error); + xenwatch_pid = p->p_pid; + + error = kproc_create(xenbus_thread, NULL, NULL, + RFHIGHPID, 0, "xenbus"); + + return (error); +} diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/xenstore/xenstore_dev.c head.xen/sys/xen/xenstore/xenstore_dev.c --- head.moves/sys/xen/xenstore/xenstore_dev.c 2010-09-17 07:57:41.604844324 -0600 +++ head.xen/sys/xen/xenstore/xenstore_dev.c 2010-09-16 17:23:24.831483916 -0600 @@ 
-1,8 +1,8 @@ /* - * xenbus_dev.c + * xenstore_dev.c * - * Driver giving user-space access to the kernel's xenbus connection - * to xenstore. + * Driver giving user-space access to the kernel's connection to the + * XenStore service. * * Copyright (c) 2005, Christian Limpach * Copyright (c) 2005, Rusty Russell, IBM Corporation @@ -45,18 +45,19 @@ #include #include + #include -#include -#include +#include +#include -struct xenbus_dev_transaction { - LIST_ENTRY(xenbus_dev_transaction) list; - struct xenbus_transaction handle; +struct xs_dev_transaction { + LIST_ENTRY(xs_dev_transaction) list; + struct xs_transaction handle; }; -struct xenbus_dev_data { +struct xs_dev_data { /* In-progress transaction. */ - LIST_HEAD(xdd_list_head, xenbus_dev_transaction) transactions; + LIST_HEAD(xdd_list_head, xs_dev_transaction) transactions; /* Partial request. */ unsigned int len; @@ -72,13 +73,13 @@ }; static int -xenbus_dev_read(struct cdev *dev, struct uio *uio, int ioflag) +xs_dev_read(struct cdev *dev, struct uio *uio, int ioflag) { int error; - struct xenbus_dev_data *u = dev->si_drv1; + struct xs_dev_data *u = dev->si_drv1; while (u->read_prod == u->read_cons) { - error = tsleep(u, PCATCH, "xbdread", hz/10); + error = tsleep(u, PCATCH, "xsdread", hz/10); if (error && error != EWOULDBLOCK) return (error); } @@ -96,7 +97,7 @@ } static void -queue_reply(struct xenbus_dev_data *u, char *data, unsigned int len) +xs_queue_reply(struct xs_dev_data *u, char *data, unsigned int len) { int i; @@ -110,11 +111,11 @@ } static int -xenbus_dev_write(struct cdev *dev, struct uio *uio, int ioflag) +xs_dev_write(struct cdev *dev, struct uio *uio, int ioflag) { int error; - struct xenbus_dev_data *u = dev->si_drv1; - struct xenbus_dev_transaction *trans; + struct xs_dev_data *u = dev->si_drv1; + struct xs_dev_transaction *trans; void *reply; int len = uio->uio_resid; @@ -141,10 +142,10 @@ case XS_MKDIR: case XS_RM: case XS_SET_PERMS: - error = xenbus_dev_request_and_reply(&u->u.msg, &reply); 
+ error = xs_dev_request_and_reply(&u->u.msg, &reply); if (!error) { if (u->u.msg.type == XS_TRANSACTION_START) { - trans = malloc(sizeof(*trans), M_DEVBUF, + trans = malloc(sizeof(*trans), M_XENSTORE, M_WAITOK); trans->handle.id = strtoul(reply, NULL, 0); LIST_INSERT_HEAD(&u->transactions, trans, list); @@ -156,11 +157,11 @@ BUG_ON(&trans->list == &u->transactions); #endif LIST_REMOVE(trans, list); - free(trans, M_DEVBUF); + free(trans, M_XENSTORE); } - queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg)); - queue_reply(u, (char *)reply, u->u.msg.len); - free(reply, M_DEVBUF); + xs_queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg)); + xs_queue_reply(u, (char *)reply, u->u.msg.len); + free(reply, M_XENSTORE); } break; @@ -176,16 +177,14 @@ } static int -xenbus_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) +xs_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) { - struct xenbus_dev_data *u; + struct xs_dev_data *u; - if (xen_store_evtchn == 0) - return (ENOENT); #if 0 /* XXX figure out if equiv needed */ nonseekable_open(inode, filp); #endif - u = malloc(sizeof(*u), M_DEVBUF, M_WAITOK|M_ZERO); + u = malloc(sizeof(*u), M_XENSTORE, M_WAITOK|M_ZERO); LIST_INIT(&u->transactions); dev->si_drv1 = u; @@ -193,37 +192,33 @@ } static int -xenbus_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) +xs_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) { - struct xenbus_dev_data *u = dev->si_drv1; - struct xenbus_dev_transaction *trans, *tmp; + struct xs_dev_data *u = dev->si_drv1; + struct xs_dev_transaction *trans, *tmp; LIST_FOREACH_SAFE(trans, &u->transactions, list, tmp) { - xenbus_transaction_end(trans->handle, 1); + xs_transaction_end(trans->handle, 1); LIST_REMOVE(trans, list); - free(trans, M_DEVBUF); + free(trans, M_XENSTORE); } - free(u, M_DEVBUF); + free(u, M_XENSTORE); return (0); } -static struct cdevsw xenbus_dev_cdevsw = { +static struct cdevsw xs_dev_cdevsw = { .d_version = 
D_VERSION, - .d_read = xenbus_dev_read, - .d_write = xenbus_dev_write, - .d_open = xenbus_dev_open, - .d_close = xenbus_dev_close, - .d_name = "xenbus_dev", + .d_read = xs_dev_read, + .d_write = xs_dev_write, + .d_open = xs_dev_open, + .d_close = xs_dev_close, + .d_name = "xs_dev", }; -static int -xenbus_dev_sysinit(void) +void +xs_dev_init() { - make_dev(&xenbus_dev_cdevsw, 0, UID_ROOT, GID_WHEEL, 0400, - "xen/xenbus"); - - return (0); + make_dev(&xs_dev_cdevsw, 0, UID_ROOT, GID_WHEEL, 0400, + "xen/xenstore"); } -SYSINIT(xenbus_dev_sysinit, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, - xenbus_dev_sysinit, NULL); diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/xenstore/xenstore_dev.c.orig head.xen/sys/xen/xenstore/xenstore_dev.c.orig --- head.moves/sys/xen/xenstore/xenstore_dev.c.orig 1969-12-31 17:00:00.000000000 -0700 +++ head.xen/sys/xen/xenstore/xenstore_dev.c.orig 2010-09-16 17:03:57.322660284 -0600 @@ -0,0 +1,229 @@ +/* + * xenbus_dev.c + * + * Driver giving user-space access to the kernel's xenbus connection + * to xenstore. + * + * Copyright (c) 2005, Christian Limpach + * Copyright (c) 2005, Rusty Russell, IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +#include +__FBSDID("$FreeBSD: head/sys/xen/xenstore/xenstore_dev.c -1 $"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +struct xenbus_dev_transaction { + LIST_ENTRY(xenbus_dev_transaction) list; + struct xenbus_transaction handle; +}; + +struct xenbus_dev_data { + /* In-progress transaction. */ + LIST_HEAD(xdd_list_head, xenbus_dev_transaction) transactions; + + /* Partial request. */ + unsigned int len; + union { + struct xsd_sockmsg msg; + char buffer[PAGE_SIZE]; + } u; + + /* Response queue. 
*/ +#define MASK_READ_IDX(idx) ((idx)&(PAGE_SIZE-1)) + char read_buffer[PAGE_SIZE]; + unsigned int read_cons, read_prod; +}; + +static int +xenbus_dev_read(struct cdev *dev, struct uio *uio, int ioflag) +{ + int error; + struct xenbus_dev_data *u = dev->si_drv1; + + while (u->read_prod == u->read_cons) { + error = tsleep(u, PCATCH, "xbdread", hz/10); + if (error && error != EWOULDBLOCK) + return (error); + } + + while (uio->uio_resid > 0) { + if (u->read_cons == u->read_prod) + break; + error = uiomove(&u->read_buffer[MASK_READ_IDX(u->read_cons)], + 1, uio); + if (error) + return (error); + u->read_cons++; + } + return (0); +} + +static void +queue_reply(struct xenbus_dev_data *u, char *data, unsigned int len) +{ + int i; + + for (i = 0; i < len; i++, u->read_prod++) + u->read_buffer[MASK_READ_IDX(u->read_prod)] = data[i]; + + KASSERT((u->read_prod - u->read_cons) <= sizeof(u->read_buffer), + ("xenstore reply too big")); + + wakeup(u); +} + +static int +xenbus_dev_write(struct cdev *dev, struct uio *uio, int ioflag) +{ + int error; + struct xenbus_dev_data *u = dev->si_drv1; + struct xenbus_dev_transaction *trans; + void *reply; + int len = uio->uio_resid; + + if ((len + u->len) > sizeof(u->u.buffer)) + return (EINVAL); + + error = uiomove(u->u.buffer + u->len, len, uio); + if (error) + return (error); + + u->len += len; + if (u->len < (sizeof(u->u.msg) + u->u.msg.len)) + return (0); + + switch (u->u.msg.type) { + case XS_TRANSACTION_START: + case XS_TRANSACTION_END: + case XS_DIRECTORY: + case XS_READ: + case XS_GET_PERMS: + case XS_RELEASE: + case XS_GET_DOMAIN_PATH: + case XS_WRITE: + case XS_MKDIR: + case XS_RM: + case XS_SET_PERMS: + error = xenbus_dev_request_and_reply(&u->u.msg, &reply); + if (!error) { + if (u->u.msg.type == XS_TRANSACTION_START) { + trans = malloc(sizeof(*trans), M_DEVBUF, + M_WAITOK); + trans->handle.id = strtoul(reply, NULL, 0); + LIST_INSERT_HEAD(&u->transactions, trans, list); + } else if (u->u.msg.type == XS_TRANSACTION_END) { + 
LIST_FOREACH(trans, &u->transactions, list) + if (trans->handle.id == u->u.msg.tx_id) + break; +#if 0 /* XXX does this mean the list is empty? */ + BUG_ON(&trans->list == &u->transactions); +#endif + LIST_REMOVE(trans, list); + free(trans, M_DEVBUF); + } + queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg)); + queue_reply(u, (char *)reply, u->u.msg.len); + free(reply, M_DEVBUF); + } + break; + + default: + error = EINVAL; + break; + } + + if (error == 0) + u->len = 0; + + return (error); +} + +static int +xenbus_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) +{ + struct xenbus_dev_data *u; + + if (xen_store_evtchn == 0) + return (ENOENT); +#if 0 /* XXX figure out if equiv needed */ + nonseekable_open(inode, filp); +#endif + u = malloc(sizeof(*u), M_DEVBUF, M_WAITOK|M_ZERO); + LIST_INIT(&u->transactions); + dev->si_drv1 = u; + + return (0); +} + +static int +xenbus_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) +{ + struct xenbus_dev_data *u = dev->si_drv1; + struct xenbus_dev_transaction *trans, *tmp; + + LIST_FOREACH_SAFE(trans, &u->transactions, list, tmp) { + xenbus_transaction_end(trans->handle, 1); + LIST_REMOVE(trans, list); + free(trans, M_DEVBUF); + } + + free(u, M_DEVBUF); + return (0); +} + +static struct cdevsw xenbus_dev_cdevsw = { + .d_version = D_VERSION, + .d_read = xenbus_dev_read, + .d_write = xenbus_dev_write, + .d_open = xenbus_dev_open, + .d_close = xenbus_dev_close, + .d_name = "xenbus_dev", +}; + +static int +xenbus_dev_sysinit(void) +{ + make_dev(&xenbus_dev_cdevsw, 0, UID_ROOT, GID_WHEEL, 0400, + "xen/xenbus"); + + return (0); +} +SYSINIT(xenbus_dev_sysinit, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, + xenbus_dev_sysinit, NULL); diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/xenstore/xenstore_internal.h head.xen/sys/xen/xenstore/xenstore_internal.h --- head.moves/sys/xen/xenstore/xenstore_internal.h 1969-12-31 17:00:00.000000000 -0700 +++ head.xen/sys/xen/xenstore/xenstore_internal.h 
2010-09-16 17:19:06.305262959 -0600 @@ -0,0 +1,39 @@ +/*- + * Core definitions and data structures shareable across OS platforms. + * + * Copyright (c) 2010 Justin T. Gibbs, Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * $FreeBSD$ + */ + +/* Initialize support for userspace access to the XenStore. */ +void xs_dev_init(void); + +/* Used by the XenStore character device to borrow kernel's store connection. 
*/ +int xs_dev_request_and_reply(struct xsd_sockmsg *msg, void **result); diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/xenstore/xenstorevar.h head.xen/sys/xen/xenstore/xenstorevar.h --- head.moves/sys/xen/xenstore/xenstorevar.h 2010-09-17 07:58:07.491802875 -0600 +++ head.xen/sys/xen/xenstore/xenstorevar.h 2010-09-16 17:23:24.846544931 -0600 @@ -1,10 +1,11 @@ /****************************************************************************** - * xenbus.h + * xenstorevar.h * - * Talks to Xen Store to figure out what devices we have. + * Method declarations and structures for accessing the XenStore.h * * Copyright (C) 2005 Rusty Russell, IBM Corporation * Copyright (C) 2005 XenSource Ltd. + * Copyright (C) 2009,2010 Justin Gibbs, Spectra Logic Corporation * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: @@ -27,233 +28,311 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * - * $FreeBSD: head/sys/xen/xenstore/xenstorevar.h -1 $ + * $FreeBSD: head/sys/xen/xenbus/xenbusvar.h 186557 2008-12-29 06:31:03Z kmacy $ */ -#ifndef _XEN_XENBUS_XENBUSVAR_H -#define _XEN_XENBUS_XENBUSVAR_H +#ifndef _XEN_XENSTORE_XENSTOREVAR_H +#define _XEN_XENSTORE_XENSTOREVAR_H #include #include #include +#include +#include + +#include #include + +#include #include #include #include "xenbus_if.h" -enum { - /* - * Path of this device node. - */ - XENBUS_IVAR_NODE, - - /* - * The device type (e.g. vif, vbd). - */ - XENBUS_IVAR_TYPE, - - /* - * The state of this device (not the otherend's state). - */ - XENBUS_IVAR_STATE, - - /* - * Domain ID of the other end device. - */ - XENBUS_IVAR_OTHEREND_ID, - - /* - * Path of the other end device. - */ - XENBUS_IVAR_OTHEREND_PATH -}; +/* XenStore allocations including XenStore data returned to clients. 
*/ +MALLOC_DECLARE(M_XENSTORE); -/* - * Simplified accessors for xenbus devices - */ -#define XENBUS_ACCESSOR(var, ivar, type) \ - __BUS_ACCESSOR(xenbus, var, XENBUS, ivar, type) +struct xenstore_domain_interface; +struct xs_watch; +extern struct xenstore_domain_interface *xen_store; -XENBUS_ACCESSOR(node, NODE, const char *) -XENBUS_ACCESSOR(type, TYPE, const char *) -XENBUS_ACCESSOR(state, STATE, enum xenbus_state) -XENBUS_ACCESSOR(otherend_id, OTHEREND_ID, int) -XENBUS_ACCESSOR(otherend_path, OTHEREND_PATH, const char *) +typedef void (xs_watch_cb_t)(struct xs_watch *, + const char **vec, unsigned int len); -/* Register callback to watch this node. */ -struct xenbus_watch +/* Register callback to watch subtree (node) in the XenStore. */ +struct xs_watch { - LIST_ENTRY(xenbus_watch) list; + LIST_ENTRY(xs_watch) list; /* Path being watched. */ char *node; /* Callback (executed in a process context with no locks held). */ - void (*callback)(struct xenbus_watch *, - const char **vec, unsigned int len); + xs_watch_cb_t *callback; }; +LIST_HEAD(xs_watch_list, xs_watch); -typedef int (*xenstore_event_handler_t)(void *); +typedef int (*xs_event_handler_t)(void *); -struct xenbus_transaction +struct xs_transaction { - uint32_t id; + uint32_t id; }; -#define XBT_NIL ((struct xenbus_transaction) { 0 }) +#define XST_NIL ((struct xs_transaction) { 0 }) -int xenbus_directory(struct xenbus_transaction t, const char *dir, - const char *node, unsigned int *num, char ***result); -int xenbus_read(struct xenbus_transaction t, const char *dir, - const char *node, unsigned int *len, void **result); -int xenbus_write(struct xenbus_transaction t, const char *dir, - const char *node, const char *string); -int xenbus_mkdir(struct xenbus_transaction t, const char *dir, - const char *node); -int xenbus_exists(struct xenbus_transaction t, const char *dir, - const char *node); -int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node); -int xenbus_transaction_start(struct 
xenbus_transaction *t); -int xenbus_transaction_end(struct xenbus_transaction t, int abort); - -/* - * Single read and scanf: returns errno or zero. If scancountp is - * non-null, then number of items scanned is returned in *scanncountp. +/** + * Fetch the contents of a directory in the XenStore. + * + * \param t The XenStore transaction covering this request. + * \param dir The dirname of the path to read. + * \param node The basename of the path to read. + * \param num The returned number of directory entries. + * \param result An array of directory entry strings. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + * + * \note The results buffer is malloced and should be free'd by the + * caller with 'free(*result, M_XENSTORE)'. */ -int xenbus_scanf(struct xenbus_transaction t, - const char *dir, const char *node, int *scancountp, const char *fmt, ...) - __attribute__((format(scanf, 5, 6))); - -/* Single printf and write: returns errno or 0. */ -int xenbus_printf(struct xenbus_transaction t, - const char *dir, const char *node, const char *fmt, ...) - __attribute__((format(printf, 4, 5))); +int xs_directory(struct xs_transaction t, const char *dir, + const char *node, unsigned int *num, const char ***result); -/* - * Generic read function: NULL-terminated triples of name, - * sprintf-style type string, and pointer. Returns 0 or errno. +/** + * Determine if a path exists in the XenStore. + * + * \param t The XenStore transaction covering this request. + * \param dir The dirname of the path to read. + * \param node The basename of the path to read. + * + * \retval 1 The path exists. + * \retval 0 The path does not exist or an error occurred attempting + * to make that determination. 
*/ -int xenbus_gather(struct xenbus_transaction t, const char *dir, ...); - -/* notifer routines for when the xenstore comes up */ -int register_xenstore_notifier(xenstore_event_handler_t func, void *arg, int priority); -#if 0 -void unregister_xenstore_notifier(); -#endif -int register_xenbus_watch(struct xenbus_watch *watch); -void unregister_xenbus_watch(struct xenbus_watch *watch); -void xs_suspend(void); -void xs_resume(void); - -/* Used by xenbus_dev to borrow kernel's store connection. */ -int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void **result); - -#if 0 - -#define XENBUS_IS_ERR_READ(str) ({ \ - if (!IS_ERR(str) && strlen(str) == 0) { \ - free(str, M_DEVBUF); \ - str = ERR_PTR(-ERANGE); \ - } \ - IS_ERR(str); \ -}) - -#endif - -#define XENBUS_EXIST_ERR(err) ((err) == ENOENT || (err) == ERANGE) - +int xs_exists(struct xs_transaction t, const char *dir, const char *node); /** - * Register a watch on the given path, using the given xenbus_watch structure - * for storage, and the given callback function as the callback. Return 0 on - * success, or errno on error. On success, the given path will be saved as - * watch->node, and remains the caller's to free. On error, watch->node will - * be NULL, the device will switch to XenbusStateClosing, and the error will - * be saved in the store. + * Get the contents of a single "file". Returns the contents in + * *result which should be freed with free(*result, M_XENSTORE) after + * use. The length of the value in bytes is returned in *len. + * + * \param t The XenStore transaction covering this request. + * \param dir The dirname of the file to read. + * \param node The basename of the file to read. + * \param len The amount of data read. + * \param result The returned contents from this file. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + * + * \note The results buffer is malloced and should be free'd by the + * caller with 'free(*result, M_XENSTORE)'. 
*/ -int xenbus_watch_path(device_t dev, char *path, - struct xenbus_watch *watch, - void (*callback)(struct xenbus_watch *, - const char **, unsigned int)); - +int xs_read(struct xs_transaction t, const char *dir, + const char *node, unsigned int *len, void **result); /** - * Register a watch on the given path/path2, using the given xenbus_watch - * structure for storage, and the given callback function as the callback. - * Return 0 on success, or errno on error. On success, the watched path - * (path/path2) will be saved as watch->node, and becomes the caller's to - * kfree(). On error, watch->node will be NULL, so the caller has nothing to - * free, the device will switch to XenbusStateClosing, and the error will be - * saved in the store. + * Write to a single file. + * + * \param t The XenStore transaction covering this request. + * \param dir The dirname of the file to write. + * \param node The basename of the file to write. + * \param string The NUL terminated string of data to write. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. */ -int xenbus_watch_path2(device_t dev, const char *path, - const char *path2, struct xenbus_watch *watch, - void (*callback)(struct xenbus_watch *, - const char **, unsigned int)); - +int xs_write(struct xs_transaction t, const char *dir, + const char *node, const char *string); /** - * Advertise in the store a change of the given driver to the given new_state. - * which case this is performed inside its own transaction. Return 0 on - * success, or errno on error. On error, the device will switch to - * XenbusStateClosing, and the error will be saved in the store. + * Create a new directory. + * + * \param t The XenStore transaction covering this request. + * \param dir The dirname of the directory to create. + * \param node The basename of the directory to create. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. 
*/ -int xenbus_switch_state(device_t dev, - XenbusState new_state); - +int xs_mkdir(struct xs_transaction t, const char *dir, + const char *node); /** - * Grant access to the given ring_mfn to the peer of the given device. - * Return 0 on success, or errno on error. On error, the device will - * switch to XenbusStateClosing, and the error will be saved in the - * store. The grant ring reference is returned in *refp. + * Remove a file or directory (directories must be empty). + * + * \param t The XenStore transaction covering this request. + * \param dir The dirname of the directory to remove. + * \param node The basename of the directory to remove. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. */ -int xenbus_grant_ring(device_t dev, unsigned long ring_mfn, int *refp); - +int xs_rm(struct xs_transaction t, const char *dir, const char *node); /** - * Allocate an event channel for the given xenbus_device, assigning the newly - * created local port to *port. Return 0 on success, or errno on error. On - * error, the device will switch to XenbusStateClosing, and the error will be - * saved in the store. + * Destroy a tree of files rooted at dir/node. + * + * \param t The XenStore transaction covering this request. + * \param dir The dirname of the directory to remove. + * \param node The basename of the directory to remove. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. */ -int xenbus_alloc_evtchn(device_t dev, int *port); - +int xs_rm_tree(struct xs_transaction t, const char *dir, + const char *node); /** - * Free an existing event channel. Returns 0 on success or errno on error. + * Start a transaction. + * + * Changes by others will not be seen during the lifetime of this + * transaction, and changes will not be visible to others until it + * is committed (xs_transaction_end). + * + * \param t The returned transaction. + * + * \return On success, 0. 
Otherwise an errno value indicating the + * type of failure. */ -int xenbus_free_evtchn(device_t dev, int port); - +int xs_transaction_start(struct xs_transaction *t); /** - * Return the state of the driver rooted at the given store path, or - * XenbusStateClosed if no state can be read. + * End a transaction. + * + * \param t The transaction to end/commit. + * \param abort If non-zero, the transaction is discarded + * instead of committed. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. */ -XenbusState xenbus_read_driver_state(const char *path); +int xs_transaction_end(struct xs_transaction t, int abort); +/* + * Single file read and scanf parsing of the result. + * + * \param t The XenStore transaction covering this request. + * \param dir The dirname of the path to read. + * \param node The basename of the path to read. + * \param scancountp The number of input values assigned (i.e. the result + * of scanf). + * \param fmt Scanf format string followed by a variable number of + * scanf input arguments. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + */ +int xs_scanf(struct xs_transaction t, + const char *dir, const char *node, int *scancountp, const char *fmt, ...) + __attribute__((format(scanf, 5, 6))); -/*** - * Report the given negative errno into the store, along with the given - * formatted message. +/** + * Printf formatted write to a XenStore file. + * + * \param t The XenStore transaction covering this request. + * \param dir The dirname of the path to read. + * \param node The basename of the path to read. + * \param fmt Printf format string followed by a variable number of + * printf arguments. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of write failure. */ -void xenbus_dev_error(device_t dev, int err, const char *fmt, - ...); +int xs_printf(struct xs_transaction t, const char *dir, + const char *node, const char *fmt, ...) 
+ __attribute__((format(printf, 4, 5))); +/** + * va_list version of xs_printf(). + * + * \param t The XenStore transaction covering this request. + * \param dir The dirname of the path to read. + * \param node The basename of the path to read. + * \param fmt Printf format string. + * \param ap Va_list of printf arguments. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of write failure. + */ +int xs_vprintf(struct xs_transaction t, const char *dir, + const char *node, const char *fmt, va_list ap); -/*** - * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by - * xenbus_switch_state(dev, NULL, XenbusStateClosing) to schedule an orderly - * closedown of this driver and its peer. +/** + * Multi-file read within a single directory and scanf parsing of + * the results. + * + * \param t The XenStore transaction covering this request. + * \param dir The dirname of the paths to read. + * \param ... A variable number of argument triples specifying + * the file name, scanf-style format string, and + * output variable (pointer to storage of the results). + * The last triple in the call must be terminated + * with a final NULL argument. A NULL format string + * will cause the entire contents of the given file + * to be assigned as a NUL terminated, M_XENSTORE heap + * backed, string to the output parameter of that tuple. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of read failure. + * + * Example: + * char protocol_abi[64]; + * uint32_t ring_ref; + * char *dev_type; + * int error; + * + * error = xs_gather(XST_NIL, xenbus_get_node(dev), + * "ring-ref", "%" PRIu32, &ring_ref, + * "protocol", "%63s", protocol_abi, + * "device-type", NULL, &dev_type, + * NULL); + * + * ...
+ * + * free(dev_type, M_XENSTORE); */ -void xenbus_dev_fatal(device_t dev, int err, const char *fmt, - ...); +int xs_gather(struct xs_transaction t, const char *dir, ...); -int xenbus_dev_init(void); +/** + * Register a XenStore watch. + * + * XenStore watches allow a client to be notified via a callback (embedded + * within the watch object) of changes to an object in the XenStore. + * + * \param watch A xenbus_watch struct with it's node and callback fields + * properly initialized. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of write failure. EEXIST errors from the XenStore + * are supressed, allowing multiple, physically different, + * xenbus_watch objects, to watch the same path in the XenStore. + */ +int xs_register_watch(struct xs_watch *watch); + +/** + * Unregister a XenStore watch. + * + * \param watch An xs_watch object previously used in a successful call + * to xs_register_watch(). + * + * The xs_watch object's node field is not altered by this call. + * It is the caller's responsibility to properly dispose of both the + * watch object and the data pointed to by watch->node. + */ +void xs_unregister_watch(struct xs_watch *watch); -const char *xenbus_strstate(enum xenbus_state state); -int xenbus_dev_is_online(device_t dev); -int xenbus_frontend_closed(device_t dev); +/** + * Allocate and return an sbuf containing the XenStore path string + * /. If name is the NUL string, the returned sbuf contains + * the path string . + * + * \param dir The NUL terminated directory prefix for new path. + * \param name The NUL terminated basename for the new path. + * + * \return A buffer containing the joined path. 
+ */ +struct sbuf *xs_join(const char *, const char *); -#endif /* _XEN_XENBUS_XENBUSVAR_H */ +#endif /* _XEN_XENSTORE_XENSTOREVAR_H */ diff -x .svn -x '*.o' -I '.*\$FreeBSD.*' -Nur head.moves/sys/xen/xenstore/xenstorevar.h.orig head.xen/sys/xen/xenstore/xenstorevar.h.orig --- head.moves/sys/xen/xenstore/xenstorevar.h.orig 1969-12-31 17:00:00.000000000 -0700 +++ head.xen/sys/xen/xenstore/xenstorevar.h.orig 2010-09-16 17:21:47.894771224 -0600 @@ -0,0 +1,284 @@ +/****************************************************************************** + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 XenSource Ltd. + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * $FreeBSD: head/sys/xen/xenstore/xenstorevar.h -1 $ + */ + +/** + * \file xenbusvar.h + * + * \brief Datastructures and function declarations for use by device + * drivers operating on the XenBus. + */ + +#ifndef _XEN_XENBUS_XENBUSVAR_H +#define _XEN_XENBUS_XENBUSVAR_H + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include + +#include "xenbus_if.h" + +/* XenBus allocations including XenStore data returned to clients. */ +MALLOC_DECLARE(M_XENBUS); + +enum { + /** + * Path of this device node. + */ + XENBUS_IVAR_NODE, + + /** + * The device type (e.g. vif, vbd). + */ + XENBUS_IVAR_TYPE, + + /** + * The device identifier (e.g. "832"). + */ + XENBUS_IVAR_DEVID, + + /** + * The state of this device (not the otherend's state). + */ + XENBUS_IVAR_STATE, + + /** + * Domain ID of the other end device. + */ + XENBUS_IVAR_OTHEREND_ID, + + /** + * Path of the other end device. + */ + XENBUS_IVAR_OTHEREND_PATH +}; + +/** + * Simplified accessors for xenbus devices + */ +#define XENBUS_ACCESSOR(var, ivar, type) \ + __BUS_ACCESSOR(xenbus, var, XENBUS, ivar, type) + +XENBUS_ACCESSOR(node, NODE, const char *) +XENBUS_ACCESSOR(type, TYPE, const char *) +XENBUS_ACCESSOR(devid, DEVID, const char *) +XENBUS_ACCESSOR(state, STATE, enum xenbus_state) +XENBUS_ACCESSOR(otherend_id, OTHEREND_ID, int) +XENBUS_ACCESSOR(otherend_path, OTHEREND_PATH, const char *) + +/** + * Return the state of a XenBus device. + * + * \param path The root XenStore path for the device. + * + * \return The current state of the device or XenbusStateClosed if no + * state can be read. + */ +XenbusState xenbus_read_driver_state(const char *path); + +/** + * Initialize and register a watch on the given path (client supplied storage). + * + * \param dev The XenBus device requesting the watch service. + * \param path The XenStore path of the object to be watched. The + * storage for this string must be stable for the lifetime + * of the watch.
+ * \param watch The watch object to use for this request. This object + * must be stable for the lifetime of the watch. + * \param callback The function to call when XenStore objects at or below + * path are modified. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + * + * \note On error, the device 'dev' will be switched to the XenbusStateClosing + * state and the returned error is saved in the per-device error node + * for dev in the XenStore. + */ +int xenbus_watch_path(device_t dev, char *path, + struct xs_watch *watch, + xs_watch_cb_t *callback); + +/** + * Initialize and register a watch at path/path2 in the XenStore. + * + * \param dev The XenBus device requesting the watch service. + * \param path The base XenStore path of the object to be watched. + * \param path2 The tail XenStore path of the object to be watched. + * \param watch The watch object to use for this request. This object + * must be stable for the lifetime of the watch. + * \param callback The function to call when XenStore objects at or below + * path are modified. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + * + * \note On error, \a dev will be switched to the XenbusStateClosing + * state and the returned error is saved in the per-device error node + * for \a dev in the XenStore. + * + * Similar to xenbus_watch_path, however the storage for the path to the + * watched object is allocated from the heap and filled with "path '/' path2". + * Should a call to this function succeed, it is the callers responsibility + * to free watch->node using the M_XENBUS malloc type. + */ +int xenbus_watch_path2(device_t dev, const char *path, + const char *path2, struct xs_watch *watch, + xs_watch_cb_t *callback); + +/** + * Grant access to the given ring_mfn to the peer of the given device. + * + * \param dev The device granting access to the ring page. 
+ * \param ring_mfn The guest machine page number of the page to grant + * peer access rights. + * \param refp[out] The grant reference for the page. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + * + * A successful call to xenbus_grant_ring should be paired with a call + * to gnttab_end_foreign_access() when foreign access to this page is no + * longer required. + * + * \note On error, \a dev will be switched to the XenbusStateClosing + * state and the returned error is saved in the per-device error node + * for \a dev in the XenStore. + */ +int xenbus_grant_ring(device_t dev, unsigned long ring_mfn, grant_ref_t *refp); + +/** + * Allocate an event channel for the given XenBus device. + * + * \param dev The device for which to allocate the event channel. + * \param port[out] The port identifier for the allocated event channel. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + * + * A successfully allocated event channel should be free'd using + * xenbus_free_evtchn(). + * + * \note On error, \a dev will be switched to the XenbusStateClosing + * state and the returned error is saved in the per-device error node + * for \a dev in the XenStore. + */ +int xenbus_alloc_evtchn(device_t dev, evtchn_port_t *port); + +/** + * Free an existing event channel. + * + * \param dev The device which allocated this event channel. + * \param port The port identifier for the event channel to free. + * + * \return On success, 0. Otherwise an errno value indicating the + * type of failure. + * + * \note On error, \a dev will be switched to the XenbusStateClosing + * state and the returned error is saved in the per-device error node + * for \a dev in the XenStore. + */ +int xenbus_free_evtchn(device_t dev, evtchn_port_t port); + +/** + * Record the given errno, along with the given, printf-style, formatted + * message in dev's device specific error node in the XenStore.
+ * + * \param dev The device which encountered the error. + * \param err The errno value corresponding to the error. + * \param fmt Printf format string followed by a variable number of + * printf arguments. + */ +void xenbus_dev_error(device_t dev, int err, const char *fmt, ...) + __attribute__((format(printf, 3, 4))); + +/** + * va_list version of xenbus_dev_error(). + * + * \param dev The device which encountered the error. + * \param err The errno value corresponding to the error. + * \param fmt Printf format string. + * \param ap Va_list of printf arguments. + */ +void xenbus_dev_verror(device_t dev, int err, const char *fmt, va_list ap) + __attribute__((format(printf, 3, 0))); + +/** + * Equivalent to xenbus_dev_error(), followed by + * xenbus_set_state(dev, XenbusStateClosing). + * + * \param dev The device which encountered the error. + * \param err The errno value corresponding to the error. + * \param fmt Printf format string followed by a variable number of + * printf arguments. + */ +void xenbus_dev_fatal(device_t dev, int err, const char *fmt, ...) + __attribute__((format(printf, 3, 4))); + +/** + * va_list version of xenbus_dev_fatal(). + * + * \param dev The device which encountered the error. + * \param err The errno value corresponding to the error. + * \param fmt Printf format string. + * \param ap Va_list of printf arguments. + */ +void xenbus_dev_vfatal(device_t dev, int err, const char *fmt, va_list) + __attribute__((format(printf, 3, 0))); + +/** + * Convert a member of the xenbus_state enum into an ASCII string. + * + * \param state The XenBus state to lookup. + * + * \return A string representing state or, for unrecognized states, + * the string "Unknown". + */ +const char *xenbus_strstate(enum xenbus_state state); + +/** + * Return the value of a XenBus device's "online" node within the XenStore. + * + * \param dev The XenBus device to query. + * + * \return The value of the "online" node for the device.
If the node + * does not exist, 0 (offline) is returned. + */ +int xenbus_dev_is_online(device_t dev); + +#endif /* _XEN_XENBUS_XENBUSVAR_H */