diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index c6c62ae..ef4ad07 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -4274,6 +4274,30 @@ pmap_copy_page(vm_page_t msrc, vm_page_t mdst) pagecopy((void *)src, (void *)dst); } +void +pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], + vm_offset_t b_offset, int xfersize) +{ + void *a_cp, *b_cp; + vm_offset_t a_pg_offset, b_pg_offset; + int cnt; + + while (xfersize > 0) { + a_pg_offset = a_offset & PAGE_MASK; + cnt = min(xfersize, PAGE_SIZE - a_pg_offset); + a_cp = (char *)PHYS_TO_DMAP(ma[a_offset >> PAGE_SHIFT]-> + phys_addr) + a_pg_offset; + b_pg_offset = b_offset & PAGE_MASK; + cnt = min(cnt, PAGE_SIZE - b_pg_offset); + b_cp = (char *)PHYS_TO_DMAP(mb[b_offset >> PAGE_SHIFT]-> + phys_addr) + b_pg_offset; + bcopy(a_cp, b_cp, cnt); + a_offset += cnt; + b_offset += cnt; + xfersize -= cnt; + } +} + /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may diff --git a/sys/arm/arm/pmap-v6.c b/sys/arm/arm/pmap-v6.c index 72ef310..dd209ab 100644 --- a/sys/arm/arm/pmap-v6.c +++ b/sys/arm/arm/pmap-v6.c @@ -3314,6 +3314,42 @@ pmap_copy_page_generic(vm_paddr_t src, vm_paddr_t dst) } void +pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], + vm_offset_t b_offset, int xfersize) +{ + vm_page_t a_pg, b_pg; + vm_offset_t a_pg_offset, b_pg_offset; + int cnt; + + mtx_lock(&cmtx); + while (xfersize > 0) { + a_pg = ma[a_offset >> PAGE_SHIFT]; + a_pg_offset = a_offset & PAGE_MASK; + cnt = min(xfersize, PAGE_SIZE - a_pg_offset); + b_pg = mb[b_offset >> PAGE_SHIFT]; + b_pg_offset = b_offset & PAGE_MASK; + cnt = min(cnt, PAGE_SIZE - b_pg_offset); + *csrc_pte = L2_S_PROTO | VM_PAGE_TO_PHYS(a_pg) | + pte_l2_s_cache_mode; + pmap_set_prot(csrc_pte, VM_PROT_READ, 0); + PTE_SYNC(csrc_pte); + *cdst_pte = L2_S_PROTO | VM_PAGE_TO_PHYS(b_pg) | + pte_l2_s_cache_mode; + pmap_set_prot(cdst_pte, VM_PROT_READ | VM_PROT_WRITE, 0); + PTE_SYNC(cdst_pte); + cpu_tlb_flushD_SE(csrcp); + cpu_tlb_flushD_SE(cdstp); + cpu_cpwait(); + bcopy((char *)csrcp + a_pg_offset, (char *)cdstp + b_pg_offset, + cnt); + cpu_idcache_wbinv_range(cdstp + b_pg_offset, cnt); + pmap_l2cache_wbinv_range(cdstp + b_pg_offset, + VM_PAGE_TO_PHYS(b_pg) + b_pg_offset, cnt); + } + mtx_unlock(&cmtx); +} + +void pmap_copy_page(vm_page_t src, vm_page_t dst) { diff --git a/sys/arm/arm/pmap.c b/sys/arm/arm/pmap.c index 28b4912..9f2ec71 100644 --- a/sys/arm/arm/pmap.c +++ b/sys/arm/arm/pmap.c @@ -258,6 +258,9 @@ pt_entry_t pte_l1_c_proto; pt_entry_t pte_l2_s_proto; void (*pmap_copy_page_func)(vm_paddr_t, vm_paddr_t); +void (*pmap_copy_page_offs_func)(vm_paddr_t a_phys, + vm_offset_t a_offs, vm_paddr_t b_phys, vm_offset_t b_offs, + int cnt); void (*pmap_zero_page_func)(vm_paddr_t, int, int); struct msgbuf *msgbufp = 0; @@ -401,6 +404,13 @@ static struct vm_object pvzone_obj; static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0; static struct rwlock pvh_global_lock; +void pmap_copy_page_offs_generic(vm_paddr_t a_phys, vm_offset_t a_offs, + vm_paddr_t b_phys, vm_offset_t b_offs, int cnt); +#if ARM_MMU_XSCALE == 1 +void pmap_copy_page_offs_xscale(vm_paddr_t a_phys, vm_offset_t a_offs, + vm_paddr_t b_phys, vm_offset_t b_offs, int cnt); +#endif + /* * This list exists for the benefit of pmap_map_chunk(). 
It keeps track * of the kernel L2 tables during bootstrap, so that pmap_map_chunk() can @@ -485,6 +495,7 @@ pmap_pte_init_generic(void) pte_l2_s_proto = L2_S_PROTO_generic; pmap_copy_page_func = pmap_copy_page_generic; + pmap_copy_page_offs_func = pmap_copy_page_offs_generic; pmap_zero_page_func = pmap_zero_page_generic; } @@ -661,6 +672,7 @@ pmap_pte_init_xscale(void) #ifdef CPU_XSCALE_CORE3 pmap_copy_page_func = pmap_copy_page_generic; + pmap_copy_page_offs_func = pmap_copy_page_offs_generic; pmap_zero_page_func = pmap_zero_page_generic; xscale_use_minidata = 0; /* Make sure it is L2-cachable */ @@ -673,6 +685,7 @@ pmap_pte_init_xscale(void) #else pmap_copy_page_func = pmap_copy_page_xscale; + pmap_copy_page_offs_func = pmap_copy_page_offs_xscale; pmap_zero_page_func = pmap_zero_page_xscale; #endif @@ -4301,6 +4314,29 @@ pmap_copy_page_generic(vm_paddr_t src, vm_paddr_t dst) cpu_l2cache_inv_range(csrcp, PAGE_SIZE); cpu_l2cache_wbinv_range(cdstp, PAGE_SIZE); } + +void +pmap_copy_page_offs_generic(vm_paddr_t a_phys, vm_offset_t a_offs, + vm_paddr_t b_phys, vm_offset_t b_offs, int cnt) +{ + + mtx_lock(&cmtx); + *csrc_pte = L2_S_PROTO | a_phys | + L2_S_PROT(PTE_KERNEL, VM_PROT_READ) | pte_l2_s_cache_mode; + PTE_SYNC(csrc_pte); + *cdst_pte = L2_S_PROTO | b_phys | + L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) | pte_l2_s_cache_mode; + PTE_SYNC(cdst_pte); + cpu_tlb_flushD_SE(csrcp); + cpu_tlb_flushD_SE(cdstp); + cpu_cpwait(); + bcopy((char *)csrcp + a_offs, (char *)cdstp + b_offs, cnt); + mtx_unlock(&cmtx); + cpu_dcache_inv_range(csrcp + a_offs, cnt); + cpu_dcache_wbinv_range(cdstp + b_offs, cnt); + cpu_l2cache_inv_range(csrcp + a_offs, cnt); + cpu_l2cache_wbinv_range(cdstp + b_offs, cnt); +} #endif /* (ARM_MMU_GENERIC + ARM_MMU_SA1) != 0 */ #if ARM_MMU_XSCALE == 1 @@ -4345,6 +4381,28 @@ pmap_copy_page_xscale(vm_paddr_t src, vm_paddr_t dst) mtx_unlock(&cmtx); xscale_cache_clean_minidata(); } + +void +pmap_copy_page_offs_xscale(vm_paddr_t a_phys, vm_offset_t a_offs, + vm_paddr_t b_phys, vm_offset_t b_offs, int cnt) +{ + + mtx_lock(&cmtx); + *csrc_pte = L2_S_PROTO | a_phys | + L2_S_PROT(PTE_KERNEL, VM_PROT_READ) | + L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X); + PTE_SYNC(csrc_pte); + *cdst_pte = L2_S_PROTO | b_phys | + L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) | + L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X); + PTE_SYNC(cdst_pte); + cpu_tlb_flushD_SE(csrcp); + cpu_tlb_flushD_SE(cdstp); + cpu_cpwait(); + bcopy((char *)csrcp + a_offs, (char *)cdstp + b_offs, cnt); + mtx_unlock(&cmtx); + xscale_cache_clean_minidata(); +} #endif /* ARM_MMU_XSCALE == 1 */ void @@ -4371,8 +4429,38 @@ pmap_copy_page(vm_page_t src, vm_page_t dst) #endif } +void +pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], + vm_offset_t b_offset, int xfersize) +{ + vm_page_t a_pg, b_pg; + vm_offset_t a_pg_offset, b_pg_offset; + int cnt; +#ifdef ARM_USE_SMALL_ALLOC + vm_offset_t a_va, b_va; +#endif - + cpu_dcache_wbinv_all(); + cpu_l2cache_wbinv_all(); + while (xfersize > 0) { + a_pg = ma[a_offset >> PAGE_SHIFT]; + a_pg_offset = a_offset & PAGE_MASK; + cnt = min(xfersize, PAGE_SIZE - a_pg_offset); + b_pg = mb[b_offset >> PAGE_SHIFT]; + b_pg_offset = b_offset & PAGE_MASK; + cnt = min(cnt, PAGE_SIZE - b_pg_offset); +#ifdef ARM_USE_SMALL_ALLOC + a_va = arm_ptovirt(VM_PAGE_TO_PHYS(a_pg)) + a_pg_offset; + b_va = arm_ptovirt(VM_PAGE_TO_PHYS(b_pg)) + b_pg_offset; + bcopy((char *)a_va, (char *)b_va, cnt); + cpu_dcache_wbinv_range(b_va, cnt); + cpu_l2cache_wbinv_range(b_va, cnt); +#else + pmap_copy_page_offs_func(VM_PAGE_TO_PHYS(a_pg), a_pg_offset, + 
VM_PAGE_TO_PHYS(b_pg), b_pg_offset, cnt); +#endif + } +} /* * this routine returns true if a physical page resides diff --git a/sys/arm/include/pmap.h b/sys/arm/include/pmap.h index 523499f..7c8d073 100644 --- a/sys/arm/include/pmap.h +++ b/sys/arm/include/pmap.h @@ -533,6 +533,8 @@ extern pt_entry_t pte_l1_c_proto; extern pt_entry_t pte_l2_s_proto; extern void (*pmap_copy_page_func)(vm_paddr_t, vm_paddr_t); +extern void (*pmap_copy_page_offs_func)(vm_paddr_t a_phys, + vm_offset_t a_offs, vm_paddr_t b_phys, vm_offset_t b_offs, int cnt); extern void (*pmap_zero_page_func)(vm_paddr_t, int, int); #if (ARM_MMU_GENERIC + ARM_MMU_V6 + ARM_MMU_V7 + ARM_MMU_SA1) != 0 || defined(CPU_XSCALE_81342) diff --git a/sys/cam/ata/ata_da.c b/sys/cam/ata/ata_da.c index 4252197..c700e7c 100644 --- a/sys/cam/ata/ata_da.c +++ b/sys/cam/ata/ata_da.c @@ -1167,6 +1167,8 @@ adaregister(struct cam_periph *periph, void *arg) ((softc->flags & ADA_FLAG_CAN_CFA) && !(softc->flags & ADA_FLAG_CAN_48BIT))) softc->disk->d_flags |= DISKFLAG_CANDELETE; + if ((cpi.hba_misc & PIM_UNMAPPED) != 0) + softc->disk->d_flags |= DISKFLAG_UNMAPPED_BIO; strlcpy(softc->disk->d_descr, cgd->ident_data.model, MIN(sizeof(softc->disk->d_descr), sizeof(cgd->ident_data.model))); strlcpy(softc->disk->d_ident, cgd->ident_data.serial, @@ -1431,13 +1433,19 @@ adastart(struct cam_periph *periph, union ccb *start_ccb) return; } #endif + KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 || + round_page(bp->bio_bcount + bp->bio_ma_offset) / + PAGE_SIZE == bp->bio_ma_n, + ("Short bio %p", bp)); cam_fill_ataio(ataio, ada_retry_count, adadone, - bp->bio_cmd == BIO_READ ? - CAM_DIR_IN : CAM_DIR_OUT, + (bp->bio_cmd == BIO_READ ? CAM_DIR_IN : + CAM_DIR_OUT) | ((bp->bio_flags & BIO_UNMAPPED) + != 0 ? CAM_DATA_BIO : 0), tag_code, - bp->bio_data, + ((bp->bio_flags & BIO_UNMAPPED) != 0) ? (void *)bp : + bp->bio_data, bp->bio_bcount, ada_default_timeout*1000); diff --git a/sys/cam/cam_ccb.h b/sys/cam/cam_ccb.h index a80880a..bcbf414 100644 --- a/sys/cam/cam_ccb.h +++ b/sys/cam/cam_ccb.h @@ -42,7 +42,6 @@ #include #include - /* General allocation length definitions for CCB structures */ #define IOCDBLEN CAM_MAX_CDBLEN /* Space for CDB bytes/pointer */ #define VUHBALEN 14 /* Vendor Unique HBA length */ @@ -572,7 +571,8 @@ typedef enum { PIM_NOINITIATOR = 0x20, /* Initiator role not supported. 
*/ PIM_NOBUSRESET = 0x10, /* User has disabled initial BUS RESET */ PIM_NO_6_BYTE = 0x08, /* Do not send 6-byte commands */ - PIM_SEQSCAN = 0x04 /* Do bus scans sequentially, not in parallel */ + PIM_SEQSCAN = 0x04, /* Do bus scans sequentially, not in parallel */ + PIM_UNMAPPED = 0x02, } pi_miscflag; /* Path Inquiry CCB */ diff --git a/sys/cam/cam_periph.c b/sys/cam/cam_periph.c index 523e549..fa4fa04 100644 --- a/sys/cam/cam_periph.c +++ b/sys/cam/cam_periph.c @@ -734,6 +734,8 @@ cam_periph_mapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo) case XPT_CONT_TARGET_IO: if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_NONE) return(0); + KASSERT((ccb->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_VADDR, + ("not VADDR for SCSI_IO %p %x\n", ccb, ccb->ccb_h.flags)); data_ptrs[0] = &ccb->csio.data_ptr; lengths[0] = ccb->csio.dxfer_len; @@ -743,6 +745,8 @@ cam_periph_mapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo) case XPT_ATA_IO: if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_NONE) return(0); + KASSERT((ccb->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_VADDR, + ("not VADDR for ATA_IO %p %x\n", ccb, ccb->ccb_h.flags)); data_ptrs[0] = &ccb->ataio.data_ptr; lengths[0] = ccb->ataio.dxfer_len; @@ -846,7 +850,7 @@ cam_periph_mapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo) * into a larger area of VM, or if userland races against * vmapbuf() after the useracc() check. */ - if (vmapbuf(mapinfo->bp[i]) < 0) { + if (vmapbuf(mapinfo->bp[i], 1) < 0) { for (j = 0; j < i; ++j) { *data_ptrs[j] = mapinfo->bp[j]->b_saveaddr; vunmapbuf(mapinfo->bp[j]); diff --git a/sys/cam/scsi/scsi_all.c b/sys/cam/scsi/scsi_all.c index 9dac9c0..332732f 100644 --- a/sys/cam/scsi/scsi_all.c +++ b/sys/cam/scsi/scsi_all.c @@ -5679,7 +5679,11 @@ scsi_read_write(struct ccb_scsiio *csio, u_int32_t retries, u_int8_t *data_ptr, u_int32_t dxfer_len, u_int8_t sense_len, u_int32_t timeout) { + int read; u_int8_t cdb_len; + + read = (readop & SCSI_RW_DIRMASK) == SCSI_RW_READ; + /* * Use the smallest possible command to perform the operation * as some legacy hardware does not support the 10 byte commands. @@ -5696,7 +5700,7 @@ scsi_read_write(struct ccb_scsiio *csio, u_int32_t retries, struct scsi_rw_6 *scsi_cmd; scsi_cmd = (struct scsi_rw_6 *)&csio->cdb_io.cdb_bytes; - scsi_cmd->opcode = readop ? READ_6 : WRITE_6; + scsi_cmd->opcode = read ? READ_6 : WRITE_6; scsi_ulto3b(lba, scsi_cmd->addr); scsi_cmd->length = block_count & 0xff; scsi_cmd->control = 0; @@ -5715,7 +5719,7 @@ scsi_read_write(struct ccb_scsiio *csio, u_int32_t retries, struct scsi_rw_10 *scsi_cmd; scsi_cmd = (struct scsi_rw_10 *)&csio->cdb_io.cdb_bytes; - scsi_cmd->opcode = readop ? READ_10 : WRITE_10; + scsi_cmd->opcode = read ? READ_10 : WRITE_10; scsi_cmd->byte2 = byte2; scsi_ulto4b(lba, scsi_cmd->addr); scsi_cmd->reserved = 0; @@ -5738,7 +5742,7 @@ scsi_read_write(struct ccb_scsiio *csio, u_int32_t retries, struct scsi_rw_12 *scsi_cmd; scsi_cmd = (struct scsi_rw_12 *)&csio->cdb_io.cdb_bytes; - scsi_cmd->opcode = readop ? READ_12 : WRITE_12; + scsi_cmd->opcode = read ? READ_12 : WRITE_12; scsi_cmd->byte2 = byte2; scsi_ulto4b(lba, scsi_cmd->addr); scsi_cmd->reserved = 0; @@ -5760,7 +5764,7 @@ scsi_read_write(struct ccb_scsiio *csio, u_int32_t retries, struct scsi_rw_16 *scsi_cmd; scsi_cmd = (struct scsi_rw_16 *)&csio->cdb_io.cdb_bytes; - scsi_cmd->opcode = readop ? READ_16 : WRITE_16; + scsi_cmd->opcode = read ? 
READ_16 : WRITE_16; scsi_cmd->byte2 = byte2; scsi_u64to8b(lba, scsi_cmd->addr); scsi_cmd->reserved = 0; @@ -5771,7 +5775,8 @@ scsi_read_write(struct ccb_scsiio *csio, u_int32_t retries, cam_fill_csio(csio, retries, cbfcnp, - /*flags*/readop ? CAM_DIR_IN : CAM_DIR_OUT, + (read ? CAM_DIR_IN : CAM_DIR_OUT) | + ((readop & SCSI_RW_BIO) != 0 ? CAM_DATA_BIO : 0), tag_action, data_ptr, dxfer_len, diff --git a/sys/cam/scsi/scsi_all.h b/sys/cam/scsi/scsi_all.h index 0693e1c..330330d 100644 --- a/sys/cam/scsi/scsi_all.h +++ b/sys/cam/scsi/scsi_all.h @@ -2354,6 +2354,10 @@ void scsi_write_buffer(struct ccb_scsiio *csio, u_int32_t retries, uint8_t *data_ptr, uint32_t param_list_length, uint8_t sense_len, uint32_t timeout); +#define SCSI_RW_READ 0x0001 +#define SCSI_RW_WRITE 0x0002 +#define SCSI_RW_DIRMASK 0x0003 +#define SCSI_RW_BIO 0x1000 void scsi_read_write(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, int readop, u_int8_t byte2, diff --git a/sys/cam/scsi/scsi_cd.c b/sys/cam/scsi/scsi_cd.c index a7c4c5b..a6d340f 100644 --- a/sys/cam/scsi/scsi_cd.c +++ b/sys/cam/scsi/scsi_cd.c @@ -1575,7 +1575,8 @@ cdstart(struct cam_periph *periph, union ccb *start_ccb) /*retries*/ cd_retry_count, /* cbfcnp */ cddone, MSG_SIMPLE_Q_TAG, - /* read */bp->bio_cmd == BIO_READ, + /* read */bp->bio_cmd == BIO_READ ? + SCSI_RW_READ : SCSI_RW_WRITE, /* byte2 */ 0, /* minimum_cmd_size */ 10, /* lba */ bp->bio_offset / diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c index 7854215..c886e9e 100644 --- a/sys/cam/scsi/scsi_da.c +++ b/sys/cam/scsi/scsi_da.c @@ -1180,7 +1180,7 @@ dadump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t leng /*retries*/0, dadone, MSG_ORDERED_Q_TAG, - /*read*/FALSE, + /*read*/SCSI_RW_WRITE, /*byte2*/0, /*minimum_cmd_size*/ softc->minimum_cmd_size, offset / secsize, @@ -1753,6 +1753,8 @@ daregister(struct cam_periph *periph, void *arg) softc->disk->d_flags = 0; if ((softc->quirks & DA_Q_NO_SYNC_CACHE) == 0) softc->disk->d_flags |= DISKFLAG_CANFLUSHCACHE; + if ((cpi.hba_misc & PIM_UNMAPPED) != 0) + softc->disk->d_flags |= DISKFLAG_UNMAPPED_BIO; cam_strvis(softc->disk->d_descr, cgd->inq_data.vendor, sizeof(cgd->inq_data.vendor), sizeof(softc->disk->d_descr)); strlcat(softc->disk->d_descr, " ", sizeof(softc->disk->d_descr)); @@ -1981,14 +1983,18 @@ dastart(struct cam_periph *periph, union ccb *start_ccb) /*retries*/da_retry_count, /*cbfcnp*/dadone, /*tag_action*/tag_code, - /*read_op*/bp->bio_cmd - == BIO_READ, + /*read_op*/(bp->bio_cmd == BIO_READ ? + SCSI_RW_READ : SCSI_RW_WRITE) | + ((bp->bio_flags & BIO_UNMAPPED) != 0 ? + SCSI_RW_BIO : 0), /*byte2*/0, softc->minimum_cmd_size, /*lba*/bp->bio_pblkno, /*block_count*/bp->bio_bcount / softc->params.secsize, - /*data_ptr*/ bp->bio_data, + /*data_ptr*/ (bp->bio_flags & + BIO_UNMAPPED) != 0 ? 
(void *)bp : + bp->bio_data, /*dxfer_len*/ bp->bio_bcount, /*sense_len*/SSD_FULL_SIZE, da_default_timeout * 1000); diff --git a/sys/dev/ahci/ahci.c b/sys/dev/ahci/ahci.c index 8e692bd..d03c8af 100644 --- a/sys/dev/ahci/ahci.c +++ b/sys/dev/ahci/ahci.c @@ -2903,7 +2903,7 @@ ahciaction(struct cam_sim *sim, union ccb *ccb) if (ch->caps & AHCI_CAP_SPM) cpi->hba_inquiry |= PI_SATAPM; cpi->target_sprt = 0; - cpi->hba_misc = PIM_SEQSCAN; + cpi->hba_misc = PIM_SEQSCAN | PIM_UNMAPPED; cpi->hba_eng_cnt = 0; if (ch->caps & AHCI_CAP_SPM) cpi->max_target = 15; diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c index b72f294..eccb1ee 100644 --- a/sys/dev/md/md.c +++ b/sys/dev/md/md.c @@ -110,6 +110,19 @@ static int md_malloc_wait; SYSCTL_INT(_vm, OID_AUTO, md_malloc_wait, CTLFLAG_RW, &md_malloc_wait, 0, "Allow malloc to wait for memory allocations"); +static int md_unmapped_swap; +SYSCTL_INT(_debug, OID_AUTO, md_unmapped_swap, CTLFLAG_RD, + &md_unmapped_swap, 0, + ""); +static int md_unmapped_vnode; +SYSCTL_INT(_debug, OID_AUTO, md_unmapped_vnode, CTLFLAG_RD, + &md_unmapped_vnode, 0, + ""); +static int md_unmapped_malloc; +SYSCTL_INT(_debug, OID_AUTO, md_unmapped_malloc, CTLFLAG_RD, + &md_unmapped_malloc, 0, + ""); + #if defined(MD_ROOT) && !defined(MD_ROOT_FSTYPE) #define MD_ROOT_FSTYPE "ufs" #endif @@ -414,13 +427,103 @@ g_md_start(struct bio *bp) wakeup(sc); } +#define MD_MALLOC_MOVE_ZERO 1 +#define MD_MALLOC_MOVE_FILL 2 +#define MD_MALLOC_MOVE_READ 3 +#define MD_MALLOC_MOVE_WRITE 4 +#define MD_MALLOC_MOVE_CMP 5 + +static int +md_malloc_move(vm_page_t **mp, vm_offset_t *ma_offs, unsigned sectorsize, + void *ptr, u_char fill, int op) +{ + struct sf_buf *sf; + vm_page_t m, *mp1; + char *p, first; + vm_offset_t ma_offs1; + off_t *uc; + unsigned n; + int error, i, sz, first_read; + + m = NULL; + error = 0; + sf = NULL; + /* if (op == MD_MALLOC_MOVE_CMP) { gcc */ + first = 0; + first_read = 0; + uc = ptr; + mp1 = *mp; + ma_offs1 = *ma_offs; + /* } */ + sched_pin(); + for (n = sectorsize; n != 0; n -= sz) { + sz = imin(PAGE_SIZE - *ma_offs, n); + if (m != **mp) { + if (sf != NULL) + sf_buf_free(sf); + m = **mp; + sf = sf_buf_alloc(m, SFB_CPUPRIVATE | + (md_malloc_wait ? 
0 : SFB_NOWAIT)); + if (sf == NULL) { + error = ENOMEM; + break; + } + } + p = (char *)sf_buf_kva(sf) + *ma_offs; + switch (op) { + case MD_MALLOC_MOVE_ZERO: + bzero(p, sz); + break; + case MD_MALLOC_MOVE_FILL: + memset(p, fill, sz); + break; + case MD_MALLOC_MOVE_READ: + bcopy(ptr, p, sz); + cpu_flush_dcache(p, sz); + break; + case MD_MALLOC_MOVE_WRITE: + bcopy(p, ptr, sz); + break; + case MD_MALLOC_MOVE_CMP: + for (i = 0; i < sz; i++, p++) { + if (!first_read) { + *uc = (u_char)*p; + first = (u_char)*p; + first_read = 1; + } else if (*p != first) { + error = EDOOFUS; + break; + } + } + break; + } + if (error != 0) + break; + *ma_offs += sz; + *ma_offs %= PAGE_SIZE; + if (*ma_offs == 0) + (*mp)++; + } + + if (sf != NULL) + sf_buf_free(sf); + sched_unpin(); + if (op == MD_MALLOC_MOVE_CMP && error != 0) { + *mp = mp1; + *ma_offs = ma_offs1; + } + return (error); +} + static int mdstart_malloc(struct md_s *sc, struct bio *bp) { - int i, error; u_char *dst; + vm_page_t *m; + int i, error, error1, notmapped; off_t secno, nsec, uc; uintptr_t sp, osp; + vm_offset_t ma_offs; switch (bp->bio_cmd) { case BIO_READ: @@ -431,9 +534,17 @@ mdstart_malloc(struct md_s *sc, struct bio *bp) return (EOPNOTSUPP); } + notmapped = (bp->bio_flags & BIO_UNMAPPED) != 0; + if (notmapped) { + m = bp->bio_ma; + ma_offs = bp->bio_ma_offset; + dst = NULL; + } else { + dst = bp->bio_data; + } + nsec = bp->bio_length / sc->sectorsize; secno = bp->bio_offset / sc->sectorsize; - dst = bp->bio_data; error = 0; while (nsec--) { osp = s_read(sc->indir, secno); @@ -441,21 +552,45 @@ mdstart_malloc(struct md_s *sc, struct bio *bp) if (osp != 0) error = s_write(sc->indir, secno, 0); } else if (bp->bio_cmd == BIO_READ) { - if (osp == 0) - bzero(dst, sc->sectorsize); - else if (osp <= 255) - memset(dst, osp, sc->sectorsize); - else { - bcopy((void *)osp, dst, sc->sectorsize); - cpu_flush_dcache(dst, sc->sectorsize); + if (osp == 0) { + if (notmapped) { + error = md_malloc_move(&m, &ma_offs, + sc->sectorsize, NULL, 0, + MD_MALLOC_MOVE_ZERO); + } else + bzero(dst, sc->sectorsize); + } else if (osp <= 255) { + if (notmapped) { + error = md_malloc_move(&m, &ma_offs, + sc->sectorsize, NULL, osp, + MD_MALLOC_MOVE_FILL); + } else + memset(dst, osp, sc->sectorsize); + } else { + if (notmapped) { + error = md_malloc_move(&m, &ma_offs, + sc->sectorsize, (void *)osp, 0, + MD_MALLOC_MOVE_READ); + } else { + bcopy((void *)osp, dst, sc->sectorsize); + cpu_flush_dcache(dst, sc->sectorsize); + } } osp = 0; } else if (bp->bio_cmd == BIO_WRITE) { if (sc->flags & MD_COMPRESS) { - uc = dst[0]; - for (i = 1; i < sc->sectorsize; i++) - if (dst[i] != uc) - break; + if (notmapped) { + error1 = md_malloc_move(&m, &ma_offs, + sc->sectorsize, &uc, 0, + MD_MALLOC_MOVE_CMP); + i = error1 == 0 ? 
sc->sectorsize : 0; + } else { + uc = dst[0]; + for (i = 1; i < sc->sectorsize; i++) { + if (dst[i] != uc) + break; + } + } } else { i = 0; uc = 0; @@ -472,10 +607,26 @@ mdstart_malloc(struct md_s *sc, struct bio *bp) error = ENOSPC; break; } - bcopy(dst, (void *)sp, sc->sectorsize); + if (notmapped) { + error = md_malloc_move(&m, + &ma_offs, sc->sectorsize, + (void *)sp, 0, + MD_MALLOC_MOVE_WRITE); + } else { + bcopy(dst, (void *)sp, + sc->sectorsize); + } error = s_write(sc->indir, secno, sp); } else { - bcopy(dst, (void *)osp, sc->sectorsize); + if (notmapped) { + error = md_malloc_move(&m, + &ma_offs, sc->sectorsize, + (void *)osp, 0, + MD_MALLOC_MOVE_WRITE); + } else { + bcopy(dst, (void *)osp, + sc->sectorsize); + } osp = 0; } } @@ -487,7 +638,8 @@ mdstart_malloc(struct md_s *sc, struct bio *bp) if (error != 0) break; secno++; - dst += sc->sectorsize; + if (!notmapped) + dst += sc->sectorsize; } bp->bio_resid = 0; return (error); @@ -628,11 +780,10 @@ mdstart_vnode(struct md_s *sc, struct bio *bp) static int mdstart_swap(struct md_s *sc, struct bio *bp) { - struct sf_buf *sf; - int rv, offs, len, lastend; - vm_pindex_t i, lastp; vm_page_t m; u_char *p; + vm_pindex_t i, lastp; + int rv, ma_offs, offs, len, lastend; switch (bp->bio_cmd) { case BIO_READ: @@ -644,6 +795,12 @@ mdstart_swap(struct md_s *sc, struct bio *bp) } p = bp->bio_data; + if ((bp->bio_flags & BIO_UNMAPPED) == 0) { + ma_offs = 0; + } else { + atomic_add_int(&md_unmapped_swap, 1); + ma_offs = bp->bio_ma_offset; + } /* * offs is the offset at which to start operating on the @@ -661,19 +818,12 @@ mdstart_swap(struct md_s *sc, struct bio *bp) vm_object_pip_add(sc->object, 1); for (i = bp->bio_offset / PAGE_SIZE; i <= lastp; i++) { len = ((i == lastp) ? lastend : PAGE_SIZE) - offs; - - m = vm_page_grab(sc->object, i, - VM_ALLOC_NORMAL|VM_ALLOC_RETRY); - VM_OBJECT_UNLOCK(sc->object); - sched_pin(); - sf = sf_buf_alloc(m, SFB_CPUPRIVATE); - VM_OBJECT_LOCK(sc->object); + m = vm_page_grab(sc->object, i, VM_ALLOC_NORMAL | + VM_ALLOC_RETRY); if (bp->bio_cmd == BIO_READ) { if (m->valid != VM_PAGE_BITS_ALL) rv = vm_pager_get_pages(sc->object, &m, 1, 0); if (rv == VM_PAGER_ERROR) { - sf_buf_free(sf); - sched_unpin(); vm_page_wakeup(m); break; } else if (rv == VM_PAGER_FAIL) { @@ -683,40 +833,44 @@ mdstart_swap(struct md_s *sc, struct bio *bp) * valid. Do not set dirty, the page * can be recreated if thrown out. 
*/ - bzero((void *)sf_buf_kva(sf), PAGE_SIZE); + pmap_zero_page(m); m->valid = VM_PAGE_BITS_ALL; } - bcopy((void *)(sf_buf_kva(sf) + offs), p, len); - cpu_flush_dcache(p, len); + if ((bp->bio_flags & BIO_UNMAPPED) != 0) { + pmap_copy_pages(&m, offs, bp->bio_ma, + ma_offs, len); + } else { + physcopyout(VM_PAGE_TO_PHYS(m) + offs, p, len); + cpu_flush_dcache(p, len); + } } else if (bp->bio_cmd == BIO_WRITE) { if (len != PAGE_SIZE && m->valid != VM_PAGE_BITS_ALL) rv = vm_pager_get_pages(sc->object, &m, 1, 0); if (rv == VM_PAGER_ERROR) { - sf_buf_free(sf); - sched_unpin(); vm_page_wakeup(m); break; } - bcopy(p, (void *)(sf_buf_kva(sf) + offs), len); + if ((bp->bio_flags & BIO_UNMAPPED) != 0) { + pmap_copy_pages(bp->bio_ma, ma_offs, &m, + offs, len); + } else { + physcopyin(p, VM_PAGE_TO_PHYS(m) + offs, len); + } m->valid = VM_PAGE_BITS_ALL; } else if (bp->bio_cmd == BIO_DELETE) { if (len != PAGE_SIZE && m->valid != VM_PAGE_BITS_ALL) rv = vm_pager_get_pages(sc->object, &m, 1, 0); if (rv == VM_PAGER_ERROR) { - sf_buf_free(sf); - sched_unpin(); vm_page_wakeup(m); break; } if (len != PAGE_SIZE) { - bzero((void *)(sf_buf_kva(sf) + offs), len); + pmap_zero_page_area(m, offs, len); vm_page_clear_dirty(m, offs, len); m->valid = VM_PAGE_BITS_ALL; } else vm_pager_page_unswapped(m); } - sf_buf_free(sf); - sched_unpin(); vm_page_wakeup(m); vm_page_lock(m); if (bp->bio_cmd == BIO_DELETE && len == PAGE_SIZE) @@ -730,6 +884,7 @@ mdstart_swap(struct md_s *sc, struct bio *bp) /* Actions on further pages start at offset 0 */ p += PAGE_SIZE - offs; offs = 0; + ma_offs += len; } vm_object_pip_subtract(sc->object, 1); VM_OBJECT_UNLOCK(sc->object); @@ -845,6 +1000,14 @@ mdinit(struct md_s *sc) pp = g_new_providerf(gp, "md%d", sc->unit); pp->mediasize = sc->mediasize; pp->sectorsize = sc->sectorsize; + switch (sc->type) { + case MD_SWAP: + case MD_MALLOC: + pp->flags |= G_PF_ACCEPT_UNMAPPED; + break; + default: + break; + } sc->gp = gp; sc->pp = pp; g_error_provider(pp, 0); diff --git a/sys/fs/cd9660/cd9660_vnops.c b/sys/fs/cd9660/cd9660_vnops.c index 21ee0fc..47d4f75 100644 --- a/sys/fs/cd9660/cd9660_vnops.c +++ b/sys/fs/cd9660/cd9660_vnops.c @@ -329,7 +329,7 @@ cd9660_read(ap) if (lblktosize(imp, rablock) < ip->i_size) error = cluster_read(vp, (off_t)ip->i_size, lbn, size, NOCRED, uio->uio_resid, - (ap->a_ioflag >> 16), &bp); + (ap->a_ioflag >> 16), 0, &bp); else error = bread(vp, lbn, size, NOCRED, &bp); } else { diff --git a/sys/fs/ext2fs/ext2_balloc.c b/sys/fs/ext2fs/ext2_balloc.c index 1c0cc0e..88ad710 100644 --- a/sys/fs/ext2fs/ext2_balloc.c +++ b/sys/fs/ext2fs/ext2_balloc.c @@ -276,7 +276,7 @@ ext2_balloc(struct inode *ip, int32_t lbn, int size, struct ucred *cred, if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { error = cluster_read(vp, ip->i_size, lbn, (int)fs->e2fs_bsize, NOCRED, - MAXBSIZE, seqcount, &nbp); + MAXBSIZE, seqcount, 0, &nbp); } else { error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED, &nbp); } diff --git a/sys/fs/ext2fs/ext2_vnops.c b/sys/fs/ext2fs/ext2_vnops.c index 1c0b7a1..77eb74b 100644 --- a/sys/fs/ext2fs/ext2_vnops.c +++ b/sys/fs/ext2fs/ext2_vnops.c @@ -1618,10 +1618,11 @@ ext2_read(struct vop_read_args *ap) if (lblktosize(fs, nextlbn) >= ip->i_size) error = bread(vp, lbn, size, NOCRED, &bp); - else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) + else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { error = cluster_read(vp, ip->i_size, lbn, size, - NOCRED, blkoffset + uio->uio_resid, seqcount, &bp); - else if (seqcount > 1) { + NOCRED, blkoffset + 
uio->uio_resid, seqcount, + 0, &bp); + } else if (seqcount > 1) { int nextsize = blksize(fs, ip, nextlbn); error = breadn(vp, lbn, size, &nextlbn, &nextsize, 1, NOCRED, &bp); @@ -1831,7 +1832,7 @@ ext2_write(struct vop_write_args *ap) } else if (xfersize + blkoffset == fs->e2fs_fsize) { if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { bp->b_flags |= B_CLUSTEROK; - cluster_write(vp, bp, ip->i_size, seqcount); + cluster_write(vp, bp, ip->i_size, seqcount, 0); } else { bawrite(bp); } diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c index 8e045cb..213ae81 100644 --- a/sys/fs/msdosfs/msdosfs_vnops.c +++ b/sys/fs/msdosfs/msdosfs_vnops.c @@ -600,7 +600,7 @@ msdosfs_read(ap) error = bread(vp, lbn, blsize, NOCRED, &bp); } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { error = cluster_read(vp, dep->de_FileSize, lbn, blsize, - NOCRED, on + uio->uio_resid, seqcount, &bp); + NOCRED, on + uio->uio_resid, seqcount, 0, &bp); } else if (seqcount > 1) { rasize = blsize; error = breadn(vp, lbn, @@ -820,7 +820,7 @@ msdosfs_write(ap) else if (n + croffset == pmp->pm_bpcluster) { if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) cluster_write(vp, bp, dep->de_FileSize, - seqcount); + seqcount, 0); else bawrite(bp); } else diff --git a/sys/fs/udf/udf_vnops.c b/sys/fs/udf/udf_vnops.c index b1a3b1d..abe073e 100644 --- a/sys/fs/udf/udf_vnops.c +++ b/sys/fs/udf/udf_vnops.c @@ -478,8 +478,9 @@ udf_read(struct vop_read_args *ap) rablock = lbn + 1; if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { if (lblktosize(udfmp, rablock) < fsize) { - error = cluster_read(vp, fsize, lbn, size, NOCRED, - uio->uio_resid, (ap->a_ioflag >> 16), &bp); + error = cluster_read(vp, fsize, lbn, size, + NOCRED, uio->uio_resid, + (ap->a_ioflag >> 16), 0, &bp); } else { error = bread(vp, lbn, size, NOCRED, &bp); } diff --git a/sys/geom/geom.h b/sys/geom/geom.h index 351b05d..660bf6e 100644 --- a/sys/geom/geom.h +++ b/sys/geom/geom.h @@ -205,6 +205,7 @@ struct g_provider { u_int flags; #define G_PF_WITHER 0x2 #define G_PF_ORPHAN 0x4 +#define G_PF_ACCEPT_UNMAPPED 0x8 /* Two fields for the implementing class to use */ void *private; diff --git a/sys/geom/geom_disk.c b/sys/geom/geom_disk.c index 72e9162..7fec9da 100644 --- a/sys/geom/geom_disk.c +++ b/sys/geom/geom_disk.c @@ -320,13 +320,29 @@ g_disk_start(struct bio *bp) do { bp2->bio_offset += off; bp2->bio_length -= off; - bp2->bio_data += off; + if ((bp->bio_flags & BIO_UNMAPPED) == 0) { + bp2->bio_data += off; + } else { + KASSERT((dp->d_flags & DISKFLAG_UNMAPPED_BIO) + != 0, + ("unmapped bio not supported by disk %s", + dp->d_name)); + bp2->bio_ma += off / PAGE_SIZE; + bp2->bio_ma_offset += off; + bp2->bio_ma_offset %= PAGE_SIZE; + bp2->bio_ma_n -= off / PAGE_SIZE; + } if (bp2->bio_length > dp->d_maxsize) { /* * XXX: If we have a stripesize we should really * use it here. 
*/ bp2->bio_length = dp->d_maxsize; + if ((bp->bio_flags & BIO_UNMAPPED) != 0) { + bp2->bio_ma_n = howmany( + bp2->bio_ma_offset + + bp2->bio_length, PAGE_SIZE); + } off += dp->d_maxsize; /* * To avoid a race, we need to grab the next bio @@ -488,6 +504,8 @@ g_disk_create(void *arg, int flag) pp->sectorsize = dp->d_sectorsize; pp->stripeoffset = dp->d_stripeoffset; pp->stripesize = dp->d_stripesize; + if ((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0) + pp->flags |= G_PF_ACCEPT_UNMAPPED; if (bootverbose) printf("GEOM: new disk %s\n", gp->name); sysctl_ctx_init(&sc->sysctl_ctx); diff --git a/sys/geom/geom_disk.h b/sys/geom/geom_disk.h index 33d8eb2..246fc49 100644 --- a/sys/geom/geom_disk.h +++ b/sys/geom/geom_disk.h @@ -103,6 +103,7 @@ struct disk { #define DISKFLAG_OPEN 0x2 #define DISKFLAG_CANDELETE 0x4 #define DISKFLAG_CANFLUSHCACHE 0x8 +#define DISKFLAG_UNMAPPED_BIO 0x10 struct disk *disk_alloc(void); void disk_create(struct disk *disk, int version); diff --git a/sys/geom/geom_io.c b/sys/geom/geom_io.c index c6a5da8..4c84bcc 100644 --- a/sys/geom/geom_io.c +++ b/sys/geom/geom_io.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -51,6 +52,13 @@ __FBSDID("$FreeBSD$"); #include #include +#include +#include +#include +#include +#include +#include +#include static struct g_bioq g_bio_run_down; static struct g_bioq g_bio_run_up; @@ -180,12 +188,17 @@ g_clone_bio(struct bio *bp) /* * BIO_ORDERED flag may be used by disk drivers to enforce * ordering restrictions, so this flag needs to be cloned. + * BIO_UNMAPPED should be inherited, to properly indicate + * which way the buffer is passed. * Other bio flags are not suitable for cloning. */ - bp2->bio_flags = bp->bio_flags & BIO_ORDERED; + bp2->bio_flags = bp->bio_flags & (BIO_ORDERED | BIO_UNMAPPED); bp2->bio_length = bp->bio_length; bp2->bio_offset = bp->bio_offset; bp2->bio_data = bp->bio_data; + bp2->bio_ma = bp->bio_ma; + bp2->bio_ma_n = bp->bio_ma_n; + bp2->bio_ma_offset = bp->bio_ma_offset; bp2->bio_attribute = bp->bio_attribute; /* Inherit classification info from the parent */ bp2->bio_classifier1 = bp->bio_classifier1; @@ -210,11 +223,15 @@ g_duplicate_bio(struct bio *bp) struct bio *bp2; bp2 = uma_zalloc(biozone, M_WAITOK | M_ZERO); + bp2->bio_flags = bp->bio_flags & BIO_UNMAPPED; bp2->bio_parent = bp; bp2->bio_cmd = bp->bio_cmd; bp2->bio_length = bp->bio_length; bp2->bio_offset = bp->bio_offset; bp2->bio_data = bp->bio_data; + bp2->bio_ma = bp->bio_ma; + bp2->bio_ma_n = bp->bio_ma_n; + bp2->bio_ma_offset = bp->bio_ma_offset; bp2->bio_attribute = bp->bio_attribute; bp->bio_children++; #ifdef KTR @@ -575,6 +592,76 @@ g_io_deliver(struct bio *bp, int error) return; } +SYSCTL_DECL(_kern_geom); + +static long transient_maps; +SYSCTL_LONG(_kern_geom, OID_AUTO, transient_maps, CTLFLAG_RD, + &transient_maps, 0, + ""); +int transient_map_retries; +SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_retries, CTLFLAG_RD, + &transient_map_retries, 0, + ""); +int transient_map_failures; +SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_failures, CTLFLAG_RD, + &transient_map_failures, 0, + ""); +int inflight_transient_maps; +SYSCTL_INT(_kern_geom, OID_AUTO, inflight_transient_maps, CTLFLAG_RD, + &inflight_transient_maps, 0, + ""); + +static int +g_io_transient_map_bio(struct bio *bp) +{ + vm_offset_t addr; + long size; + int retried, rv; + + size = round_page(bp->bio_ma_offset + bp->bio_length); + KASSERT(size / PAGE_SIZE == bp->bio_ma_n, ("Bio too short %p", bp)); + addr = 0; + retried = 0; + 
atomic_add_long(&transient_maps, 1); +retry: + vm_map_lock(bio_transient_map); + if (vm_map_findspace(bio_transient_map, vm_map_min(bio_transient_map), + size, &addr)) { + vm_map_unlock(bio_transient_map); + if (retried >= 3) { + g_io_deliver(bp, EDEADLK/* XXXKIB */); + CTR2(KTR_GEOM, "g_down cannot map bp %p provider %s", + bp, bp->bio_to->name); + atomic_add_int(&transient_map_failures, 1); + return (1); + } else { + /* + * Naive attempt to quisce the I/O to get more + * in-flight requests completed and defragment + * the bio_transient_map. + */ + CTR3(KTR_GEOM, "g_down retrymap bp %p provider %s r %d", + bp, bp->bio_to->name, retried); + pause("g_d_tra", hz / 10); + retried++; + atomic_add_int(&transient_map_retries, 1); + goto retry; + } + } + rv = vm_map_insert(bio_transient_map, NULL, 0, addr, addr + size, + VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT); + KASSERT(rv == KERN_SUCCESS, + ("vm_map_insert(bio_transient_map) rv %d %jx %lx", + rv, (uintmax_t)addr, size)); + vm_map_unlock(bio_transient_map); + atomic_add_int(&inflight_transient_maps, 1); + pmap_qenter((vm_offset_t)addr, bp->bio_ma, OFF_TO_IDX(size)); + bp->bio_data = (caddr_t)addr + bp->bio_ma_offset; + bp->bio_flags |= BIO_TRANSIENT_MAPPING; + bp->bio_flags &= ~BIO_UNMAPPED; + return (0); +} + void g_io_schedule_down(struct thread *tp __unused) { @@ -636,6 +723,12 @@ g_io_schedule_down(struct thread *tp __unused) default: break; } + if ((bp->bio_flags & BIO_UNMAPPED) != 0 && + (bp->bio_to->flags & G_PF_ACCEPT_UNMAPPED) == 0 && + (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) { + if (g_io_transient_map_bio(bp)) + continue; + } THREAD_NO_SLEEPING(); CTR4(KTR_GEOM, "g_down starting bp %p provider %s off %ld " "len %ld", bp, bp->bio_to->name, bp->bio_offset, diff --git a/sys/geom/geom_vfs.c b/sys/geom/geom_vfs.c index bbed550..92f1ad2 100644 --- a/sys/geom/geom_vfs.c +++ b/sys/geom/geom_vfs.c @@ -188,14 +188,14 @@ g_vfs_strategy(struct bufobj *bo, struct buf *bp) bip = g_alloc_bio(); bip->bio_cmd = bp->b_iocmd; bip->bio_offset = bp->b_iooffset; - bip->bio_data = bp->b_data; - bip->bio_done = g_vfs_done; - bip->bio_caller2 = bp; bip->bio_length = bp->b_bcount; - if (bp->b_flags & B_BARRIER) { + bdata2bio(bp, bip); + if ((bp->b_flags & B_BARRIER) != 0) { bip->bio_flags |= BIO_ORDERED; bp->b_flags &= ~B_BARRIER; } + bip->bio_done = g_vfs_done; + bip->bio_caller2 = bp; g_io_request(bip, cp); } diff --git a/sys/geom/part/g_part.c b/sys/geom/part/g_part.c index e2ba79e..7650499 100644 --- a/sys/geom/part/g_part.c +++ b/sys/geom/part/g_part.c @@ -427,6 +427,7 @@ g_part_new_provider(struct g_geom *gp, struct g_part_table *table, entry->gpe_pp->stripeoffset = pp->stripeoffset + entry->gpe_offset; if (pp->stripesize > 0) entry->gpe_pp->stripeoffset %= pp->stripesize; + entry->gpe_pp->flags |= pp->flags & G_PF_ACCEPT_UNMAPPED; g_error_provider(entry->gpe_pp, 0); } diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 5fee565..27548dc 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -4240,6 +4240,49 @@ pmap_copy_page(vm_page_t src, vm_page_t dst) mtx_unlock(&sysmaps->lock); } +void +pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], + vm_offset_t b_offset, int xfersize) +{ + struct sysmaps *sysmaps; + vm_page_t a_pg, b_pg; + char *a_cp, *b_cp; + vm_offset_t a_pg_offset, b_pg_offset; + int cnt; + + sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; + mtx_lock(&sysmaps->lock); + if (*sysmaps->CMAP1 != 0) + panic("pmap_copy_pages: CMAP1 busy"); + if (*sysmaps->CMAP2 != 0) + panic("pmap_copy_pages: CMAP2 
busy"); + sched_pin(); + while (xfersize > 0) { + invlpg((u_int)sysmaps->CADDR1); + invlpg((u_int)sysmaps->CADDR2); + a_pg = ma[a_offset >> PAGE_SHIFT]; + a_pg_offset = a_offset & PAGE_MASK; + cnt = min(xfersize, PAGE_SIZE - a_pg_offset); + b_pg = mb[b_offset >> PAGE_SHIFT]; + b_pg_offset = b_offset & PAGE_MASK; + cnt = min(cnt, PAGE_SIZE - b_pg_offset); + *sysmaps->CMAP1 = PG_V | VM_PAGE_TO_PHYS(a_pg) | PG_A | + pmap_cache_bits(b_pg->md.pat_mode, 0); + *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(b_pg) | PG_A | + PG_M | pmap_cache_bits(b_pg->md.pat_mode, 0); + a_cp = sysmaps->CADDR1 + a_pg_offset; + b_cp = sysmaps->CADDR2 + b_pg_offset; + bcopy(a_cp, b_cp, cnt); + a_offset += cnt; + b_offset += cnt; + xfersize -= cnt; + } + *sysmaps->CMAP1 = 0; + *sysmaps->CMAP2 = 0; + sched_unpin(); + mtx_unlock(&sysmaps->lock); +} + /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c index a8f11a4..28ba21b 100644 --- a/sys/i386/xen/pmap.c +++ b/sys/i386/xen/pmap.c @@ -3448,6 +3448,46 @@ pmap_copy_page(vm_page_t src, vm_page_t dst) mtx_unlock(&sysmaps->lock); } +void +pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], + vm_offset_t b_offset, int xfersize) +{ + struct sysmaps *sysmaps; + vm_page_t a_pg, b_pg; + char *a_cp, *b_cp; + vm_offset_t a_pg_offset, b_pg_offset; + int cnt; + + sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; + mtx_lock(&sysmaps->lock); + if (*sysmaps->CMAP1 != 0) + panic("pmap_copy_pages: CMAP1 busy"); + if (*sysmaps->CMAP2 != 0) + panic("pmap_copy_pages: CMAP2 busy"); + sched_pin(); + while (xfersize > 0) { + a_pg = ma[a_offset >> PAGE_SHIFT]; + a_pg_offset = a_offset & PAGE_MASK; + cnt = min(xfersize, PAGE_SIZE - a_pg_offset); + b_pg = mb[b_offset >> PAGE_SHIFT]; + b_pg_offset = b_offset & PAGE_MASK; + cnt = min(cnt, PAGE_SIZE - b_pg_offset); + PT_SET_MA(sysmaps->CADDR1, PG_V | VM_PAGE_TO_MACH(a_pg) | PG_A); + PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | + VM_PAGE_TO_MACH(b_pg) | PG_A | PG_M); + a_cp = sysmaps->CADDR1 + a_pg_offset; + b_cp = sysmaps->CADDR2 + b_pg_offset; + bcopy(a_cp, b_cp, cnt); + a_offset += cnt; + b_offset += cnt; + xfersize -= cnt; + } + PT_SET_MA(sysmaps->CADDR1, 0); + PT_SET_MA(sysmaps->CADDR2, 0); + sched_unpin(); + mtx_unlock(&sysmaps->lock); +} + /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may diff --git a/sys/ia64/ia64/pmap.c b/sys/ia64/ia64/pmap.c index 594f8c6..28610c6 100644 --- a/sys/ia64/ia64/pmap.c +++ b/sys/ia64/ia64/pmap.c @@ -2014,6 +2014,30 @@ pmap_copy_page(vm_page_t msrc, vm_page_t mdst) bcopy(src, dst, PAGE_SIZE); } +void +pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], + vm_offset_t b_offset, int xfersize) +{ + void *a_cp, *b_cp; + vm_offset_t a_pg_offset, b_pg_offset; + int cnt; + + while (xfersize > 0) { + a_pg_offset = a_offset & PAGE_MASK; + cnt = min(xfersize, PAGE_SIZE - a_pg_offset); + a_cp = (char *)pmap_page_to_va(ma[a_offset >> PAGE_SHIFT]) + + a_pg_offset; + b_pg_offset = b_offset & PAGE_MASK; + cnt = min(cnt, PAGE_SIZE - b_pg_offset); + b_cp = (char *)pmap_page_to_va(mb[b_offset >> PAGE_SHIFT]) + + b_pg_offset; + bcopy(a_cp, b_cp, cnt); + a_offset += cnt; + b_offset += cnt; + xfersize -= cnt; + } +} + /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. 
This count may diff --git a/sys/kern/kern_physio.c b/sys/kern/kern_physio.c index 34072f3..922ebb6 100644 --- a/sys/kern/kern_physio.c +++ b/sys/kern/kern_physio.c @@ -92,7 +92,7 @@ physio(struct cdev *dev, struct uio *uio, int ioflag) bp->b_blkno = btodb(bp->b_offset); if (uio->uio_segflg == UIO_USERSPACE) - if (vmapbuf(bp) < 0) { + if (vmapbuf(bp, 0) < 0) { error = EFAULT; goto doerror; } diff --git a/sys/kern/subr_bus_dma.c b/sys/kern/subr_bus_dma.c index 773d01a..1ca1f89 100644 --- a/sys/kern/subr_bus_dma.c +++ b/sys/kern/subr_bus_dma.c @@ -126,11 +126,27 @@ static int _bus_dmamap_load_bio(bus_dma_tag_t dmat, bus_dmamap_t map, struct bio *bio, int *nsegs, int flags) { - int error; + vm_paddr_t paddr; + bus_size_t len, tlen; + int error, i, ma_offs; - error = _bus_dmamap_load_buffer(dmat, map, bio->bio_data, - bio->bio_bcount, kernel_pmap, flags, NULL, nsegs); + if ((bio->bio_flags & BIO_UNMAPPED) == 0) { + error = _bus_dmamap_load_buffer(dmat, map, bio->bio_data, + bio->bio_bcount, kernel_pmap, flags, NULL, nsegs); + return (error); + } + tlen = bio->bio_bcount; + ma_offs = bio->bio_ma_offset; + for (i = 0; tlen > 0; i++, tlen -= len) { + len = min(PAGE_SIZE - ma_offs, tlen); + paddr = VM_PAGE_TO_PHYS(bio->bio_ma[i]) + ma_offs; + error = _bus_dmamap_load_phys(dmat, map, paddr, len, + flags, NULL, nsegs); + if (error != 0) + break; + ma_offs = 0; + } return (error); } diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c index f36c769..1fb344e 100644 --- a/sys/kern/subr_param.c +++ b/sys/kern/subr_param.c @@ -91,6 +91,7 @@ int maxfilesperproc; /* per-proc open files limit */ int msgbufsize; /* size of kernel message buffer */ int ncallout; /* maximum # of timer events */ int nbuf; +int bio_transient_maxcnt; int ngroups_max; /* max # groups per process */ int nswbuf; pid_t pid_max = PID_MAX; @@ -119,6 +120,9 @@ SYSCTL_LONG(_kern, OID_AUTO, maxswzone, CTLFLAG_RDTUN, &maxswzone, 0, "Maximum memory for swap metadata"); SYSCTL_LONG(_kern, OID_AUTO, maxbcache, CTLFLAG_RDTUN, &maxbcache, 0, "Maximum value of vfs.maxbufspace"); +SYSCTL_INT(_kern, OID_AUTO, bio_transient_maxcnt, CTLFLAG_RDTUN, + &bio_transient_maxcnt, 0, + "Maximum number of transient BIOs mappings"); SYSCTL_ULONG(_kern, OID_AUTO, maxtsiz, CTLFLAG_RW | CTLFLAG_TUN, &maxtsiz, 0, "Maximum text size"); SYSCTL_ULONG(_kern, OID_AUTO, dfldsiz, CTLFLAG_RW | CTLFLAG_TUN, &dfldsiz, 0, @@ -321,6 +325,7 @@ init_param2(long physpages) */ nbuf = NBUF; TUNABLE_INT_FETCH("kern.nbuf", &nbuf); + TUNABLE_INT_FETCH("kern.bio_transient_maxcnt", &bio_transient_maxcnt); /* * XXX: Does the callout wheel have to be so big? diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index 99b0197..ae6ae8e 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -1322,7 +1322,7 @@ aio_qphysio(struct proc *p, struct aiocblist *aiocbe) /* * Bring buffer into kernel space. */ - if (vmapbuf(bp) < 0) { + if (vmapbuf(bp, 1) < 0) { error = EFAULT; goto doerror; } diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 6393399..a53f16b 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -91,6 +91,7 @@ struct buf_ops buf_ops_bio = { * carnal knowledge of buffers. This knowledge should be moved to vfs_bio.c. 
*/ struct buf *buf; /* buffer header pool */ +caddr_t unmapped_buf; static struct proc *bufdaemonproc; @@ -131,6 +132,10 @@ SYSCTL_PROC(_vfs, OID_AUTO, bufspace, CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RD, SYSCTL_LONG(_vfs, OID_AUTO, bufspace, CTLFLAG_RD, &bufspace, 0, "Virtual memory used for buffers"); #endif +static long unmapped_bufspace; +SYSCTL_LONG(_vfs, OID_AUTO, unmapped_bufspace, CTLFLAG_RD, + &unmapped_bufspace, 0, + "Amount of unmapped buffers, inclusive in the bufspace"); static long maxbufspace; SYSCTL_LONG(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RD, &maxbufspace, 0, "Maximum allowed value of bufspace (including buf_daemon)"); @@ -200,6 +205,10 @@ SYSCTL_INT(_vfs, OID_AUTO, getnewbufcalls, CTLFLAG_RW, &getnewbufcalls, 0, static int getnewbufrestarts; SYSCTL_INT(_vfs, OID_AUTO, getnewbufrestarts, CTLFLAG_RW, &getnewbufrestarts, 0, "Number of times getnewbuf has had to restart a buffer aquisition"); +static int mappingrestarts; +SYSCTL_INT(_vfs, OID_AUTO, mappingrestarts, CTLFLAG_RW, &mappingrestarts, 0, + "Number of times getblk has had to restart a buffer mapping for " + "unmapped buffer"); static int flushbufqtarget = 100; SYSCTL_INT(_vfs, OID_AUTO, flushbufqtarget, CTLFLAG_RW, &flushbufqtarget, 0, "Amount of work to do in flushbufqueues when helping bufdaemon"); @@ -280,6 +289,9 @@ static struct mtx nblock; /* Queues for free buffers with various properties */ static TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES] = { { 0 } }; +#ifdef INVARIANTS +static int bq_len[BUFFER_QUEUES]; +#endif /* Lock for the bufqueues */ static struct mtx bqlock; @@ -510,7 +522,7 @@ caddr_t kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est) { int tuned_nbuf; - long maxbuf; + long maxbuf, maxbuf_sz, buf_sz, biotmap_sz; /* * physmem_est is in pages. Convert it to kilobytes (assumes @@ -554,6 +566,52 @@ kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est) } /* + * Ideal allocation size for the transient bio submap if 10% + * of the maximal space buffer map. This roughly corresponds + * to the amount of the buffer mapped for typical UFS load. + * + * Clip the buffer map to reserve space for the transient + * BIOs, if its extent is bigger than 90% of the maximum + * buffer map extent on the platform. + * + * The fall-back to the maxbuf in case of maxbcache unset, + * allows to not trim the buffer KVA for the architectures + * with ample KVA space. + */ + if (bio_transient_maxcnt == 0) { + maxbuf_sz = maxbcache != 0 ? maxbcache : maxbuf * BKVASIZE; + buf_sz = (long)nbuf * BKVASIZE; + if (buf_sz < maxbuf_sz / 10 * 9) { + /* + * There is more KVA than memory. Do not + * adjust buffer map size, and assign the rest + * of maxbuf to transient map. + */ + biotmap_sz = maxbuf_sz - buf_sz; + } else { + /* + * Buffer map spans all KVA we could afford on + * this platform. Give 10% of the buffer map + * to the transient bio map. + */ + biotmap_sz = buf_sz / 10; + buf_sz -= biotmap_sz; + } + if (biotmap_sz / INT_MAX > MAXPHYS) + bio_transient_maxcnt = INT_MAX; + else + bio_transient_maxcnt = biotmap_sz / MAXPHYS; + /* + * Artifically limit to 1024 simultaneous in-flight I/Os + * using the transient mapping. + */ + if (bio_transient_maxcnt > 1024) + bio_transient_maxcnt = 1024; + if (tuned_nbuf) + nbuf = buf_sz / BKVASIZE; + } + + /* * swbufs are used as temporary holders for I/O, such as paging I/O. * We have no less then 16 and no more then 256. 
*/ @@ -606,6 +664,9 @@ bufinit(void) LIST_INIT(&bp->b_dep); BUF_LOCKINIT(bp); TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist); +#ifdef INVARIANTS + bq_len[QUEUE_EMPTY]++; +#endif } /* @@ -674,6 +735,55 @@ bufinit(void) bogus_page = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED); + unmapped_buf = (caddr_t)kmem_alloc_nofault(kernel_map, MAXPHYS); +} + +#ifdef INVARIANTS +static inline void +vfs_buf_check_mapped(struct buf *bp) +{ + + KASSERT((bp->b_flags & B_UNMAPPED) == 0, + ("mapped buf %p %x", bp, bp->b_flags)); + KASSERT(bp->b_kvabase != unmapped_buf, + ("mapped buf: b_kvabase was not updated %p", bp)); + KASSERT(bp->b_data != unmapped_buf, + ("mapped buf: b_data was not updated %p", bp)); +} + +static inline void +vfs_buf_check_unmapped(struct buf *bp) +{ + + KASSERT((bp->b_flags & B_UNMAPPED) == B_UNMAPPED, + ("unmapped buf %p %x", bp, bp->b_flags)); + KASSERT(bp->b_kvabase == unmapped_buf, + ("unmapped buf: corrupted b_kvabase %p", bp)); + KASSERT(bp->b_data == unmapped_buf, + ("unmapped buf: corrupted b_data %p", bp)); +} + +#define BUF_CHECK_MAPPED(bp) vfs_buf_check_mapped(bp) +#define BUF_CHECK_UNMAPPED(bp) vfs_buf_check_unmapped(bp) +#else +#define BUF_CHECK_MAPPED(bp) do {} while (0) +#define BUF_CHECK_UNMAPPED(bp) do {} while (0) +#endif + +static void +bpmap_qenter(struct buf *bp) +{ + + BUF_CHECK_MAPPED(bp); + + /* + * bp->b_data is relative to bp->b_offset, but + * bp->b_offset may be offset into the first page. + */ + bp->b_data = (caddr_t)trunc_page((vm_offset_t)bp->b_data); + pmap_qenter((vm_offset_t)bp->b_data, bp->b_pages, bp->b_npages); + bp->b_data = (caddr_t)((vm_offset_t)bp->b_data | + (vm_offset_t)(bp->b_offset & PAGE_MASK)); } /* @@ -685,14 +795,26 @@ static void bfreekva(struct buf *bp) { - if (bp->b_kvasize) { - atomic_add_int(&buffreekvacnt, 1); - atomic_subtract_long(&bufspace, bp->b_kvasize); - vm_map_remove(buffer_map, (vm_offset_t) bp->b_kvabase, - (vm_offset_t) bp->b_kvabase + bp->b_kvasize); - bp->b_kvasize = 0; - bufspacewakeup(); + if (bp->b_kvasize == 0) + return; + + atomic_add_int(&buffreekvacnt, 1); + atomic_subtract_long(&bufspace, bp->b_kvasize); + if ((bp->b_flags & B_UNMAPPED) == 0) { + BUF_CHECK_MAPPED(bp); + vm_map_remove(buffer_map, (vm_offset_t)bp->b_kvabase, + (vm_offset_t)bp->b_kvabase + bp->b_kvasize); + } else { + BUF_CHECK_UNMAPPED(bp); + if ((bp->b_flags & B_KVAALLOC) != 0) { + vm_map_remove(buffer_map, (vm_offset_t)bp->b_kvaalloc, + (vm_offset_t)bp->b_kvaalloc + bp->b_kvasize); + } + atomic_subtract_long(&unmapped_bufspace, bp->b_kvasize); + bp->b_flags &= ~(B_UNMAPPED | B_KVAALLOC); } + bp->b_kvasize = 0; + bufspacewakeup(); } /* @@ -759,6 +881,11 @@ bremfreel(struct buf *bp) mtx_assert(&bqlock, MA_OWNED); TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist); +#ifdef INVARIANTS + KASSERT(bq_len[bp->b_qindex] >= 1, ("queue %d underflow", + bp->b_qindex)); + bq_len[bp->b_qindex]--; +#endif bp->b_qindex = QUEUE_NONE; /* * If this was a delayed bremfree() we only need to remove the buffer @@ -829,9 +956,8 @@ breada(struct vnode * vp, daddr_t * rablkno, int * rabsize, * getblk(). Also starts asynchronous I/O on read-ahead blocks. 
*/ int -breadn_flags(struct vnode * vp, daddr_t blkno, int size, - daddr_t * rablkno, int *rabsize, int cnt, - struct ucred * cred, int flags, struct buf **bpp) +breadn_flags(struct vnode *vp, daddr_t blkno, int size, daddr_t *rablkno, + int *rabsize, int cnt, struct ucred *cred, int flags, struct buf **bpp) { struct buf *bp; int rv = 0, readwait = 0; @@ -1405,7 +1531,8 @@ brelse(struct buf *bp) } } - if ((bp->b_flags & B_INVAL) == 0) { + if ((bp->b_flags & (B_INVAL | B_UNMAPPED)) == 0) { + BUF_CHECK_MAPPED(bp); pmap_qenter( trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages); @@ -1506,11 +1633,17 @@ brelse(struct buf *bp) bp->b_qindex = QUEUE_DIRTY; else bp->b_qindex = QUEUE_CLEAN; - if (bp->b_flags & B_AGE) - TAILQ_INSERT_HEAD(&bufqueues[bp->b_qindex], bp, b_freelist); - else - TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, b_freelist); + if (bp->b_flags & B_AGE) { + TAILQ_INSERT_HEAD(&bufqueues[bp->b_qindex], bp, + b_freelist); + } else { + TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, + b_freelist); + } } +#ifdef INVARIANTS + bq_len[bp->b_qindex]++; +#endif mtx_unlock(&bqlock); /* @@ -1601,6 +1734,9 @@ bqrelse(struct buf *bp) if (bp->b_flags & B_DELWRI) { bp->b_qindex = QUEUE_DIRTY; TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, b_freelist); +#ifdef INVARIANTS + bq_len[bp->b_qindex]++; +#endif } else { /* * The locking of the BO_LOCK for checking of the @@ -1613,6 +1749,9 @@ bqrelse(struct buf *bp) bp->b_qindex = QUEUE_CLEAN; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_CLEAN], bp, b_freelist); +#ifdef INVARIANTS + bq_len[QUEUE_CLEAN]++; +#endif } else { /* * We are too low on memory, we have to try to free @@ -1654,7 +1793,11 @@ vfs_vmio_release(struct buf *bp) int i; vm_page_t m; - pmap_qremove(trunc_page((vm_offset_t)bp->b_data), bp->b_npages); + if ((bp->b_flags & B_UNMAPPED) == 0) { + BUF_CHECK_MAPPED(bp); + pmap_qremove(trunc_page((vm_offset_t)bp->b_data), bp->b_npages); + } else + BUF_CHECK_UNMAPPED(bp); VM_OBJECT_LOCK(bp->b_bufobj->bo_object); for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; @@ -1758,8 +1901,10 @@ vfs_bio_awrite(struct buf *bp) int nwritten; int size; int maxcl; + int gbflags; bo = &vp->v_bufobj; + gbflags = (bp->b_flags & B_UNMAPPED) != 0 ? GB_UNMAPPED : 0; /* * right now we support clustered writing only to regular files. If * we find a clusterable block we could be in the middle of a cluster @@ -1790,8 +1935,9 @@ vfs_bio_awrite(struct buf *bp) */ if (ncl != 1) { BUF_UNLOCK(bp); - nwritten = cluster_wbuild(vp, size, lblkno - j, ncl); - return nwritten; + nwritten = cluster_wbuild(vp, size, lblkno - j, ncl, + gbflags); + return (nwritten); } } bremfree(bp); @@ -1807,46 +1953,206 @@ vfs_bio_awrite(struct buf *bp) return nwritten; } +static void +setbufkva(struct buf *bp, vm_offset_t addr, int maxsize, int gbflags) +{ + + KASSERT((bp->b_flags & (B_UNMAPPED | B_KVAALLOC)) == 0 && + bp->b_kvasize == 0, ("call bfreekva(%p)", bp)); + if ((gbflags & GB_UNMAPPED) == 0) { + bp->b_kvabase = (caddr_t)addr; + } else if ((gbflags & GB_KVAALLOC) != 0) { + KASSERT((gbflags & GB_UNMAPPED) != 0, + ("GB_KVAALLOC without GB_UNMAPPED")); + bp->b_kvaalloc = (caddr_t)addr; + bp->b_flags |= B_UNMAPPED | B_KVAALLOC; + atomic_add_long(&unmapped_bufspace, bp->b_kvasize); + } + bp->b_kvasize = maxsize; +} + /* - * getnewbuf: - * - * Find and initialize a new buffer header, freeing up existing buffers - * in the bufqueues as necessary. The new buffer is returned locked. - * - * Important: B_INVAL is not set. 
If the caller wishes to throw the - * buffer away, the caller must set B_INVAL prior to calling brelse(). - * - * We block if: - * We have insufficient buffer headers - * We have insufficient buffer space - * buffer_map is too fragmented ( space reservation fails ) - * If we have to flush dirty buffers ( but we try to avoid this ) - * - * To avoid VFS layer recursion we do not flush dirty buffers ourselves. - * Instead we ask the buf daemon to do it for us. We attempt to - * avoid piecemeal wakeups of the pageout daemon. + * Allocate the buffer KVA and set b_kvasize. Also set b_kvabase if + * needed. */ +static int +allocbufkva(struct buf *bp, int maxsize, int gbflags) +{ + vm_offset_t addr; + int rv; -static struct buf * -getnewbuf(struct vnode *vp, int slpflag, int slptimeo, int size, int maxsize, - int gbflags) + bfreekva(bp); + addr = 0; + + vm_map_lock(buffer_map); + if (vm_map_findspace(buffer_map, vm_map_min(buffer_map), maxsize, + &addr)) { + vm_map_unlock(buffer_map); + /* + * Buffer map is too fragmented. Request the caller + * to defragment the map. + */ + atomic_add_int(&bufdefragcnt, 1); + return (1); + } + rv = vm_map_insert(buffer_map, NULL, 0, addr, addr + maxsize, + VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT); + KASSERT(rv == KERN_SUCCESS, ("vm_map_insert(buffer_map) rv %d", rv)); + vm_map_unlock(buffer_map); + setbufkva(bp, addr, maxsize, gbflags); + atomic_add_long(&bufspace, bp->b_kvasize); + return (0); +} + +/* + * Ask the bufdaemon for help, or act as bufdaemon itself, when a + * locked vnode is supplied. + */ +static void +getnewbuf_bufd_help(struct vnode *vp, int gbflags, int slpflag, int slptimeo, + int defrag) { struct thread *td; - struct buf *bp; - struct buf *nbp; - int defrag = 0; - int nqindex; - static int flushingbufs; + char *waitmsg; + int fl, flags, norunbuf; + + mtx_assert(&bqlock, MA_OWNED); + + if (defrag) { + flags = VFS_BIO_NEED_BUFSPACE; + waitmsg = "nbufkv"; + } else if (bufspace >= hibufspace) { + waitmsg = "nbufbs"; + flags = VFS_BIO_NEED_BUFSPACE; + } else { + waitmsg = "newbuf"; + flags = VFS_BIO_NEED_ANY; + } + mtx_lock(&nblock); + needsbuffer |= flags; + mtx_unlock(&nblock); + mtx_unlock(&bqlock); + + bd_speedup(); /* heeeelp */ + if ((gbflags & GB_NOWAIT_BD) != 0) + return; td = curthread; + mtx_lock(&nblock); + while (needsbuffer & flags) { + if (vp != NULL && (td->td_pflags & TDP_BUFNEED) == 0) { + mtx_unlock(&nblock); + /* + * getblk() is called with a vnode locked, and + * some majority of the dirty buffers may as + * well belong to the vnode. Flushing the + * buffers there would make a progress that + * cannot be achieved by the buf_daemon, that + * cannot lock the vnode. + */ + norunbuf = ~(TDP_BUFNEED | TDP_NORUNNINGBUF) | + (td->td_pflags & TDP_NORUNNINGBUF); + /* play bufdaemon */ + td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF; + fl = buf_do_flush(vp); + td->td_pflags &= norunbuf; + mtx_lock(&nblock); + if (fl != 0) + continue; + if ((needsbuffer & flags) == 0) + break; + } + if (msleep(&needsbuffer, &nblock, (PRIBIO + 4) | slpflag, + waitmsg, slptimeo)) + break; + } + mtx_unlock(&nblock); +} + +static void +getnewbuf_reuse_bp(struct buf *bp, int qindex) +{ + + CTR6(KTR_BUF, "getnewbuf(%p) vp %p flags %X kvasize %d bufsize %d " + "queue %d (recycling)", bp, bp->b_vp, bp->b_flags, + bp->b_kvasize, bp->b_bufsize, qindex); + mtx_assert(&bqlock, MA_NOTOWNED); + /* - * We can't afford to block since we might be holding a vnode lock, - * which may prevent system daemons from running. 
We deal with - * low-memory situations by proactively returning memory and running - * async I/O rather then sync I/O. + * Note: we no longer distinguish between VMIO and non-VMIO + * buffers. */ - atomic_add_int(&getnewbufcalls, 1); - atomic_subtract_int(&getnewbufrestarts, 1); + KASSERT((bp->b_flags & B_DELWRI) == 0, + ("delwri buffer %p found in queue %d", bp, qindex)); + + if (qindex == QUEUE_CLEAN) { + if (bp->b_flags & B_VMIO) { + bp->b_flags &= ~B_ASYNC; + vfs_vmio_release(bp); + } + if (bp->b_vp != NULL) + brelvp(bp); + } + + /* + * Get the rest of the buffer freed up. b_kva* is still valid + * after this operation. + */ + + if (bp->b_rcred != NOCRED) { + crfree(bp->b_rcred); + bp->b_rcred = NOCRED; + } + if (bp->b_wcred != NOCRED) { + crfree(bp->b_wcred); + bp->b_wcred = NOCRED; + } + if (!LIST_EMPTY(&bp->b_dep)) + buf_deallocate(bp); + if (bp->b_vflags & BV_BKGRDINPROG) + panic("losing buffer 3"); + KASSERT(bp->b_vp == NULL, ("bp: %p still has vnode %p. qindex: %d", + bp, bp->b_vp, qindex)); + KASSERT((bp->b_xflags & (BX_VNCLEAN|BX_VNDIRTY)) == 0, + ("bp: %p still on a buffer list. xflags %X", bp, bp->b_xflags)); + + if (bp->b_bufsize) + allocbuf(bp, 0); + + bp->b_flags &= B_UNMAPPED | B_KVAALLOC; + bp->b_ioflags = 0; + bp->b_xflags = 0; + KASSERT((bp->b_vflags & BV_INFREECNT) == 0, + ("buf %p still counted as free?", bp)); + bp->b_vflags = 0; + bp->b_vp = NULL; + bp->b_blkno = bp->b_lblkno = 0; + bp->b_offset = NOOFFSET; + bp->b_iodone = 0; + bp->b_error = 0; + bp->b_resid = 0; + bp->b_bcount = 0; + bp->b_npages = 0; + bp->b_dirtyoff = bp->b_dirtyend = 0; + bp->b_bufobj = NULL; + bp->b_pin_count = 0; + bp->b_fsprivate1 = NULL; + bp->b_fsprivate2 = NULL; + bp->b_fsprivate3 = NULL; + + LIST_INIT(&bp->b_dep); +} + +static int flushingbufs; + +static struct buf * +getnewbuf_scan(int maxsize, int defrag, int unmapped) +{ + struct buf *bp, *nbp; + int nqindex, qindex; + + KASSERT(!unmapped || !defrag, ("both unmapped and defrag")); + restart: atomic_add_int(&getnewbufrestarts, 1); @@ -1856,63 +2162,80 @@ restart: * that if we are specially marked process, we are allowed to * dip into our reserves. * - * The scanning sequence is nominally: EMPTY->EMPTYKVA->CLEAN + * The scanning sequence is nominally: EMPTY->EMPTYKVA->CLEAN + * for the allocation of the mapped buffer. For unmapped, the + * easiest is to start with EMPTY outright. * * We start with EMPTYKVA. If the list is empty we backup to EMPTY. * However, there are a number of cases (defragging, reusing, ...) * where we cannot backup. */ + nbp = NULL; mtx_lock(&bqlock); - nqindex = QUEUE_EMPTYKVA; - nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTYKVA]); - + if (!defrag && unmapped) { + nqindex = QUEUE_EMPTY; + nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]); + } if (nbp == NULL) { - /* - * If no EMPTYKVA buffers and we are either - * defragging or reusing, locate a CLEAN buffer - * to free or reuse. If bufspace useage is low - * skip this step so we can allocate a new buffer. - */ - if (defrag || bufspace >= lobufspace) { - nqindex = QUEUE_CLEAN; - nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]); - } + nqindex = QUEUE_EMPTYKVA; + nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTYKVA]); + } - /* - * If we could not find or were not allowed to reuse a - * CLEAN buffer, check to see if it is ok to use an EMPTY - * buffer. We can only use an EMPTY buffer if allocating - * its KVA would not otherwise run us out of buffer space. 
- */ - if (nbp == NULL && defrag == 0 && - bufspace + maxsize < hibufspace) { - nqindex = QUEUE_EMPTY; - nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]); - } + /* + * If no EMPTYKVA buffers and we are either defragging or + * reusing, locate a CLEAN buffer to free or reuse. If + * bufspace useage is low skip this step so we can allocate a + * new buffer. + */ + if (nbp == NULL && (defrag || bufspace >= lobufspace)) { + nqindex = QUEUE_CLEAN; + nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]); + } + + /* + * If we could not find or were not allowed to reuse a CLEAN + * buffer, check to see if it is ok to use an EMPTY buffer. + * We can only use an EMPTY buffer if allocating its KVA would + * not otherwise run us out of buffer space. No KVA is needed + * for the unmapped allocation. + */ + if (nbp == NULL && defrag == 0 && bufspace + maxsize < hibufspace) { + nqindex = QUEUE_EMPTY; + nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]); + } + + /* + * All available buffers might be clean, retry ignoring the + * lobufspace as the last resort. + */ + if (nbp == NULL && !TAILQ_EMPTY(&bufqueues[QUEUE_CLEAN])) { + nqindex = QUEUE_CLEAN; + nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]); } /* * Run scan, possibly freeing data and/or kva mappings on the fly * depending. */ - while ((bp = nbp) != NULL) { - int qindex = nqindex; + qindex = nqindex; /* - * Calculate next bp ( we can only use it if we do not block - * or do other fancy things ). + * Calculate next bp (we can only use it if we do not + * block or do other fancy things). */ if ((nbp = TAILQ_NEXT(bp, b_freelist)) == NULL) { switch(qindex) { case QUEUE_EMPTY: nqindex = QUEUE_EMPTYKVA; - if ((nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTYKVA]))) + nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTYKVA]); + if (nbp != NULL) break; /* FALLTHROUGH */ case QUEUE_EMPTYKVA: nqindex = QUEUE_CLEAN; - if ((nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]))) + nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]); + if (nbp != NULL) break; /* FALLTHROUGH */ case QUEUE_CLEAN: @@ -1948,22 +2271,9 @@ restart: } BO_UNLOCK(bp->b_bufobj); } - CTR6(KTR_BUF, - "getnewbuf(%p) vp %p flags %X kvasize %d bufsize %d " - "queue %d (recycling)", bp, bp->b_vp, bp->b_flags, - bp->b_kvasize, bp->b_bufsize, qindex); - /* - * Sanity Checks - */ - KASSERT(bp->b_qindex == qindex, ("getnewbuf: inconsistant queue %d bp %p", qindex, bp)); - - /* - * Note: we no longer distinguish between VMIO and non-VMIO - * buffers. - */ - - KASSERT((bp->b_flags & B_DELWRI) == 0, ("delwri buffer %p found in queue %d", bp, qindex)); + KASSERT(bp->b_qindex == qindex, + ("getnewbuf: inconsistent queue %d bp %p", qindex, bp)); if (bp->b_bufobj != NULL) BO_LOCK(bp->b_bufobj); @@ -1971,68 +2281,13 @@ restart: if (bp->b_bufobj != NULL) BO_UNLOCK(bp->b_bufobj); mtx_unlock(&bqlock); - - if (qindex == QUEUE_CLEAN) { - if (bp->b_flags & B_VMIO) { - bp->b_flags &= ~B_ASYNC; - vfs_vmio_release(bp); - } - if (bp->b_vp) - brelvp(bp); - } - /* * NOTE: nbp is now entirely invalid. We can only restart * the scan from this point on. - * - * Get the rest of the buffer freed up. b_kva* is still - * valid after this operation. */ - if (bp->b_rcred != NOCRED) { - crfree(bp->b_rcred); - bp->b_rcred = NOCRED; - } - if (bp->b_wcred != NOCRED) { - crfree(bp->b_wcred); - bp->b_wcred = NOCRED; - } - if (!LIST_EMPTY(&bp->b_dep)) - buf_deallocate(bp); - if (bp->b_vflags & BV_BKGRDINPROG) - panic("losing buffer 3"); - KASSERT(bp->b_vp == NULL, - ("bp: %p still has vnode %p. 
qindex: %d", - bp, bp->b_vp, qindex)); - KASSERT((bp->b_xflags & (BX_VNCLEAN|BX_VNDIRTY)) == 0, - ("bp: %p still on a buffer list. xflags %X", - bp, bp->b_xflags)); - - if (bp->b_bufsize) - allocbuf(bp, 0); - - bp->b_flags = 0; - bp->b_ioflags = 0; - bp->b_xflags = 0; - KASSERT((bp->b_vflags & BV_INFREECNT) == 0, - ("buf %p still counted as free?", bp)); - bp->b_vflags = 0; - bp->b_vp = NULL; - bp->b_blkno = bp->b_lblkno = 0; - bp->b_offset = NOOFFSET; - bp->b_iodone = 0; - bp->b_error = 0; - bp->b_resid = 0; - bp->b_bcount = 0; - bp->b_npages = 0; - bp->b_dirtyoff = bp->b_dirtyend = 0; - bp->b_bufobj = NULL; - bp->b_pin_count = 0; - bp->b_fsprivate1 = NULL; - bp->b_fsprivate2 = NULL; - bp->b_fsprivate3 = NULL; - - LIST_INIT(&bp->b_dep); + getnewbuf_reuse_bp(bp, qindex); + mtx_assert(&bqlock, MA_NOTOWNED); /* * If we are defragging then free the buffer. @@ -2073,6 +2328,52 @@ restart: flushingbufs = 0; break; } + return (bp); +} + +/* + * getnewbuf: + * + * Find and initialize a new buffer header, freeing up existing buffers + * in the bufqueues as necessary. The new buffer is returned locked. + * + * Important: B_INVAL is not set. If the caller wishes to throw the + * buffer away, the caller must set B_INVAL prior to calling brelse(). + * + * We block if: + * We have insufficient buffer headers + * We have insufficient buffer space + * buffer_map is too fragmented ( space reservation fails ) + * If we have to flush dirty buffers ( but we try to avoid this ) + * + * To avoid VFS layer recursion we do not flush dirty buffers ourselves. + * Instead we ask the buf daemon to do it for us. We attempt to + * avoid piecemeal wakeups of the pageout daemon. + */ +static struct buf * +getnewbuf(struct vnode *vp, int slpflag, int slptimeo, int size, int maxsize, + int gbflags) +{ + struct buf *bp; + int defrag; + + KASSERT((gbflags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC, + ("GB_KVAALLOC only makes sense with GB_UNMAPPED")); + + defrag = 0; + /* + * We can't afford to block since we might be holding a vnode lock, + * which may prevent system daemons from running. We deal with + * low-memory situations by proactively returning memory and running + * async I/O rather then sync I/O. + */ + atomic_add_int(&getnewbufcalls, 1); + atomic_subtract_int(&getnewbufrestarts, 1); +restart: + bp = getnewbuf_scan(maxsize, defrag, (gbflags & (GB_UNMAPPED | + GB_KVAALLOC)) == GB_UNMAPPED); + if (bp != NULL) + defrag = 0; /* * If we exhausted our list, sleep as appropriate. We may have to @@ -2080,65 +2381,23 @@ restart: * * Generally we are sleeping due to insufficient buffer space. */ - if (bp == NULL) { - int flags, norunbuf; - char *waitmsg; - int fl; - - if (defrag) { - flags = VFS_BIO_NEED_BUFSPACE; - waitmsg = "nbufkv"; - } else if (bufspace >= hibufspace) { - waitmsg = "nbufbs"; - flags = VFS_BIO_NEED_BUFSPACE; - } else { - waitmsg = "newbuf"; - flags = VFS_BIO_NEED_ANY; - } - mtx_lock(&nblock); - needsbuffer |= flags; - mtx_unlock(&nblock); - mtx_unlock(&bqlock); - - bd_speedup(); /* heeeelp */ - if (gbflags & GB_NOWAIT_BD) - return (NULL); - - mtx_lock(&nblock); - while (needsbuffer & flags) { - if (vp != NULL && (td->td_pflags & TDP_BUFNEED) == 0) { - mtx_unlock(&nblock); - /* - * getblk() is called with a vnode - * locked, and some majority of the - * dirty buffers may as well belong to - * the vnode. Flushing the buffers - * there would make a progress that - * cannot be achieved by the - * buf_daemon, that cannot lock the - * vnode. 
- */ - norunbuf = ~(TDP_BUFNEED | TDP_NORUNNINGBUF) | - (td->td_pflags & TDP_NORUNNINGBUF); - /* play bufdaemon */ - td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF; - fl = buf_do_flush(vp); - td->td_pflags &= norunbuf; - mtx_lock(&nblock); - if (fl != 0) - continue; - if ((needsbuffer & flags) == 0) - break; - } - if (msleep(&needsbuffer, &nblock, - (PRIBIO + 4) | slpflag, waitmsg, slptimeo)) { - mtx_unlock(&nblock); - return (NULL); - } - } - mtx_unlock(&nblock); + mtx_assert(&bqlock, MA_OWNED); + getnewbuf_bufd_help(vp, gbflags, slpflag, slptimeo, defrag); + mtx_assert(&bqlock, MA_NOTOWNED); + } else if ((gbflags & (GB_UNMAPPED | GB_KVAALLOC)) == GB_UNMAPPED) { + mtx_assert(&bqlock, MA_NOTOWNED); + + bfreekva(bp); + bp->b_flags |= B_UNMAPPED; + bp->b_kvabase = bp->b_data = unmapped_buf; + bp->b_kvasize = maxsize; + atomic_add_long(&bufspace, bp->b_kvasize); + atomic_add_long(&unmapped_bufspace, bp->b_kvasize); + atomic_add_int(&bufreusecnt, 1); } else { + mtx_assert(&bqlock, MA_NOTOWNED); + /* * We finally have a valid bp. We aren't quite out of the * woods, we still have to reserve kva space. In order @@ -2147,39 +2406,46 @@ restart: */ maxsize = (maxsize + BKVAMASK) & ~BKVAMASK; - if (maxsize != bp->b_kvasize) { - vm_offset_t addr = 0; - int rv; - - bfreekva(bp); - - vm_map_lock(buffer_map); - if (vm_map_findspace(buffer_map, - vm_map_min(buffer_map), maxsize, &addr)) { - /* - * Buffer map is too fragmented. - * We must defragment the map. - */ - atomic_add_int(&bufdefragcnt, 1); - vm_map_unlock(buffer_map); + if (maxsize != bp->b_kvasize || (bp->b_flags & (B_UNMAPPED | + B_KVAALLOC)) == B_UNMAPPED) { + if (allocbufkva(bp, maxsize, gbflags)) { defrag = 1; bp->b_flags |= B_INVAL; brelse(bp); goto restart; } - rv = vm_map_insert(buffer_map, NULL, 0, addr, - addr + maxsize, VM_PROT_ALL, VM_PROT_ALL, - MAP_NOFAULT); - KASSERT(rv == KERN_SUCCESS, - ("vm_map_insert(buffer_map) rv %d", rv)); - vm_map_unlock(buffer_map); - bp->b_kvabase = (caddr_t)addr; - bp->b_kvasize = maxsize; - atomic_add_long(&bufspace, bp->b_kvasize); atomic_add_int(&bufreusecnt, 1); + } else if ((bp->b_flags & B_KVAALLOC) != 0 && + (gbflags & (GB_UNMAPPED | GB_KVAALLOC)) == 0) { + /* + * If the reused buffer has KVA allocated, + * reassign b_kvaalloc to b_kvabase. + */ + bp->b_kvabase = bp->b_kvaalloc; + bp->b_flags &= ~B_KVAALLOC; + atomic_subtract_long(&unmapped_bufspace, + bp->b_kvasize); + atomic_add_int(&bufreusecnt, 1); + } else if ((bp->b_flags & (B_UNMAPPED | B_KVAALLOC)) == 0 && + (gbflags & (GB_UNMAPPED | GB_KVAALLOC)) == GB_KVAALLOC) { + /* + * The case of reused buffer already have KVA + * mapped, but the request is for unmapped + * buffer with KVA allocated. + */ + bp->b_kvaalloc = bp->b_kvabase; + bp->b_data = bp->b_kvabase = unmapped_buf; + bp->b_flags |= B_UNMAPPED | B_KVAALLOC; + atomic_add_long(&unmapped_bufspace, + bp->b_kvasize); + atomic_add_int(&bufreusecnt, 1); + } + if ((gbflags & GB_UNMAPPED) == 0) { + bp->b_saveaddr = bp->b_kvabase; + bp->b_data = bp->b_saveaddr; + bp->b_flags &= ~B_UNMAPPED; + BUF_CHECK_MAPPED(bp); } - bp->b_saveaddr = bp->b_kvabase; - bp->b_data = bp->b_saveaddr; } return (bp); } @@ -2590,6 +2856,90 @@ vfs_setdirty_locked_object(struct buf *bp) } /* + * Allocate the KVA mapping for an existing buffer. It handles the + * cases of both B_UNMAPPED buffer, and buffer with the preallocated + * KVA which is not mapped (B_KVAALLOC). 
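The tail of getnewbuf() now has to reconcile the buffer's existing KVA state (B_UNMAPPED, B_KVAALLOC) with what the caller requested (GB_UNMAPPED, GB_KVAALLOC). The user-space sketch below models that decision ladder; the flag values are taken from the patch, but struct fakebuf, unmapped_marker and reconcile_kva() are invented stand-ins, not the kernel code.

#include <stdio.h>

/* Flag values as introduced by the patch. */
#define B_UNMAPPED  0x00000800  /* KVA is not mapped. */
#define B_KVAALLOC  0x00001000  /* But allocated. */
#define GB_UNMAPPED 0x0008      /* Do not map buffer pages. */
#define GB_KVAALLOC 0x0010      /* But allocate KVA. */

struct fakebuf {                /* invented stand-in for struct buf */
        int      flags;
        char    *kvabase;       /* mapped KVA, if any */
        char    *kvaalloc;      /* reserved-but-unmapped KVA, if any */
        char    *data;
};

static char unmapped_marker;    /* plays the role of unmapped_buf */

/*
 * Simplified decision ladder modelled on the reuse cases at the end of
 * getnewbuf(); returns a string naming the case taken.
 */
static const char *
reconcile_kva(struct fakebuf *bp, int gbflags)
{
        if ((bp->flags & B_KVAALLOC) != 0 &&
            (gbflags & (GB_UNMAPPED | GB_KVAALLOC)) == 0) {
                /* Reused buffer already has spare KVA; map through it. */
                bp->kvabase = bp->data = bp->kvaalloc;
                bp->kvaalloc = NULL;
                bp->flags &= ~(B_UNMAPPED | B_KVAALLOC);
                return ("reassign reserved KVA to b_kvabase");
        }
        if ((bp->flags & (B_UNMAPPED | B_KVAALLOC)) == 0 &&
            (gbflags & GB_UNMAPPED) != 0 && (gbflags & GB_KVAALLOC) != 0) {
                /* Mapped buffer reused for an unmapped request that
                 * still wants the KVA kept around. */
                bp->kvaalloc = bp->kvabase;
                bp->data = bp->kvabase = &unmapped_marker;
                bp->flags |= B_UNMAPPED | B_KVAALLOC;
                return ("stash mapping in b_kvaalloc, go unmapped");
        }
        return ("KVA already matches the request (or must be allocated)");
}

int
main(void)
{
        static char kva[16];
        struct fakebuf bp = { B_UNMAPPED | B_KVAALLOC, NULL, kva, NULL };

        puts(reconcile_kva(&bp, 0));
        return (0);
}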
+ */ +static void +bp_unmapped_get_kva(struct buf *bp, daddr_t blkno, int size, int gbflags) +{ + struct buf *scratch_bp; + int bsize, maxsize, need_mapping, need_kva; + off_t offset; + + need_mapping = (bp->b_flags & B_UNMAPPED) != 0 && + (gbflags & GB_UNMAPPED) == 0; + need_kva = (bp->b_flags & (B_KVAALLOC | B_UNMAPPED)) == B_UNMAPPED && + (gbflags & GB_KVAALLOC) != 0; + if (!need_mapping && !need_kva) + return; + + BUF_CHECK_UNMAPPED(bp); + + if (need_mapping && (bp->b_flags & B_KVAALLOC) != 0) { + /* + * Buffer is not mapped, but the KVA was already + * reserved at the time of the instantiation. Use the + * allocated space. + */ + bp->b_flags &= ~B_KVAALLOC; + KASSERT(bp->b_kvaalloc != 0, ("kvaalloc == 0")); + bp->b_kvabase = bp->b_kvaalloc; + atomic_subtract_long(&unmapped_bufspace, bp->b_kvasize); + goto has_addr; + } + + /* + * Calculate the amount of the address space we would reserve + * if the buffer was mapped. + */ + bsize = vn_isdisk(bp->b_vp, NULL) ? DEV_BSIZE : bp->b_bufobj->bo_bsize; + offset = blkno * bsize; + maxsize = size + (offset & PAGE_MASK); + maxsize = imax(maxsize, bsize); + +mapping_loop: + if (allocbufkva(bp, maxsize, gbflags)) { + /* + * Request defragmentation. getnewbuf() returns us the + * allocated space by the scratch buffer KVA. + */ + scratch_bp = getnewbuf(bp->b_vp, 0, 0, size, maxsize, gbflags | + (GB_UNMAPPED | GB_KVAALLOC)); + if (scratch_bp == NULL) { + if ((gbflags & GB_NOWAIT_BD) != 0) { + /* + * XXXKIB: defragmentation cannot + * succeed, not sure what else to do. + */ + panic("GB_NOWAIT_BD and B_UNMAPPED %p", bp); + } + atomic_add_int(&mappingrestarts, 1); + goto mapping_loop; + } + KASSERT((scratch_bp->b_flags & B_KVAALLOC) != 0, + ("scratch bp !B_KVAALLOC %p", scratch_bp)); + setbufkva(bp, (vm_offset_t)scratch_bp->b_kvaalloc, + scratch_bp->b_kvasize, gbflags); + + /* Get rid of the scratch buffer. */ + scratch_bp->b_kvasize = 0; + scratch_bp->b_flags |= B_INVAL | B_UNMAPPED; + scratch_bp->b_flags &= ~B_KVAALLOC; + brelse(scratch_bp); + } + if (!need_mapping) + return; + +has_addr: + bp->b_saveaddr = bp->b_kvabase; + bp->b_data = bp->b_saveaddr; /* b_offset is handled by bpmap_qenter */ + bp->b_flags &= ~B_UNMAPPED; + BUF_CHECK_MAPPED(bp); + bpmap_qenter(bp); +} + +/* * getblk: * * Get a block given a specified block and offset into a file/device. @@ -2626,14 +2976,17 @@ vfs_setdirty_locked_object(struct buf *bp) * prior to issuing the READ. biodone() will *not* clear B_INVAL. */ struct buf * -getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo, +getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo, int flags) { struct buf *bp; struct bufobj *bo; - int error; + int bsize, error, maxsize, vmio; + off_t offset; CTR3(KTR_BUF, "getblk(%p, %ld, %d)", vp, (long)blkno, size); + KASSERT((flags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC, + ("GB_KVAALLOC only makes sense with GB_UNMAPPED")); ASSERT_VOP_LOCKED(vp, "getblk"); if (size > MAXBSIZE) panic("getblk: size(%d) > MAXBSIZE(%d)\n", size, MAXBSIZE); @@ -2701,9 +3054,8 @@ loop: } /* - * check for size inconsistancies for non-VMIO case. + * check for size inconsistencies for non-VMIO case. */ - if (bp->b_bcount != size) { if ((bp->b_flags & B_VMIO) == 0 || (size > bp->b_kvasize)) { @@ -2737,12 +3089,18 @@ loop: } /* + * Handle the case of unmapped buffer which should + * become mapped, or the buffer for which KVA + * reservation is requested. 
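Before bp_unmapped_get_kva() can reserve address space it computes how much KVA the mapped buffer would need: the block may start partway into a page, so the in-page offset is added to the request and the result is clamped to at least one block. A minimal user-space sketch of that arithmetic (PAGE_SIZE, the sample block size and kva_needed() are assumptions used only for illustration):

#include <stdio.h>

#define PAGE_SIZE       4096
#define PAGE_MASK       (PAGE_SIZE - 1)

static int
imax(int a, int b)
{
        return (a > b ? a : b);
}

/*
 * KVA needed to map "size" bytes of block "blkno" when blocks are
 * "bsize" bytes long; mirrors the maxsize computation in the patch.
 */
static int
kva_needed(long long blkno, int size, int bsize)
{
        long long offset;
        int maxsize;

        offset = blkno * bsize;                     /* byte offset of the block */
        maxsize = size + (int)(offset & PAGE_MASK); /* block may start mid-page */
        return (imax(maxsize, bsize));              /* never less than a block */
}

int
main(void)
{
        /* A 16K read of block 3 with 512-byte blocks starts 1536 bytes
         * into a page, so 16K + 1536 bytes of KVA are needed. */
        printf("%d\n", kva_needed(3, 16384, 512));
        return (0);
}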
+ */ + bp_unmapped_get_kva(bp, blkno, size, flags); + + /* * If the size is inconsistant in the VMIO case, we can resize * the buffer. This might lead to B_CACHE getting set or * cleared. If the size has not changed, B_CACHE remains * unchanged from its previous state. */ - if (bp->b_bcount != size) allocbuf(bp, size); @@ -2783,9 +3141,6 @@ loop: } bp->b_flags &= ~B_DONE; } else { - int bsize, maxsize, vmio; - off_t offset; - /* * Buffer is not in-core, create new buffer. The buffer * returned by getnewbuf() is locked. Note that the returned @@ -2801,7 +3156,13 @@ loop: bsize = vn_isdisk(vp, NULL) ? DEV_BSIZE : bo->bo_bsize; offset = blkno * bsize; vmio = vp->v_object != NULL; - maxsize = vmio ? size + (offset & PAGE_MASK) : size; + if (vmio) { + maxsize = size + (offset & PAGE_MASK); + } else { + maxsize = size; + /* Do not allow non-VMIO notmapped buffers. */ + flags &= ~GB_UNMAPPED; + } maxsize = imax(maxsize, bsize); bp = getnewbuf(vp, slpflag, slptimeo, size, maxsize, flags); @@ -2857,6 +3218,7 @@ loop: KASSERT(bp->b_bufobj->bo_object == NULL, ("ARGH! has b_bufobj->bo_object %p %p\n", bp, bp->b_bufobj->bo_object)); + BUF_CHECK_MAPPED(bp); } allocbuf(bp, size); @@ -3031,10 +3393,14 @@ allocbuf(struct buf *bp, int size) if (desiredpages < bp->b_npages) { vm_page_t m; - pmap_qremove((vm_offset_t)trunc_page( - (vm_offset_t)bp->b_data) + - (desiredpages << PAGE_SHIFT), - (bp->b_npages - desiredpages)); + if ((bp->b_flags & B_UNMAPPED) == 0) { + BUF_CHECK_MAPPED(bp); + pmap_qremove((vm_offset_t)trunc_page( + (vm_offset_t)bp->b_data) + + (desiredpages << PAGE_SHIFT), + (bp->b_npages - desiredpages)); + } else + BUF_CHECK_UNMAPPED(bp); VM_OBJECT_LOCK(bp->b_bufobj->bo_object); for (i = desiredpages; i < bp->b_npages; i++) { /* @@ -3140,21 +3506,12 @@ allocbuf(struct buf *bp, int size) VM_OBJECT_UNLOCK(obj); /* - * Step 3, fixup the KVM pmap. Remember that - * bp->b_data is relative to bp->b_offset, but - * bp->b_offset may be offset into the first page. + * Step 3, fixup the KVM pmap. 
*/ - - bp->b_data = (caddr_t) - trunc_page((vm_offset_t)bp->b_data); - pmap_qenter( - (vm_offset_t)bp->b_data, - bp->b_pages, - bp->b_npages - ); - - bp->b_data = (caddr_t)((vm_offset_t)bp->b_data | - (vm_offset_t)(bp->b_offset & PAGE_MASK)); + if ((bp->b_flags & B_UNMAPPED) == 0) + bpmap_qenter(bp); + else + BUF_CHECK_UNMAPPED(bp); } } if (newbsize < bp->b_bufsize) @@ -3164,21 +3521,38 @@ allocbuf(struct buf *bp, int size) return 1; } +extern int inflight_transient_maps; + void biodone(struct bio *bp) { struct mtx *mtxp; void (*done)(struct bio *); + vm_offset_t start, end; + int transient; mtxp = mtx_pool_find(mtxpool_sleep, bp); mtx_lock(mtxp); bp->bio_flags |= BIO_DONE; + if ((bp->bio_flags & BIO_TRANSIENT_MAPPING) != 0) { + start = trunc_page((vm_offset_t)bp->bio_data); + end = round_page((vm_offset_t)bp->bio_data + bp->bio_length); + transient = 1; + } else { + transient = 0; + start = end = 0; + } done = bp->bio_done; if (done == NULL) wakeup(bp); mtx_unlock(mtxp); if (done != NULL) done(bp); + if (transient) { + pmap_qremove(start, OFF_TO_IDX(end - start)); + vm_map_remove(bio_transient_map, start, end); + atomic_add_int(&inflight_transient_maps, -1); + } } /* @@ -3281,7 +3655,7 @@ dev_strategy(struct cdev *dev, struct buf *bp) bip->bio_offset = bp->b_iooffset; bip->bio_length = bp->b_bcount; bip->bio_bcount = bp->b_bcount; /* XXX: remove */ - bip->bio_data = bp->b_data; + bdata2bio(bp, bip); bip->bio_done = bufdonebio; bip->bio_caller2 = bp; bip->bio_dev = dev; @@ -3435,9 +3809,11 @@ bufdone_finish(struct buf *bp) } vm_object_pip_wakeupn(obj, 0); VM_OBJECT_UNLOCK(obj); - if (bogus) + if (bogus && (bp->b_flags & B_UNMAPPED) == 0) { + BUF_CHECK_MAPPED(bp); pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages); + } } /* @@ -3480,8 +3856,12 @@ vfs_unbusy_pages(struct buf *bp) if (!m) panic("vfs_unbusy_pages: page missing\n"); bp->b_pages[i] = m; - pmap_qenter(trunc_page((vm_offset_t)bp->b_data), - bp->b_pages, bp->b_npages); + if ((bp->b_flags & B_UNMAPPED) == 0) { + BUF_CHECK_MAPPED(bp); + pmap_qenter(trunc_page((vm_offset_t)bp->b_data), + bp->b_pages, bp->b_npages); + } else + BUF_CHECK_UNMAPPED(bp); } vm_object_pip_subtract(obj, 1); vm_page_io_finish(m); @@ -3646,9 +4026,11 @@ vfs_busy_pages(struct buf *bp, int clear_modify) foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; } VM_OBJECT_UNLOCK(obj); - if (bogus) + if (bogus && (bp->b_flags & B_UNMAPPED) == 0) { + BUF_CHECK_MAPPED(bp); pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages); + } } /* @@ -3704,8 +4086,7 @@ vfs_bio_set_valid(struct buf *bp, int base, int size) void vfs_bio_clrbuf(struct buf *bp) { - int i, j, mask; - caddr_t sa, ea; + int i, j, mask, sa, ea, slide; if ((bp->b_flags & (B_VMIO | B_MALLOC)) != B_VMIO) { clrbuf(bp); @@ -3723,39 +4104,69 @@ vfs_bio_clrbuf(struct buf *bp) if ((bp->b_pages[0]->valid & mask) == mask) goto unlock; if ((bp->b_pages[0]->valid & mask) == 0) { - bzero(bp->b_data, bp->b_bufsize); + pmap_zero_page_area(bp->b_pages[0], 0, bp->b_bufsize); bp->b_pages[0]->valid |= mask; goto unlock; } } - ea = sa = bp->b_data; - for(i = 0; i < bp->b_npages; i++, sa = ea) { - ea = (caddr_t)trunc_page((vm_offset_t)sa + PAGE_SIZE); - ea = (caddr_t)(vm_offset_t)ulmin( - (u_long)(vm_offset_t)ea, - (u_long)(vm_offset_t)bp->b_data + bp->b_bufsize); + sa = bp->b_offset & PAGE_MASK; + slide = 0; + for (i = 0; i < bp->b_npages; i++) { + slide = imin(slide + PAGE_SIZE, bp->b_bufsize + sa); + ea = slide & PAGE_MASK; + if (ea == 0) + ea = PAGE_SIZE; if (bp->b_pages[i] == 
bogus_page) continue; - j = ((vm_offset_t)sa & PAGE_MASK) / DEV_BSIZE; + j = sa / DEV_BSIZE; mask = ((1 << ((ea - sa) / DEV_BSIZE)) - 1) << j; VM_OBJECT_LOCK_ASSERT(bp->b_pages[i]->object, MA_OWNED); if ((bp->b_pages[i]->valid & mask) == mask) continue; if ((bp->b_pages[i]->valid & mask) == 0) - bzero(sa, ea - sa); + pmap_zero_page_area(bp->b_pages[i], sa, ea - sa); else { for (; sa < ea; sa += DEV_BSIZE, j++) { - if ((bp->b_pages[i]->valid & (1 << j)) == 0) - bzero(sa, DEV_BSIZE); + if ((bp->b_pages[i]->valid & (1 << j)) == 0) { + pmap_zero_page_area(bp->b_pages[i], + sa, DEV_BSIZE); + } } } bp->b_pages[i]->valid |= mask; + sa = 0; } unlock: VM_OBJECT_UNLOCK(bp->b_bufobj->bo_object); bp->b_resid = 0; } +void +vfs_bio_bzero_buf(struct buf *bp, int base, int size) +{ + vm_page_t m; + int i, n; + + if ((bp->b_flags & B_UNMAPPED) == 0) { + BUF_CHECK_MAPPED(bp); + bzero(bp->b_data + base, size); + } else { + BUF_CHECK_UNMAPPED(bp); + n = PAGE_SIZE - (base & PAGE_MASK); + VM_OBJECT_LOCK(bp->b_bufobj->bo_object); + for (i = base / PAGE_SIZE; size > 0 && i < bp->b_npages; ++i) { + m = bp->b_pages[i]; + if (n > size) + n = size; + pmap_zero_page_area(m, base & PAGE_MASK, n); + base += n; + size -= n; + n = PAGE_SIZE; + } + VM_OBJECT_UNLOCK(bp->b_bufobj->bo_object); + } +} + /* * vm_hold_load_pages and vm_hold_free_pages get pages into * a buffers address space. The pages are anonymous and are @@ -3768,6 +4179,8 @@ vm_hold_load_pages(struct buf *bp, vm_offset_t from, vm_offset_t to) vm_page_t p; int index; + BUF_CHECK_MAPPED(bp); + to = round_page(to); from = round_page(from); index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT; @@ -3799,6 +4212,8 @@ vm_hold_free_pages(struct buf *bp, int newbsize) vm_page_t p; int index, newnpages; + BUF_CHECK_MAPPED(bp); + from = round_page((vm_offset_t)bp->b_data + newbsize); newnpages = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT; if (bp->b_npages > newnpages) @@ -3829,7 +4244,7 @@ vm_hold_free_pages(struct buf *bp, int newbsize) * check the return value. */ int -vmapbuf(struct buf *bp) +vmapbuf(struct buf *bp, int mapbuf) { caddr_t kva; vm_prot_t prot; @@ -3844,12 +4259,19 @@ vmapbuf(struct buf *bp) (vm_offset_t)bp->b_data, bp->b_bufsize, prot, bp->b_pages, btoc(MAXPHYS))) < 0) return (-1); - pmap_qenter((vm_offset_t)bp->b_saveaddr, bp->b_pages, pidx); - - kva = bp->b_saveaddr; bp->b_npages = pidx; - bp->b_saveaddr = bp->b_data; - bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK); + if (mapbuf) { + pmap_qenter((vm_offset_t)bp->b_saveaddr, bp->b_pages, pidx); + kva = bp->b_saveaddr; + bp->b_saveaddr = bp->b_data; + bp->b_data = kva + (((vm_offset_t)bp->b_data) & PAGE_MASK); + bp->b_flags &= ~B_UNMAPPED; + } else { + bp->b_flags |= B_UNMAPPED; + bp->b_offset = ((vm_offset_t)bp->b_data) & PAGE_MASK; + bp->b_saveaddr = bp->b_data; + bp->b_data = unmapped_buf; + } return(0); } @@ -3863,7 +4285,10 @@ vunmapbuf(struct buf *bp) int npages; npages = bp->b_npages; - pmap_qremove(trunc_page((vm_offset_t)bp->b_data), npages); + if (bp->b_flags & B_UNMAPPED) + bp->b_flags &= ~B_UNMAPPED; + else + pmap_qremove(trunc_page((vm_offset_t)bp->b_data), npages); vm_page_unhold_pages(bp->b_pages, npages); bp->b_data = bp->b_saveaddr; @@ -4000,6 +4425,29 @@ bunpin_wait(struct buf *bp) mtx_unlock(mtxp); } +/* + * Set bio_data or bio_ma for struct bio from the struct buf. 
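vfs_bio_clrbuf() and the new vfs_bio_bzero_buf() both stop going through b_data for unmapped buffers and instead zero page by page with pmap_zero_page_area(). The loop below is a user-space sketch of the same per-page chunking, printing which page and in-page range each step would clear (PAGE_SIZE and zero_by_pages() are illustrative stand-ins; the page-count bound checked by the real code is omitted):

#include <stdio.h>

#define PAGE_SIZE       4096

/*
 * Split a (base, size) range inside a buffer into per-page chunks, the
 * way vfs_bio_bzero_buf() feeds pmap_zero_page_area() when the buffer
 * has no mapping.
 */
static void
zero_by_pages(int base, int size)
{
        int i, n;

        n = PAGE_SIZE - (base % PAGE_SIZE);     /* room left in first page */
        for (i = base / PAGE_SIZE; size > 0; i++) {
                if (n > size)
                        n = size;
                printf("page %d: zero offset %d, length %d\n",
                    i, base % PAGE_SIZE, n);
                base += n;
                size -= n;
                n = PAGE_SIZE;                  /* later pages start at 0 */
        }
}

int
main(void)
{
        /* Zero 6000 bytes starting 3000 bytes into the buffer: the run
         * covers the tail of page 0, all of page 1 and the start of
         * page 2. */
        zero_by_pages(3000, 6000);
        return (0);
}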
+ */ +void +bdata2bio(struct buf *bp, struct bio *bip) +{ + + if ((bp->b_flags & B_UNMAPPED) != 0) { + bip->bio_ma = bp->b_pages; + bip->bio_ma_n = bp->b_npages; + bip->bio_data = unmapped_buf; + bip->bio_ma_offset = (vm_offset_t)bp->b_offset & PAGE_MASK; + bip->bio_flags |= BIO_UNMAPPED; + KASSERT(round_page(bip->bio_ma_offset + bip->bio_length) / + PAGE_SIZE == bp->b_npages, + ("Buffer %p too short: %d %d %d", bp, bip->bio_ma_offset, + bip->bio_length, bip->bio_ma_n)); + } else { + bip->bio_data = bp->b_data; + bip->bio_ma = NULL; + } +} + #include "opt_ddb.h" #ifdef DDB #include diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c index 663b66f..60338b2 100644 --- a/sys/kern/vfs_cluster.c +++ b/sys/kern/vfs_cluster.c @@ -60,11 +60,11 @@ SYSCTL_INT(_debug, OID_AUTO, rcluster, CTLFLAG_RW, &rcluster, 0, static MALLOC_DEFINE(M_SEGMENT, "cl_savebuf", "cluster_save buffer"); -static struct cluster_save * - cluster_collectbufs(struct vnode *vp, struct buf *last_bp); -static struct buf * - cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn, - daddr_t blkno, long size, int run, struct buf *fbp); +static struct cluster_save *cluster_collectbufs(struct vnode *vp, + struct buf *last_bp, int gbflags); +static struct buf *cluster_rbuild(struct vnode *vp, u_quad_t filesize, + daddr_t lbn, daddr_t blkno, long size, int run, int gbflags, + struct buf *fbp); static void cluster_callback(struct buf *); static int write_behind = 1; @@ -83,15 +83,9 @@ extern vm_page_t bogus_page; * cluster_read replaces bread. */ int -cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp) - struct vnode *vp; - u_quad_t filesize; - daddr_t lblkno; - long size; - struct ucred *cred; - long totread; - int seqcount; - struct buf **bpp; +cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size, + struct ucred *cred, long totread, int seqcount, int gbflags, + struct buf **bpp) { struct buf *bp, *rbp, *reqbp; struct bufobj *bo; @@ -117,7 +111,7 @@ cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp) /* * get the requested block */ - *bpp = reqbp = bp = getblk(vp, lblkno, size, 0, 0, 0); + *bpp = reqbp = bp = getblk(vp, lblkno, size, 0, 0, gbflags); origblkno = lblkno; /* @@ -208,7 +202,7 @@ cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp) if (ncontig < nblks) nblks = ncontig; bp = cluster_rbuild(vp, filesize, lblkno, - blkno, size, nblks, bp); + blkno, size, nblks, gbflags, bp); lblkno += (bp->b_bufsize / size); } else { bp->b_flags |= B_RAM; @@ -252,14 +246,14 @@ cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp) if (ncontig) { ncontig = min(ncontig + 1, racluster); rbp = cluster_rbuild(vp, filesize, lblkno, blkno, - size, ncontig, NULL); + size, ncontig, gbflags, NULL); lblkno += (rbp->b_bufsize / size); if (rbp->b_flags & B_DELWRI) { bqrelse(rbp); continue; } } else { - rbp = getblk(vp, lblkno, size, 0, 0, 0); + rbp = getblk(vp, lblkno, size, 0, 0, gbflags); lblkno += 1; if (rbp->b_flags & B_DELWRI) { bqrelse(rbp); @@ -298,14 +292,8 @@ cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp) * and then parcel them up into logical blocks in the buffer hash table. 
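bdata2bio() is where "mapped or unmapped" becomes visible to the I/O layer: a mapped buffer hands the bio its KVA in bio_data, an unmapped one hands over the page array, the page count and the offset of the data in the first page. The condensed model below uses simplified stand-in structs (xbuf, xbio), not the real struct buf and struct bio:

#include <stddef.h>
#include <stdio.h>

#define PAGE_MASK       4095
#define B_UNMAPPED      0x0800

struct xbuf {                   /* simplified stand-in for struct buf */
        int       flags;
        char     *data;         /* b_data, valid when mapped */
        void    **pages;        /* b_pages */
        int       npages;
        long long offset;       /* b_offset: byte offset of the data */
};

struct xbio {                   /* simplified stand-in for struct bio */
        char     *data;         /* bio_data */
        void    **ma;           /* bio_ma */
        int       ma_n;         /* bio_ma_n */
        int       ma_offset;    /* bio_ma_offset */
};

static void
xdata2bio(struct xbuf *bp, struct xbio *bip)
{
        if (bp->flags & B_UNMAPPED) {
                /* Unmapped: describe the I/O by pages, not by KVA. */
                bip->ma = bp->pages;
                bip->ma_n = bp->npages;
                bip->ma_offset = (int)(bp->offset & PAGE_MASK);
                bip->data = NULL;
        } else {
                bip->data = bp->data;
                bip->ma = NULL;
                bip->ma_n = 0;
        }
}

int
main(void)
{
        void *pages[2] = { NULL, NULL };
        struct xbuf bp = { B_UNMAPPED, NULL, pages, 2, 8192 + 512 };
        struct xbio bio;

        xdata2bio(&bp, &bio);
        printf("ma_n %d ma_offset %d\n", bio.ma_n, bio.ma_offset);
        return (0);
}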
*/ static struct buf * -cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp) - struct vnode *vp; - u_quad_t filesize; - daddr_t lbn; - daddr_t blkno; - long size; - int run; - struct buf *fbp; +cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn, + daddr_t blkno, long size, int run, int gbflags, struct buf *fbp) { struct bufobj *bo; struct buf *bp, *tbp; @@ -329,7 +317,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp) tbp = fbp; tbp->b_iocmd = BIO_READ; } else { - tbp = getblk(vp, lbn, size, 0, 0, 0); + tbp = getblk(vp, lbn, size, 0, 0, gbflags); if (tbp->b_flags & B_CACHE) return tbp; tbp->b_flags |= B_ASYNC | B_RAM; @@ -350,9 +338,14 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp) * address may not be either. Inherit the b_data offset * from the original buffer. */ - bp->b_data = (char *)((vm_offset_t)bp->b_data | - ((vm_offset_t)tbp->b_data & PAGE_MASK)); bp->b_flags = B_ASYNC | B_CLUSTER | B_VMIO; + if ((gbflags & GB_UNMAPPED) != 0) { + bp->b_flags |= B_UNMAPPED; + bp->b_data = unmapped_buf; + } else { + bp->b_data = (char *)((vm_offset_t)bp->b_data | + ((vm_offset_t)tbp->b_data & PAGE_MASK)); + } bp->b_iocmd = BIO_READ; bp->b_iodone = cluster_callback; bp->b_blkno = blkno; @@ -376,7 +369,8 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp) break; } - tbp = getblk(vp, lbn + i, size, 0, 0, GB_LOCK_NOWAIT); + tbp = getblk(vp, lbn + i, size, 0, 0, GB_LOCK_NOWAIT | + (gbflags & GB_UNMAPPED)); /* Don't wait around for locked bufs. */ if (tbp == NULL) @@ -499,8 +493,10 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp) bp->b_bufsize, bp->b_kvasize); bp->b_kvasize = bp->b_bufsize; - pmap_qenter(trunc_page((vm_offset_t) bp->b_data), - (vm_page_t *)bp->b_pages, bp->b_npages); + if ((bp->b_flags & B_UNMAPPED) == 0) { + pmap_qenter(trunc_page((vm_offset_t) bp->b_data), + (vm_page_t *)bp->b_pages, bp->b_npages); + } return (bp); } @@ -523,7 +519,10 @@ cluster_callback(bp) if (bp->b_ioflags & BIO_ERROR) error = bp->b_error; - pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); + if ((bp->b_flags & B_UNMAPPED) == 0) { + pmap_qremove(trunc_page((vm_offset_t) bp->b_data), + bp->b_npages); + } /* * Move memory from the large cluster buffer into the component * buffers and mark IO as done on these. @@ -565,18 +564,19 @@ cluster_callback(bp) */ static __inline int -cluster_wbuild_wb(struct vnode *vp, long size, daddr_t start_lbn, int len) +cluster_wbuild_wb(struct vnode *vp, long size, daddr_t start_lbn, int len, + int gbflags) { int r = 0; - switch(write_behind) { + switch (write_behind) { case 2: if (start_lbn < len) break; start_lbn -= len; /* FALLTHROUGH */ case 1: - r = cluster_wbuild(vp, size, start_lbn, len); + r = cluster_wbuild(vp, size, start_lbn, len, gbflags); /* FALLTHROUGH */ default: /* FALLTHROUGH */ @@ -596,7 +596,8 @@ cluster_wbuild_wb(struct vnode *vp, long size, daddr_t start_lbn, int len) * 4. 
end of a cluster - asynchronously write cluster */ void -cluster_write(struct vnode *vp, struct buf *bp, u_quad_t filesize, int seqcount) +cluster_write(struct vnode *vp, struct buf *bp, u_quad_t filesize, int seqcount, + int gbflags) { daddr_t lbn; int maxclen, cursize; @@ -642,13 +643,13 @@ cluster_write(struct vnode *vp, struct buf *bp, u_quad_t filesize, int seqcount) lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) { if (!async && seqcount > 0) { cluster_wbuild_wb(vp, lblocksize, - vp->v_cstart, cursize); + vp->v_cstart, cursize, gbflags); } } else { struct buf **bpp, **endbp; struct cluster_save *buflist; - buflist = cluster_collectbufs(vp, bp); + buflist = cluster_collectbufs(vp, bp, gbflags); endbp = &buflist->bs_children [buflist->bs_nchildren - 1]; if (VOP_REALLOCBLKS(vp, buflist)) { @@ -667,7 +668,7 @@ cluster_write(struct vnode *vp, struct buf *bp, u_quad_t filesize, int seqcount) if (seqcount > 1) { cluster_wbuild_wb(vp, lblocksize, vp->v_cstart, - cursize); + cursize, gbflags); } } else { /* @@ -715,8 +716,10 @@ cluster_write(struct vnode *vp, struct buf *bp, u_quad_t filesize, int seqcount) * update daemon handle it. */ bdwrite(bp); - if (seqcount > 1) - cluster_wbuild_wb(vp, lblocksize, vp->v_cstart, vp->v_clen + 1); + if (seqcount > 1) { + cluster_wbuild_wb(vp, lblocksize, vp->v_cstart, + vp->v_clen + 1, gbflags); + } vp->v_clen = 0; vp->v_cstart = lbn + 1; } else if (vm_page_count_severe()) { @@ -742,11 +745,8 @@ cluster_write(struct vnode *vp, struct buf *bp, u_quad_t filesize, int seqcount) * the current block (if last_bp == NULL). */ int -cluster_wbuild(vp, size, start_lbn, len) - struct vnode *vp; - long size; - daddr_t start_lbn; - int len; +cluster_wbuild(struct vnode *vp, long size, daddr_t start_lbn, int len, + int gbflags) { struct buf *bp, *tbp; struct bufobj *bo; @@ -832,10 +832,16 @@ cluster_wbuild(vp, size, start_lbn, len) * address may not be either. Inherit the b_data offset * from the original buffer. */ - bp->b_data = (char *)((vm_offset_t)bp->b_data | - ((vm_offset_t)tbp->b_data & PAGE_MASK)); - bp->b_flags |= B_CLUSTER | - (tbp->b_flags & (B_VMIO | B_NEEDCOMMIT)); + if ((gbflags & GB_UNMAPPED) == 0 || + (tbp->b_flags & B_VMIO) == 0) { + bp->b_data = (char *)((vm_offset_t)bp->b_data | + ((vm_offset_t)tbp->b_data & PAGE_MASK)); + } else { + bp->b_flags |= B_UNMAPPED; + bp->b_data = unmapped_buf; + } + bp->b_flags |= B_CLUSTER | (tbp->b_flags & (B_VMIO | + B_NEEDCOMMIT)); bp->b_iodone = cluster_callback; pbgetvp(vp, bp); /* @@ -962,8 +968,10 @@ cluster_wbuild(vp, size, start_lbn, len) tbp, b_cluster.cluster_entry); } finishcluster: - pmap_qenter(trunc_page((vm_offset_t) bp->b_data), - (vm_page_t *) bp->b_pages, bp->b_npages); + if ((bp->b_flags & B_UNMAPPED) == 0) { + pmap_qenter(trunc_page((vm_offset_t) bp->b_data), + (vm_page_t *)bp->b_pages, bp->b_npages); + } if (bp->b_bufsize > bp->b_kvasize) panic( "cluster_wbuild: b_bufsize(%ld) > b_kvasize(%d)\n", @@ -984,9 +992,7 @@ cluster_wbuild(vp, size, start_lbn, len) * Plus add one additional buffer. 
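When the cluster buffer is built from mapped component buffers, cluster_rbuild() and cluster_wbuild() make its b_data carry the same sub-page offset as the first component by OR-ing that offset into the page-aligned cluster KVA; with GB_UNMAPPED the cluster buffer skips KVA entirely and points b_data at unmapped_buf. A small sketch of the offset-inheritance bit trick (the addresses used in main() are fabricated for the example):

#include <stdint.h>
#include <stdio.h>

#define PAGE_MASK       ((uintptr_t)0xfff)

/*
 * Give a page-aligned KVA the same in-page offset as another mapping,
 * the way the cluster code lines the cluster buffer's b_data up with
 * the first component buffer's b_data.
 */
static uintptr_t
inherit_offset(uintptr_t aligned_kva, uintptr_t component_data)
{
        return (aligned_kva | (component_data & PAGE_MASK));
}

int
main(void)
{
        /* Fabricated addresses: cluster KVA at 0x200000, component
         * buffer data 0x180 bytes into its first page. */
        printf("%#lx\n",
            (unsigned long)inherit_offset(0x200000, 0x345180));
        return (0);
}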
*/ static struct cluster_save * -cluster_collectbufs(vp, last_bp) - struct vnode *vp; - struct buf *last_bp; +cluster_collectbufs(struct vnode *vp, struct buf *last_bp, int gbflags) { struct cluster_save *buflist; struct buf *bp; @@ -999,7 +1005,8 @@ cluster_collectbufs(vp, last_bp) buflist->bs_nchildren = 0; buflist->bs_children = (struct buf **) (buflist + 1); for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++) { - (void) bread(vp, lbn, last_bp->b_bcount, NOCRED, &bp); + (void)bread_gb(vp, lbn, last_bp->b_bcount, NOCRED, + gbflags, &bp); buflist->bs_children[i] = bp; if (bp->b_blkno == bp->b_lblkno) VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 32c0978..6b28580 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -1121,6 +1121,45 @@ vn_io_fault_uiomove(char *data, int xfersize, struct uio *uio) return (error); } +int +vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize, + struct uio *uio) +{ + struct thread *td; + vm_offset_t iov_base; + int cnt, pgadv; + + td = curthread; + if ((td->td_pflags & TDP_UIOHELD) == 0 || + uio->uio_segflg != UIO_USERSPACE) + return (uiomove_fromphys(ma, offset, xfersize, uio)); + + KASSERT(uio->uio_iovcnt == 1, ("uio_iovcnt %d", uio->uio_iovcnt)); + cnt = xfersize > uio->uio_resid ? uio->uio_resid : xfersize; + iov_base = (vm_offset_t)uio->uio_iov->iov_base; + switch (uio->uio_rw) { + case UIO_WRITE: + pmap_copy_pages(td->td_ma, iov_base & PAGE_MASK, ma, + offset, cnt); + break; + case UIO_READ: + pmap_copy_pages(ma, offset, td->td_ma, iov_base & PAGE_MASK, + cnt); + break; + } + pgadv = ((iov_base + cnt) >> PAGE_SHIFT) - (iov_base >> PAGE_SHIFT); + td->td_ma += pgadv; + KASSERT(td->td_ma_cnt >= pgadv, ("consumed pages %d %d", td->td_ma_cnt, + pgadv)); + td->td_ma_cnt -= pgadv; + uio->uio_iov->iov_base = (char *)(iov_base + cnt); + uio->uio_iov->iov_len -= cnt; + uio->uio_resid -= cnt; + uio->uio_offset += cnt; + return (0); +} + + /* * File table truncate routine. 
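vn_io_fault_pgmove() advances the held-page array td_ma by the number of page boundaries the user iovec crossed during the copy, not by the byte count. The arithmetic is a difference of page indices; a stand-alone illustration follows (PAGE_SHIFT and pages_advanced() are assumptions used only for the demo):

#include <stdio.h>

#define PAGE_SHIFT      12

/*
 * How many entries of a held-page array are consumed when the copy
 * moves the iovec base from "iov_base" forward by "cnt" bytes; mirrors
 * the pgadv computation in vn_io_fault_pgmove().
 */
static int
pages_advanced(unsigned long iov_base, int cnt)
{
        return (int)(((iov_base + cnt) >> PAGE_SHIFT) -
            (iov_base >> PAGE_SHIFT));
}

int
main(void)
{
        /* A 5000-byte copy starting 100 bytes before a page boundary
         * crosses two page boundaries, so two array entries are used. */
        printf("%d\n", pages_advanced(4096 - 100, 5000));
        return (0);
}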
*/ diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c index 7925b8c..4fdc88d 100644 --- a/sys/mips/mips/pmap.c +++ b/sys/mips/mips/pmap.c @@ -2576,6 +2576,51 @@ pmap_copy_page(vm_page_t src, vm_page_t dst) } } +void +pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], + vm_offset_t b_offset, int xfersize) +{ + char *a_cp, *b_cp; + vm_page_t a_m, b_m; + vm_offset_t a_pg_offset, b_pg_offset; + vm_paddr_t a_phys, b_phys; + int cnt; + + while (xfersize > 0) { + a_pg_offset = a_offset & PAGE_MASK; + cnt = min(xfersize, PAGE_SIZE - a_pg_offset); + a_m = ma[a_offset >> PAGE_SHIFT]; + a_phys = VM_PAGE_TO_PHYS(a_m); + b_pg_offset = b_offset & PAGE_MASK; + cnt = min(cnt, PAGE_SIZE - b_pg_offset); + b_m = mb[b_offset >> PAGE_SHIFT]; + b_phys = VM_PAGE_TO_PHYS(b_m); + if (MIPS_DIRECT_MAPPABLE(a_phys) && + MIPS_DIRECT_MAPPABLE(b_phys)) { + pmap_flush_pvcache(a_m); + mips_dcache_wbinv_range_index( + MIPS_PHYS_TO_DIRECT(b_phys), PAGE_SIZE); + a_cp = (char *)MIPS_PHYS_TO_DIRECT(a_phys) + + a_pg_offset; + b_cp = (char *)MIPS_PHYS_TO_DIRECT(b_phys) + + b_pg_offset; + bcopy(a_cp, b_cp, cnt); + mips_dcache_wbinv_range((vm_offset_t)b_cp, cnt); + } else { + a_cp = (char *)pmap_lmem_map2(a_phys, b_phys); + b_cp = (char *)a_cp + PAGE_SIZE; + a_cp += a_pg_offset; + b_cp += b_pg_offset; + bcopy(a_cp, b_cp, cnt); + mips_dcache_wbinv_range((vm_offset_t)b_cp, cnt); + pmap_lmem_unmap(); + } + a_offset += cnt; + b_offset += cnt; + xfersize -= cnt; + } +} + /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may diff --git a/sys/powerpc/aim/mmu_oea.c b/sys/powerpc/aim/mmu_oea.c index b173760..9b496ac 100644 --- a/sys/powerpc/aim/mmu_oea.c +++ b/sys/powerpc/aim/mmu_oea.c @@ -276,6 +276,8 @@ void moea_change_wiring(mmu_t, pmap_t, vm_offset_t, boolean_t); void moea_clear_modify(mmu_t, vm_page_t); void moea_clear_reference(mmu_t, vm_page_t); void moea_copy_page(mmu_t, vm_page_t, vm_page_t); +void moea_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, + vm_page_t *mb, vm_offset_t b_offset, int xfersize); void moea_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t, boolean_t); void moea_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_page_t, vm_prot_t); @@ -321,6 +323,7 @@ static mmu_method_t moea_methods[] = { MMUMETHOD(mmu_clear_modify, moea_clear_modify), MMUMETHOD(mmu_clear_reference, moea_clear_reference), MMUMETHOD(mmu_copy_page, moea_copy_page), + MMUMETHOD(mmu_copy_pages, moea_copy_pages), MMUMETHOD(mmu_enter, moea_enter), MMUMETHOD(mmu_enter_object, moea_enter_object), MMUMETHOD(mmu_enter_quick, moea_enter_quick), @@ -1044,6 +1047,30 @@ moea_copy_page(mmu_t mmu, vm_page_t msrc, vm_page_t mdst) bcopy((void *)src, (void *)dst, PAGE_SIZE); } +void +moea_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, + vm_page_t *mb, vm_offset_t b_offset, int xfersize) +{ + void *a_cp, *b_cp; + vm_offset_t a_pg_offset, b_pg_offset; + int cnt; + + while (xfersize > 0) { + a_pg_offset = a_offset & PAGE_MASK; + cnt = min(xfersize, PAGE_SIZE - a_pg_offset); + a_cp = (char *)VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT]) + + a_pg_offset; + b_pg_offset = b_offset & PAGE_MASK; + cnt = min(cnt, PAGE_SIZE - b_pg_offset); + b_cp = (char *)VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT]) + + b_pg_offset; + bcopy(a_cp, b_cp, cnt); + a_offset += cnt; + b_offset += cnt; + xfersize -= cnt; + } +} + /* * Zero a page of physical memory by temporarily mapping it into the tlb. 
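Every pmap_copy_pages() implementation added by the patch is the same chunking loop wrapped around a machine-specific copy: each iteration is limited by the bytes remaining in the current source page and in the current destination page, whichever is smaller. Below is a user-space rendering of that pattern over plain byte arrays standing in for vm_page_t arrays; it sketches the algorithm, not any one architecture's code (the tiny PAGE_SIZE just keeps the demo small):

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE       256
#define PAGE_MASK       (PAGE_SIZE - 1)
#define PAGE_SHIFT      8

static int
imin(int a, int b)
{
        return (a < b ? a : b);
}

/*
 * Copy "xfersize" bytes from offset a_offset in page array "ma" to
 * offset b_offset in page array "mb", never letting one memcpy()
 * cross a page boundary on either side.
 */
static void
copy_pages(char ma[][PAGE_SIZE], int a_offset, char mb[][PAGE_SIZE],
    int b_offset, int xfersize)
{
        int a_pg_offset, b_pg_offset, cnt;

        while (xfersize > 0) {
                a_pg_offset = a_offset & PAGE_MASK;
                cnt = imin(xfersize, PAGE_SIZE - a_pg_offset);
                b_pg_offset = b_offset & PAGE_MASK;
                cnt = imin(cnt, PAGE_SIZE - b_pg_offset);
                memcpy(&mb[b_offset >> PAGE_SHIFT][b_pg_offset],
                    &ma[a_offset >> PAGE_SHIFT][a_pg_offset], cnt);
                a_offset += cnt;
                b_offset += cnt;
                xfersize -= cnt;
        }
}

int
main(void)
{
        char src[4][PAGE_SIZE], dst[4][PAGE_SIZE];

        memset(src, 'x', sizeof(src));
        memset(dst, 0, sizeof(dst));
        /* Copy 300 bytes from offset 200 to offset 700: the loop splits
         * it into 56-, 12- and 232-byte pieces so no memcpy() crosses
         * a page edge on either side. */
        copy_pages(src, 200, dst, 700, 300);
        printf("%d %d\n", dst[2][188] == 'x', dst[3][232] == 'x');
        return (0);
}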
*/ diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index 00dab9b..a7bacf4 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -291,6 +291,8 @@ void moea64_change_wiring(mmu_t, pmap_t, vm_offset_t, boolean_t); void moea64_clear_modify(mmu_t, vm_page_t); void moea64_clear_reference(mmu_t, vm_page_t); void moea64_copy_page(mmu_t, vm_page_t, vm_page_t); +void moea64_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, + vm_page_t *mb, vm_offset_t b_offset, int xfersize); void moea64_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t, boolean_t); void moea64_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_page_t, vm_prot_t); @@ -335,6 +337,7 @@ static mmu_method_t moea64_methods[] = { MMUMETHOD(mmu_clear_modify, moea64_clear_modify), MMUMETHOD(mmu_clear_reference, moea64_clear_reference), MMUMETHOD(mmu_copy_page, moea64_copy_page), + MMUMETHOD(mmu_copy_pages, moea64_copy_pages), MMUMETHOD(mmu_enter, moea64_enter), MMUMETHOD(mmu_enter_object, moea64_enter_object), MMUMETHOD(mmu_enter_quick, moea64_enter_quick), @@ -1105,6 +1108,72 @@ moea64_copy_page(mmu_t mmu, vm_page_t msrc, vm_page_t mdst) } } +static inline void +moea64_copy_pages_dmap(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, + vm_page_t *mb, vm_offset_t b_offset, int xfersize) +{ + void *a_cp, *b_cp; + vm_offset_t a_pg_offset, b_pg_offset; + int cnt; + + while (xfersize > 0) { + a_pg_offset = a_offset & PAGE_MASK; + cnt = min(xfersize, PAGE_SIZE - a_pg_offset); + a_cp = (char *)VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT]) + + a_pg_offset; + b_pg_offset = b_offset & PAGE_MASK; + cnt = min(cnt, PAGE_SIZE - b_pg_offset); + b_cp = (char *)VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT]) + + b_pg_offset; + bcopy(a_cp, b_cp, cnt); + a_offset += cnt; + b_offset += cnt; + xfersize -= cnt; + } +} + +static inline void +moea64_copy_pages_nodmap(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, + vm_page_t *mb, vm_offset_t b_offset, int xfersize) +{ + void *a_cp, *b_cp; + vm_offset_t a_pg_offset, b_pg_offset; + int cnt; + + mtx_lock(&moea64_scratchpage_mtx); + while (xfersize > 0) { + a_pg_offset = a_offset & PAGE_MASK; + cnt = min(xfersize, PAGE_SIZE - a_pg_offset); + moea64_set_scratchpage_pa(mmu, 0, + VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])); + a_cp = (char *)moea64_scratchpage_va[0] + a_pg_offset; + b_pg_offset = b_offset & PAGE_MASK; + cnt = min(cnt, PAGE_SIZE - b_pg_offset); + moea64_set_scratchpage_pa(mmu, 1, + VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])); + b_cp = (char *)moea64_scratchpage_va[1] + b_pg_offset; + bcopy(a_cp, b_cp, cnt); + a_offset += cnt; + b_offset += cnt; + xfersize -= cnt; + } + mtx_unlock(&moea64_scratchpage_mtx); +} + +void +moea64_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, + vm_page_t *mb, vm_offset_t b_offset, int xfersize) +{ + + if (hw_direct_map) { + moea64_copy_pages_dmap(mmu, ma, a_offset, mb, b_offset, + xfersize); + } else { + moea64_copy_pages_nodmap(mmu, ma, a_offset, mb, b_offset, + xfersize); + } +} + void moea64_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) { diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c index f6e5f9c..233e1e0 100644 --- a/sys/powerpc/booke/pmap.c +++ b/sys/powerpc/booke/pmap.c @@ -275,6 +275,8 @@ static void mmu_booke_clear_reference(mmu_t, vm_page_t); static void mmu_booke_copy(mmu_t, pmap_t, pmap_t, vm_offset_t, vm_size_t, vm_offset_t); static void mmu_booke_copy_page(mmu_t, vm_page_t, vm_page_t); +static void mmu_booke_copy_pages(mmu_t, vm_page_t *, + vm_offset_t, 
vm_page_t *, vm_offset_t, int); static void mmu_booke_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t, boolean_t); static void mmu_booke_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, @@ -335,6 +337,7 @@ static mmu_method_t mmu_booke_methods[] = { MMUMETHOD(mmu_clear_reference, mmu_booke_clear_reference), MMUMETHOD(mmu_copy, mmu_booke_copy), MMUMETHOD(mmu_copy_page, mmu_booke_copy_page), + MMUMETHOD(mmu_copy_pages, mmu_booke_copy_pages), MMUMETHOD(mmu_enter, mmu_booke_enter), MMUMETHOD(mmu_enter_object, mmu_booke_enter_object), MMUMETHOD(mmu_enter_quick, mmu_booke_enter_quick), @@ -2138,6 +2141,36 @@ mmu_booke_copy_page(mmu_t mmu, vm_page_t sm, vm_page_t dm) mtx_unlock(©_page_mutex); } +static inline void +mmu_booke_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, + vm_page_t *mb, vm_offset_t b_offset, int xfersize) +{ + void *a_cp, *b_cp; + vm_offset_t a_pg_offset, b_pg_offset; + int cnt; + + mtx_lock(©_page_mutex); + while (xfersize > 0) { + a_pg_offset = a_offset & PAGE_MASK; + cnt = min(xfersize, PAGE_SIZE - a_pg_offset); + mmu_booke_kenter(mmu, copy_page_src_va, + VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])); + a_cp = (char *)copy_page_src_va + a_pg_offset; + b_pg_offset = b_offset & PAGE_MASK; + cnt = min(cnt, PAGE_SIZE - b_pg_offset); + mmu_booke_kenter(mmu, copy_page_dst_va, + VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])); + b_cp = (char *)copy_page_dst_va + b_pg_offset; + bcopy(a_cp, b_cp, cnt); + mmu_booke_kremove(mmu, copy_page_dst_va); + mmu_booke_kremove(mmu, copy_page_src_va); + a_offset += cnt; + b_offset += cnt; + xfersize -= cnt; + } + mtx_unlock(©_page_mutex); +} + /* * mmu_booke_zero_page_idle zeros the specified hardware page by mapping it * into virtual memory and using bzero to clear its contents. This is intended diff --git a/sys/powerpc/powerpc/mmu_if.m b/sys/powerpc/powerpc/mmu_if.m index 8cd6e52..0382bd8 100644 --- a/sys/powerpc/powerpc/mmu_if.m +++ b/sys/powerpc/powerpc/mmu_if.m @@ -215,6 +215,14 @@ METHOD void copy_page { vm_page_t _dst; }; +METHOD void copy_pages { + mmu_t _mmu; + vm_page_t *_ma; + vm_offset_t _a_offset; + vm_page_t *_mb; + vm_offset_t _b_offset; + int _xfersize; +}; /** * @brief Create a mapping between a virtual/physical address pair in the diff --git a/sys/powerpc/powerpc/pmap_dispatch.c b/sys/powerpc/powerpc/pmap_dispatch.c index c919196..42f1a39 100644 --- a/sys/powerpc/powerpc/pmap_dispatch.c +++ b/sys/powerpc/powerpc/pmap_dispatch.c @@ -133,6 +133,16 @@ pmap_copy_page(vm_page_t src, vm_page_t dst) } void +pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], + vm_offset_t b_offset, int xfersize) +{ + + CTR6(KTR_PMAP, "%s(%p, %#x, %p, %#x, %#x)", __func__, ma, + a_offset, mb, b_offset, xfersize); + MMU_COPY_PAGES(mmu_obj, ma, a_offset, mb, b_offset, xfersize); +} + +void pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t p, vm_prot_t prot, boolean_t wired) { diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c index 08f008c..27947dd 100644 --- a/sys/sparc64/sparc64/pmap.c +++ b/sys/sparc64/sparc64/pmap.c @@ -1835,8 +1835,9 @@ pmap_zero_page_idle(vm_page_t m) } } -void -pmap_copy_page(vm_page_t msrc, vm_page_t mdst) +static void +pmap_copy_page_offs(vm_page_t msrc, int src_off, vm_page_t mdst, int dst_off, + int cnt) { vm_offset_t vdst; vm_offset_t vsrc; @@ -1857,16 +1858,17 @@ pmap_copy_page(vm_page_t msrc, vm_page_t mdst) PMAP_STATS_INC(pmap_ncopy_page_c); vdst = TLB_PHYS_TO_DIRECT(pdst); vsrc = TLB_PHYS_TO_DIRECT(psrc); - cpu_block_copy((void *)vsrc, (void *)vdst, 
PAGE_SIZE); + cpu_block_copy((char *)vsrc + src_off, (char *)vdst + dst_off, + cnt); } else if (msrc->md.color == -1 && mdst->md.color == -1) { PMAP_STATS_INC(pmap_ncopy_page_nc); - ascopy(ASI_PHYS_USE_EC, psrc, pdst, PAGE_SIZE); + ascopy(ASI_PHYS_USE_EC, psrc + src_off, pdst + dst_off, cnt); } else if (msrc->md.color == -1) { if (mdst->md.color == DCACHE_COLOR(pdst)) { PMAP_STATS_INC(pmap_ncopy_page_dc); vdst = TLB_PHYS_TO_DIRECT(pdst); - ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst, - PAGE_SIZE); + ascopyfrom(ASI_PHYS_USE_EC, psrc + src_off, + (char *)vdst + dst_off, cnt); } else { PMAP_STATS_INC(pmap_ncopy_page_doc); PMAP_LOCK(kernel_pmap); @@ -1875,8 +1877,8 @@ pmap_copy_page(vm_page_t msrc, vm_page_t mdst) tp->tte_data = TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W; tp->tte_vpn = TV_VPN(vdst, TS_8K); - ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst, - PAGE_SIZE); + ascopyfrom(ASI_PHYS_USE_EC, psrc + src_off, + (char *)vdst + dst_off, cnt); tlb_page_demap(kernel_pmap, vdst); PMAP_UNLOCK(kernel_pmap); } @@ -1884,8 +1886,8 @@ pmap_copy_page(vm_page_t msrc, vm_page_t mdst) if (msrc->md.color == DCACHE_COLOR(psrc)) { PMAP_STATS_INC(pmap_ncopy_page_sc); vsrc = TLB_PHYS_TO_DIRECT(psrc); - ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst, - PAGE_SIZE); + ascopyto((char *)vsrc + src_off, ASI_PHYS_USE_EC, + pdst + dst_off, cnt); } else { PMAP_STATS_INC(pmap_ncopy_page_soc); PMAP_LOCK(kernel_pmap); @@ -1894,8 +1896,8 @@ pmap_copy_page(vm_page_t msrc, vm_page_t mdst) tp->tte_data = TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W; tp->tte_vpn = TV_VPN(vsrc, TS_8K); - ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst, - PAGE_SIZE); + ascopyto((char *)vsrc + src_off, ASI_PHYS_USE_EC, + pdst + dst_off, cnt); tlb_page_demap(kernel_pmap, vsrc); PMAP_UNLOCK(kernel_pmap); } @@ -1912,13 +1914,41 @@ pmap_copy_page(vm_page_t msrc, vm_page_t mdst) tp->tte_data = TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W; tp->tte_vpn = TV_VPN(vsrc, TS_8K); - cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE); + cpu_block_copy((char *)vsrc + src_off, (char *)vdst + dst_off, + cnt); tlb_page_demap(kernel_pmap, vdst); tlb_page_demap(kernel_pmap, vsrc); PMAP_UNLOCK(kernel_pmap); } } +void +pmap_copy_page(vm_page_t msrc, vm_page_t mdst) +{ + + pmap_copy_page_offs(msrc, 0, mdst, 0, PAGE_SIZE); +} + +void +pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], + vm_offset_t b_offset, int xfersize) +{ + vm_offset_t a_pg_offset, b_pg_offset; + int cnt; + + while (xfersize > 0) { + a_pg_offset = a_offset & PAGE_MASK; + cnt = min(xfersize, PAGE_SIZE - a_pg_offset); + b_pg_offset = b_offset & PAGE_MASK; + cnt = min(cnt, PAGE_SIZE - b_pg_offset); + pmap_copy_page_offs(ma[a_offset >> PAGE_SHIFT], a_pg_offset, + mb[b_offset >> PAGE_SHIFT], b_pg_offset, cnt); + a_offset += cnt; + b_offset += cnt; + xfersize -= cnt; + } +} + /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may diff --git a/sys/sys/bio.h b/sys/sys/bio.h index c016ee6..7678f5a 100644 --- a/sys/sys/bio.h +++ b/sys/sys/bio.h @@ -55,10 +55,13 @@ #define BIO_DONE 0x02 #define BIO_ONQUEUE 0x04 #define BIO_ORDERED 0x08 +#define BIO_UNMAPPED 0x10 +#define BIO_TRANSIENT_MAPPING 0x20 #ifdef _KERNEL struct disk; struct bio; +struct vm_map; /* Empty classifier tag, to prevent further classification. */ #define BIO_NOTCLASSIFIED (void *)(~0UL) @@ -78,6 +81,9 @@ struct bio { off_t bio_offset; /* Offset into file. */ long bio_bcount; /* Valid bytes in buffer. 
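BIO_TRANSIENT_MAPPING marks a bio whose KVA was borrowed from bio_transient_map, and biodone() tears that mapping down by taking trunc_page/round_page of the data range before calling pmap_qremove() and vm_map_remove(). The helper below is an illustrative user-space version of that span computation (PAGE_SIZE and span_pages() are assumptions for the example):

#include <stdio.h>

#define PAGE_SIZE       4096UL
#define PAGE_MASK       (PAGE_SIZE - 1)

#define trunc_page(x)   ((x) & ~PAGE_MASK)
#define round_page(x)   (((x) + PAGE_MASK) & ~PAGE_MASK)

/*
 * Number of pages a transient mapping must cover for "len" bytes of
 * data starting at virtual address "va": the same trunc/round pair
 * biodone() uses before tearing a BIO_TRANSIENT_MAPPING down.
 */
static unsigned long
span_pages(unsigned long va, unsigned long len)
{
        return ((round_page(va + len) - trunc_page(va)) / PAGE_SIZE);
}

int
main(void)
{
        /* 100 bytes that straddle a page boundary still need 2 pages. */
        printf("%lu\n", span_pages(4090, 100));
        return (0);
}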
*/ caddr_t bio_data; /* Memory, superblocks, indirect etc. */ + struct vm_page **bio_ma; /* Or unmapped. */ + int bio_ma_offset; /* Offset in the first page of bio_ma. */ + int bio_ma_n; /* Number of pages in bio_ma. */ int bio_error; /* Errno for BIO_ERROR. */ long bio_resid; /* Remaining I/O in bytes. */ void (*bio_done)(struct bio *); @@ -121,6 +127,9 @@ struct bio_queue_head { struct bio *insert_point; }; +extern struct vm_map *bio_transient_map; +extern int bio_transient_maxcnt; + void biodone(struct bio *bp); void biofinish(struct bio *bp, struct devstat *stat, int error); int biowait(struct bio *bp, const char *wchan); diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 672ef5a..0c7a6f4 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -117,6 +117,7 @@ struct buf { long b_bufsize; /* Allocated buffer size. */ long b_runningbufspace; /* when I/O is running, pipelining */ caddr_t b_kvabase; /* base kva for buffer */ + caddr_t b_kvaalloc; /* allocated kva for B_KVAALLOC */ int b_kvasize; /* size of kva for buffer */ daddr_t b_lblkno; /* Logical block number. */ struct vnode *b_vp; /* Device vnode. */ @@ -202,8 +203,8 @@ struct buf { #define B_PERSISTENT 0x00000100 /* Perm. ref'ed while EXT2FS mounted. */ #define B_DONE 0x00000200 /* I/O completed. */ #define B_EINTR 0x00000400 /* I/O was interrupted */ -#define B_00000800 0x00000800 /* Available flag. */ -#define B_00001000 0x00001000 /* Available flag. */ +#define B_UNMAPPED 0x00000800 /* KVA is not mapped. */ +#define B_KVAALLOC 0x00001000 /* But allocated. */ #define B_INVAL 0x00002000 /* Does not contain valid info. */ #define B_BARRIER 0x00004000 /* Write this and all preceeding first. */ #define B_NOCACHE 0x00008000 /* Do not cache block after use. */ @@ -453,7 +454,9 @@ buf_countdeps(struct buf *bp, int i) */ #define GB_LOCK_NOWAIT 0x0001 /* Fail if we block on a buf lock. */ #define GB_NOCREAT 0x0002 /* Don't create a buf if not found. */ -#define GB_NOWAIT_BD 0x0004 /* Do not wait for bufdaemon */ +#define GB_NOWAIT_BD 0x0004 /* Do not wait for bufdaemon. */ +#define GB_UNMAPPED 0x0008 /* Do not mmap buffer pages. */ +#define GB_KVAALLOC 0x0010 /* But allocate KVA. */ #ifdef _KERNEL extern int nbuf; /* The number of buffer headers */ @@ -470,17 +473,22 @@ extern struct buf *swbuf; /* Swap I/O buffer headers. */ extern int nswbuf; /* Number of swap I/O buffer headers. */ extern int cluster_pbuf_freecnt; /* Number of pbufs for clusters */ extern int vnode_pbuf_freecnt; /* Number of pbufs for vnode pager */ +extern caddr_t unmapped_buf; void runningbufwakeup(struct buf *); void waitrunningbufspace(void); caddr_t kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est); void bufinit(void); +void bdata2bio(struct buf *bp, struct bio *bip); void bwillwrite(void); int buf_dirty_count_severe(void); void bremfree(struct buf *); void bremfreef(struct buf *); /* XXX Force bremfree, only for nfs. 
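The new flags come in pairs: B_UNMAPPED ("KVA is not mapped") with B_KVAALLOC ("but allocated") describe the buffer, GB_UNMAPPED with GB_KVAALLOC describe the request, and both getnewbuf() and getblk() assert that GB_KVAALLOC is only passed together with GB_UNMAPPED. A trivial sketch of that validity rule, with the flag values copied from the header change:

#include <assert.h>
#include <stdio.h>

#define GB_UNMAPPED     0x0008  /* Do not map buffer pages. */
#define GB_KVAALLOC     0x0010  /* But allocate KVA. */

/* Reject the one meaningless combination: KVA reservation requested
 * for a buffer that would be mapped anyway. */
static int
gbflags_valid(int gbflags)
{
        return ((gbflags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC);
}

int
main(void)
{
        assert(gbflags_valid(0));
        assert(gbflags_valid(GB_UNMAPPED));
        assert(gbflags_valid(GB_UNMAPPED | GB_KVAALLOC));
        assert(!gbflags_valid(GB_KVAALLOC));
        printf("gbflags checks passed\n");
        return (0);
}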
*/ #define bread(vp, blkno, size, cred, bpp) \ - breadn_flags(vp, blkno, size, 0, 0, 0, cred, 0, bpp) + breadn_flags(vp, blkno, size, NULL, NULL, 0, cred, 0, bpp) +#define bread_gb(vp, blkno, size, cred, gbflags, bpp) \ + breadn_flags(vp, blkno, size, NULL, NULL, 0, cred, \ + gbflags, bpp) #define breadn(vp, blkno, size, rablkno, rabsize, cnt, cred, bpp) \ breadn_flags(vp, blkno, size, rablkno, rabsize, cnt, cred, 0, bpp) int breadn_flags(struct vnode *, daddr_t, int, daddr_t *, int *, int, @@ -508,14 +516,15 @@ void bufdone_finish(struct buf *); void bd_speedup(void); int cluster_read(struct vnode *, u_quad_t, daddr_t, long, - struct ucred *, long, int, struct buf **); -int cluster_wbuild(struct vnode *, long, daddr_t, int); -void cluster_write(struct vnode *, struct buf *, u_quad_t, int); + struct ucred *, long, int, int, struct buf **); +int cluster_wbuild(struct vnode *, long, daddr_t, int, int); +void cluster_write(struct vnode *, struct buf *, u_quad_t, int, int); +void vfs_bio_bzero_buf(struct buf *bp, int base, int size); void vfs_bio_set_valid(struct buf *, int base, int size); void vfs_bio_clrbuf(struct buf *); void vfs_busy_pages(struct buf *, int clear_modify); void vfs_unbusy_pages(struct buf *); -int vmapbuf(struct buf *); +int vmapbuf(struct buf *, int); void vunmapbuf(struct buf *); void relpbuf(struct buf *, int *); void brelvp(struct buf *); diff --git a/sys/sys/mount.h b/sys/sys/mount.h index bbbc569..f8e7662 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -351,6 +351,7 @@ void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *); #define MNTK_VGONE_WAITER 0x00000400 #define MNTK_LOOKUP_EXCL_DOTDOT 0x00000800 #define MNTK_MARKER 0x00001000 +#define MNTK_UNMAPPED_BUFS 0x00002000 #define MNTK_NOASYNC 0x00800000 /* disable async */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index b54dc04..e6a41a4 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -692,6 +692,8 @@ int vn_vget_ino(struct vnode *vp, ino_t ino, int lkflags, struct vnode **rvp); int vn_io_fault_uiomove(char *data, int xfersize, struct uio *uio); +int vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize, + struct uio *uio); #define vn_rangelock_unlock(vp, cookie) \ rangelock_unlock(&(vp)->v_rl, (cookie), VI_MTX(vp)) diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index abe4073..0bdbbae 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -254,7 +254,7 @@ ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, flags, cred, bpp) struct buf *bp; struct ufsmount *ump; u_int cg, request, reclaimed; - int error; + int error, gbflags; ufs2_daddr_t bno; static struct timeval lastfail; static int curfail; @@ -265,6 +265,8 @@ ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, flags, cred, bpp) fs = ip->i_fs; bp = NULL; ump = ip->i_ump; + gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0; + mtx_assert(UFS_MTX(ump), MA_OWNED); #ifdef INVARIANTS if (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) @@ -296,7 +298,7 @@ retry: /* * Allocate the extra space in the buffer. 
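On the UFS side the pattern is uniform: translate the balloc-level flag BA_UNMAPPED into the buffer-cache flag GB_UNMAPPED once, then thread it through bread_gb() (which is just breadn_flags() with no read-ahead) and getblk()/cluster_read(). A one-line sketch of that translation; the BA_UNMAPPED value shown is an assumption, since its definition is outside this hunk:

#include <stdio.h>

#define BA_UNMAPPED     0x0020  /* assumed value, for illustration only */
#define GB_UNMAPPED     0x0008  /* from the patch */

/* The idiom used throughout ffs_realloccg()/ffs_balloc() here:
 * callers speak BA_UNMAPPED, the buffer cache speaks GB_UNMAPPED. */
static int
ba_to_gb(int flags)
{
        return ((flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0);
}

int
main(void)
{
        printf("%#x %#x\n", ba_to_gb(BA_UNMAPPED), ba_to_gb(0));
        return (0);
}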
*/ - error = bread(vp, lbprev, osize, NOCRED, &bp); + error = bread_gb(vp, lbprev, osize, NOCRED, gbflags, &bp); if (error) { brelse(bp); return (error); @@ -332,7 +334,7 @@ retry: ip->i_flag |= IN_CHANGE | IN_UPDATE; allocbuf(bp, nsize); bp->b_flags |= B_DONE; - bzero(bp->b_data + osize, nsize - osize); + vfs_bio_bzero_buf(bp, osize, nsize - osize); if ((bp->b_flags & (B_MALLOC | B_VMIO)) == B_VMIO) vfs_bio_set_valid(bp, osize, nsize - osize); *bpp = bp; @@ -400,7 +402,7 @@ retry: ip->i_flag |= IN_CHANGE | IN_UPDATE; allocbuf(bp, nsize); bp->b_flags |= B_DONE; - bzero(bp->b_data + osize, nsize - osize); + vfs_bio_bzero_buf(bp, osize, nsize - osize); if ((bp->b_flags & (B_MALLOC | B_VMIO)) == B_VMIO) vfs_bio_set_valid(bp, osize, nsize - osize); *bpp = bp; diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c index 0e29be87f..d20df77 100644 --- a/sys/ufs/ffs/ffs_balloc.c +++ b/sys/ufs/ffs/ffs_balloc.c @@ -107,7 +107,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, int saved_inbdflush; static struct timeval lastfail; static int curfail; - int reclaimed; + int gbflags, reclaimed; ip = VTOI(vp); dp = ip->i_din1; @@ -123,6 +123,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, return (EOPNOTSUPP); if (lbn < 0) return (EFBIG); + gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0; if (DOINGSOFTDEP(vp)) softdep_prealloc(vp, MNT_WAIT); @@ -211,7 +212,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, nsize, flags, cred, &newb); if (error) return (error); - bp = getblk(vp, lbn, nsize, 0, 0, 0); + bp = getblk(vp, lbn, nsize, 0, 0, gbflags); bp->b_blkno = fsbtodb(fs, newb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(bp); @@ -255,7 +256,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, nb = newb; *allocblk++ = nb; *lbns_remfree++ = indirs[1].in_lbn; - bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0); + bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags); bp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) { @@ -389,7 +390,7 @@ retry: nb = newb; *allocblk++ = nb; *lbns_remfree++ = lbn; - nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); + nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); nbp->b_blkno = fsbtodb(fs, nb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(nbp); @@ -418,16 +419,17 @@ retry: if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { error = cluster_read(vp, ip->i_size, lbn, (int)fs->fs_bsize, NOCRED, - MAXBSIZE, seqcount, &nbp); + MAXBSIZE, seqcount, gbflags, &nbp); } else { - error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); + error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED, + gbflags, &nbp); } if (error) { brelse(nbp); goto fail; } } else { - nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); + nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); nbp->b_blkno = fsbtodb(fs, nb); } curthread_pflags_restore(saved_inbdflush); @@ -539,7 +541,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, int saved_inbdflush; static struct timeval lastfail; static int curfail; - int reclaimed; + int gbflags, reclaimed; ip = VTOI(vp); dp = ip->i_din2; @@ -553,6 +555,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, *bpp = NULL; if (lbn < 0) return (EFBIG); + gbflags = (flags & BA_UNMAPPED) != 0 ? 
GB_UNMAPPED : 0; if (DOINGSOFTDEP(vp)) softdep_prealloc(vp, MNT_WAIT); @@ -603,7 +606,8 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, panic("ffs_balloc_ufs2: BA_METAONLY for ext block"); nb = dp->di_extb[lbn]; if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) { - error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp); + error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED, + gbflags, &bp); if (error) { brelse(bp); return (error); @@ -620,7 +624,8 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, osize = fragroundup(fs, blkoff(fs, dp->di_extsize)); nsize = fragroundup(fs, size); if (nsize <= osize) { - error = bread(vp, -1 - lbn, osize, NOCRED, &bp); + error = bread_gb(vp, -1 - lbn, osize, NOCRED, + gbflags, &bp); if (error) { brelse(bp); return (error); @@ -653,7 +658,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, nsize, flags, cred, &newb); if (error) return (error); - bp = getblk(vp, -1 - lbn, nsize, 0, 0, 0); + bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags); bp->b_blkno = fsbtodb(fs, newb); bp->b_xflags |= BX_ALTDATA; if (flags & BA_CLRBUF) @@ -679,9 +684,9 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, if (osize < fs->fs_bsize && osize > 0) { UFS_LOCK(ump); error = ffs_realloccg(ip, nb, dp->di_db[nb], - ffs_blkpref_ufs2(ip, lastlbn, (int)nb, - &dp->di_db[0]), osize, (int)fs->fs_bsize, - flags, cred, &bp); + ffs_blkpref_ufs2(ip, lastlbn, (int)nb, + &dp->di_db[0]), osize, (int)fs->fs_bsize, + flags, cred, &bp); if (error) return (error); if (DOINGSOFTDEP(vp)) @@ -707,7 +712,8 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, panic("ffs_balloc_ufs2: BA_METAONLY for direct block"); nb = dp->di_db[lbn]; if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { - error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); + error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED, + gbflags, &bp); if (error) { brelse(bp); return (error); @@ -723,7 +729,8 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, osize = fragroundup(fs, blkoff(fs, ip->i_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { - error = bread(vp, lbn, osize, NOCRED, &bp); + error = bread_gb(vp, lbn, osize, NOCRED, + gbflags, &bp); if (error) { brelse(bp); return (error); @@ -733,7 +740,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, UFS_LOCK(ump); error = ffs_realloccg(ip, lbn, dp->di_db[lbn], ffs_blkpref_ufs2(ip, lbn, (int)lbn, - &dp->di_db[0]), osize, nsize, flags, + &dp->di_db[0]), osize, nsize, flags, cred, &bp); if (error) return (error); @@ -753,7 +760,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, &dp->di_db[0]), nsize, flags, cred, &newb); if (error) return (error); - bp = getblk(vp, lbn, nsize, 0, 0, 0); + bp = getblk(vp, lbn, nsize, 0, 0, gbflags); bp->b_blkno = fsbtodb(fs, newb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(bp); @@ -797,7 +804,8 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, nb = newb; *allocblk++ = nb; *lbns_remfree++ = indirs[1].in_lbn; - bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0); + bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, + GB_UNMAPPED); bp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) { @@ -862,7 +870,8 @@ retry: nb = newb; *allocblk++ = nb; *lbns_remfree++ = indirs[i].in_lbn; - nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0); + nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, + GB_UNMAPPED); nbp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(nbp); if 
(DOINGSOFTDEP(vp)) { @@ -931,7 +940,7 @@ retry: nb = newb; *allocblk++ = nb; *lbns_remfree++ = lbn; - nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); + nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); nbp->b_blkno = fsbtodb(fs, nb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(nbp); @@ -966,16 +975,17 @@ retry: if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { error = cluster_read(vp, ip->i_size, lbn, (int)fs->fs_bsize, NOCRED, - MAXBSIZE, seqcount, &nbp); + MAXBSIZE, seqcount, gbflags, &nbp); } else { - error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); + error = bread_gb(vp, lbn, (int)fs->fs_bsize, + NOCRED, gbflags, &nbp); } if (error) { brelse(nbp); goto fail; } } else { - nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); + nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); nbp->b_blkno = fsbtodb(fs, nb); } curthread_pflags_restore(saved_inbdflush); diff --git a/sys/ufs/ffs/ffs_rawread.c b/sys/ufs/ffs/ffs_rawread.c index f8e3e00..45cb730 100644 --- a/sys/ufs/ffs/ffs_rawread.c +++ b/sys/ufs/ffs/ffs_rawread.c @@ -240,7 +240,7 @@ ffs_rawread_readahead(struct vnode *vp, bp->b_bcount = bsize - blockoff * DEV_BSIZE; bp->b_bufsize = bp->b_bcount; - if (vmapbuf(bp) < 0) + if (vmapbuf(bp, 1) < 0) return EFAULT; maybe_yield(); @@ -259,7 +259,7 @@ ffs_rawread_readahead(struct vnode *vp, bp->b_bcount = bsize * (1 + bforwards) - blockoff * DEV_BSIZE; bp->b_bufsize = bp->b_bcount; - if (vmapbuf(bp) < 0) + if (vmapbuf(bp, 1) < 0) return EFAULT; BO_STRATEGY(&dp->v_bufobj, bp); diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index 0204613..f1a3aab 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -1076,7 +1076,7 @@ ffs_mountfs(devvp, mp, td) */ MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED | - MNTK_NO_IOPF; + MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS; MNT_IUNLOCK(mp); #ifdef UFS_EXTATTR #ifdef UFS_EXTATTR_AUTOSTART @@ -2091,6 +2091,7 @@ ffs_bufwrite(struct buf *bp) * set b_lblkno and BKGRDMARKER before calling bgetvp() * to avoid confusing the splay tree and gbincore(). */ + KASSERT((bp->b_flags & B_UNMAPPED) == 0, ("Unmapped cg")); memcpy(newbp->b_data, bp->b_data, bp->b_bufsize); newbp->b_lblkno = bp->b_lblkno; newbp->b_xflags |= BX_BKGRDMARKER; diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index 5c99d5b..ef6194c 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -508,7 +508,8 @@ ffs_read(ap) /* * Don't do readahead if this is the end of the file. */ - error = bread(vp, lbn, size, NOCRED, &bp); + error = bread_gb(vp, lbn, size, NOCRED, + GB_UNMAPPED, &bp); } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { /* * Otherwise if we are allowed to cluster, @@ -518,7 +519,8 @@ ffs_read(ap) * doing sequential access. */ error = cluster_read(vp, ip->i_size, lbn, - size, NOCRED, blkoffset + uio->uio_resid, seqcount, &bp); + size, NOCRED, blkoffset + uio->uio_resid, + seqcount, GB_UNMAPPED, &bp); } else if (seqcount > 1) { /* * If we are NOT allowed to cluster, then @@ -529,15 +531,16 @@ ffs_read(ap) * the 6th argument. */ int nextsize = blksize(fs, ip, nextlbn); - error = breadn(vp, lbn, - size, &nextlbn, &nextsize, 1, NOCRED, &bp); + error = breadn_flags(vp, lbn, size, &nextlbn, + &nextsize, 1, NOCRED, GB_UNMAPPED, &bp); } else { /* * Failing all of the above, just read what the * user asked for. Interestingly, the same as * the first option above. 
*/ - error = bread(vp, lbn, size, NOCRED, &bp); + error = bread_gb(vp, lbn, size, NOCRED, + GB_UNMAPPED, &bp); } if (error) { brelse(bp); @@ -568,8 +571,13 @@ xfersize = size; } - error = vn_io_fault_uiomove((char *)bp->b_data + blkoffset, - (int)xfersize, uio); + if ((bp->b_flags & B_UNMAPPED) == 0) { + error = vn_io_fault_uiomove((char *)bp->b_data + + blkoffset, (int)xfersize, uio); + } else { + error = vn_io_fault_pgmove(bp->b_pages, blkoffset, + (int)xfersize, uio); + } if (error) break; @@ -700,6 +708,7 @@ ffs_write(ap) flags = seqcount << BA_SEQSHIFT; if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) flags |= IO_SYNC; + flags |= BA_UNMAPPED; for (error = 0; uio->uio_resid > 0;) { lbn = lblkno(fs, uio->uio_offset); @@ -739,8 +748,13 @@ ffs_write(ap) if (size < xfersize) xfersize = size; - error = vn_io_fault_uiomove((char *)bp->b_data + blkoffset, - (int)xfersize, uio); + if ((bp->b_flags & B_UNMAPPED) == 0) { + error = vn_io_fault_uiomove((char *)bp->b_data + + blkoffset, (int)xfersize, uio); + } else { + error = vn_io_fault_pgmove(bp->b_pages, blkoffset, + (int)xfersize, uio); + } /* * If the buffer is not already filled and we encounter an * error while trying to fill it, we have to clear out any @@ -783,7 +797,8 @@ ffs_write(ap) } else if (xfersize + blkoffset == fs->fs_bsize) { if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { bp->b_flags |= B_CLUSTEROK; - cluster_write(vp, bp, ip->i_size, seqcount); + cluster_write(vp, bp, ip->i_size, seqcount, + GB_UNMAPPED); } else { bawrite(bp); } diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h index c590748..31a2ba8 100644 --- a/sys/ufs/ufs/ufs_extern.h +++ b/sys/ufs/ufs/ufs_extern.h @@ -121,6 +121,7 @@ void softdep_revert_rmdir(struct inode *, struct inode *); */ #define BA_CLRBUF 0x00010000 /* Clear invalid areas of buffer. */ #define BA_METAONLY 0x00020000 /* Return indirect block buffer. */ +#define BA_UNMAPPED 0x00040000 /* Do not map the resulting buffer. */ #define BA_SEQMASK 0x7F000000 /* Bits holding seq heuristic. 
*/ #define BA_SEQSHIFT 24 #define BA_SEQMAX 0x7F diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h index d06c22b..c64a549 100644 --- a/sys/vm/pmap.h +++ b/sys/vm/pmap.h @@ -108,6 +108,8 @@ void pmap_clear_modify(vm_page_t m); void pmap_clear_reference(vm_page_t m); void pmap_copy(pmap_t, pmap_t, vm_offset_t, vm_size_t, vm_offset_t); void pmap_copy_page(vm_page_t, vm_page_t); +void pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, + vm_page_t mb[], vm_offset_t b_offset, int xfersize); void pmap_enter(pmap_t, vm_offset_t, vm_prot_t, vm_page_t, vm_prot_t, boolean_t); void pmap_enter_object(pmap_t pmap, vm_offset_t start, diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 44bff25..10a2c28 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -758,6 +758,16 @@ swp_pager_strategy(struct buf *bp) TAILQ_FOREACH(sp, &swtailq, sw_list) { if (bp->b_blkno >= sp->sw_first && bp->b_blkno < sp->sw_end) { mtx_unlock(&sw_dev_mtx); + if ((sp->sw_flags & SW_UNMAPPED) != 0) { + bp->b_kvaalloc = bp->b_data; + bp->b_data = unmapped_buf; + bp->b_kvabase = unmapped_buf; + bp->b_offset = 0; + bp->b_flags |= B_UNMAPPED; + } else { + pmap_qenter((vm_offset_t)bp->b_data, + &bp->b_pages[0], bp->b_bcount / PAGE_SIZE); + } sp->sw_strategy(bp, sp); return; } @@ -1155,11 +1165,6 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) bp = getpbuf(&nsw_rcount); bp->b_flags |= B_PAGING; - /* - * map our page(s) into kva for input - */ - pmap_qenter((vm_offset_t)bp->b_data, m + i, j - i); - bp->b_iocmd = BIO_READ; bp->b_iodone = swp_pager_async_iodone; bp->b_rcred = crhold(thread0.td_ucred); @@ -1371,8 +1376,6 @@ swap_pager_putpages(vm_object_t object, vm_page_t *m, int count, bp->b_flags |= B_PAGING; bp->b_iocmd = BIO_WRITE; - pmap_qenter((vm_offset_t)bp->b_data, &m[i], n); - bp->b_rcred = crhold(thread0.td_ucred); bp->b_wcred = crhold(thread0.td_ucred); bp->b_bcount = PAGE_SIZE * n; @@ -1484,7 +1487,12 @@ swp_pager_async_iodone(struct buf *bp) /* * remove the mapping for kernel virtual */ - pmap_qremove((vm_offset_t)bp->b_data, bp->b_npages); + if ((bp->b_flags & B_UNMAPPED) != 0) { + bp->b_data = bp->b_kvaalloc; + bp->b_kvabase = bp->b_kvaalloc; + bp->b_flags &= ~B_UNMAPPED; + } else + pmap_qremove((vm_offset_t)bp->b_data, bp->b_npages); if (bp->b_npages) { object = bp->b_pages[0]->object; @@ -2144,7 +2152,8 @@ swapon_check_swzone(unsigned long npages) } static void -swaponsomething(struct vnode *vp, void *id, u_long nblks, sw_strategy_t *strategy, sw_close_t *close, dev_t dev) +swaponsomething(struct vnode *vp, void *id, u_long nblks, + sw_strategy_t *strategy, sw_close_t *close, dev_t dev, int flags) { struct swdevt *sp, *tsp; swblk_t dvbase; @@ -2180,6 +2189,7 @@ swaponsomething(struct vnode *vp, void *id, u_long nblks, sw_strategy_t *strateg sp->sw_used = 0; sp->sw_strategy = strategy; sp->sw_close = close; + sp->sw_flags = flags; sp->sw_blist = blist_create(nblks, M_WAITOK); /* @@ -2537,10 +2547,19 @@ swapgeom_strategy(struct buf *bp, struct swdevt *sp) bio->bio_caller2 = bp; bio->bio_cmd = bp->b_iocmd; - bio->bio_data = bp->b_data; bio->bio_offset = (bp->b_blkno - sp->sw_first) * PAGE_SIZE; bio->bio_length = bp->b_bcount; bio->bio_done = swapgeom_done; + if ((bp->b_flags & B_UNMAPPED) != 0) { + bio->bio_ma = bp->b_pages; + bio->bio_data = unmapped_buf; + bio->bio_ma_offset = (vm_offset_t)bp->b_offset & PAGE_MASK; + bio->bio_ma_n = bp->b_npages; + bio->bio_flags |= BIO_UNMAPPED; + } else { + bio->bio_data = bp->b_data; + bio->bio_ma = NULL; + } g_io_request(bio, cp); return; } 
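The swapgeom_strategy() hunk above captures the core dispatch that every GEOM-facing consumer of unmapped buffers has to perform: when B_UNMAPPED is set there is no usable kernel virtual address, so the bio is handed the page array instead of a data pointer. Below is a minimal sketch of that pattern, assuming the post-patch struct bio/struct buf fields shown in this diff (bio_ma, bio_ma_offset, bio_ma_n, BIO_UNMAPPED, B_UNMAPPED, unmapped_buf); the helper name bio_attach_buf_data() is hypothetical and not part of the change.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <vm/vm.h>
#include <vm/vm_page.h>

extern caddr_t unmapped_buf;	/* assumed exported by the patched buffer cache */

/*
 * Sketch only: point a bio at a buf's data either through KVA (mapped
 * case) or through the page list (unmapped case), mirroring the
 * swapgeom_strategy() change above.
 */
static void
bio_attach_buf_data(struct bio *bio, struct buf *bp)
{

	if ((bp->b_flags & B_UNMAPPED) != 0) {
		/* No KVA: describe the transfer by its physical pages. */
		bio->bio_ma = bp->b_pages;
		bio->bio_ma_offset = (vm_offset_t)bp->b_offset & PAGE_MASK;
		bio->bio_ma_n = bp->b_npages;
		bio->bio_data = unmapped_buf;
		bio->bio_flags |= BIO_UNMAPPED;
	} else {
		/* Classic path: the buffer is mapped into kernel VA. */
		bio->bio_data = bp->b_data;
		bio->bio_ma = NULL;
	}
}

Note that a provider which cannot handle BIO_UNMAPPED never sees such a bio from the swap pager: the swapongeom_ev() hunk that follows only reports SW_UNMAPPED when the provider advertises G_PF_ACCEPT_UNMAPPED, and swp_pager_strategy() above only sets B_UNMAPPED for SW_UNMAPPED devices.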
@@ -2630,9 +2649,9 @@ swapongeom_ev(void *arg, int flags) } nblks = pp->mediasize / DEV_BSIZE; swaponsomething(swh->vp, cp, nblks, swapgeom_strategy, - swapgeom_close, dev2udev(swh->dev)); + swapgeom_close, dev2udev(swh->dev), + (pp->flags & G_PF_ACCEPT_UNMAPPED) != 0 ? SW_UNMAPPED : 0); swh->error = 0; - return; } static int @@ -2721,6 +2740,6 @@ swaponvp(struct thread *td, struct vnode *vp, u_long nblks) return (error); swaponsomething(vp, vp, nblks, swapdev_strategy, swapdev_close, - NODEV); + NODEV, 0); return (0); } diff --git a/sys/vm/swap_pager.h b/sys/vm/swap_pager.h index 5c716d9..79f8767 100644 --- a/sys/vm/swap_pager.h +++ b/sys/vm/swap_pager.h @@ -68,6 +68,7 @@ struct swdevt { sw_close_t *sw_close; }; +#define SW_UNMAPPED 0x01 #define SW_CLOSING 0x04 #ifdef _KERNEL diff --git a/sys/vm/vm.h b/sys/vm/vm.h index 132c10e..106c510 100644 --- a/sys/vm/vm.h +++ b/sys/vm/vm.h @@ -136,6 +136,8 @@ struct kva_md_info { vm_offset_t clean_eva; vm_offset_t pager_sva; vm_offset_t pager_eva; + vm_offset_t bio_transient_sva; + vm_offset_t bio_transient_eva; }; extern struct kva_md_info kmi; diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c index c507691..2eb1070 100644 --- a/sys/vm/vm_init.c +++ b/sys/vm/vm_init.c @@ -186,10 +186,15 @@ again: panic("startup: table size inconsistency"); clean_map = kmem_suballoc(kernel_map, &kmi->clean_sva, &kmi->clean_eva, - (long)nbuf * BKVASIZE + (long)nswbuf * MAXPHYS, TRUE); + (long)nbuf * BKVASIZE + (long)nswbuf * MAXPHYS + + (long)bio_transient_maxcnt * MAXPHYS, TRUE); buffer_map = kmem_suballoc(clean_map, &kmi->buffer_sva, &kmi->buffer_eva, (long)nbuf * BKVASIZE, FALSE); buffer_map->system_map = 1; + bio_transient_map = kmem_suballoc(clean_map, &kmi->bio_transient_sva, + &kmi->bio_transient_eva, (long)bio_transient_maxcnt * MAXPHYS, + FALSE); + bio_transient_map->system_map = 1; pager_map = kmem_suballoc(clean_map, &kmi->pager_sva, &kmi->pager_eva, (long)nswbuf * MAXPHYS, FALSE); pager_map->system_map = 1; diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index ad9aa0d..efd2bf2 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -85,11 +85,12 @@ __FBSDID("$FreeBSD$"); #include #include -vm_map_t kernel_map=0; -vm_map_t kmem_map=0; -vm_map_t exec_map=0; +vm_map_t kernel_map; +vm_map_t kmem_map; +vm_map_t exec_map; vm_map_t pipe_map; -vm_map_t buffer_map=0; +vm_map_t buffer_map; +vm_map_t bio_transient_map; const void *zero_region; CTASSERT((ZERO_REGION_SIZE & PAGE_MASK) == 0); diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index a6d78f4..86ca7b4 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -697,6 +697,7 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage) int runpg; int runend; struct buf *bp; + struct mount *mp; int count; int error; @@ -899,12 +900,23 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage) } bp = getpbuf(&vnode_pbuf_freecnt); - kva = (vm_offset_t) bp->b_data; + kva = (vm_offset_t)bp->b_data; /* - * and map the pages to be read into the kva + * and map the pages to be read into the kva, if the filesystem + * requires mapped buffers. 
*/ - pmap_qenter(kva, m, count); + mp = vp->v_mount; + if (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMAPPED_BUFS) != 0) { + bp->b_data = unmapped_buf; + bp->b_kvabase = unmapped_buf; + bp->b_offset = 0; + bp->b_flags |= B_UNMAPPED; + bp->b_npages = count; + for (i = 0; i < count; i++) + bp->b_pages[i] = m[i]; + } else + pmap_qenter(kva, m, count); /* build a minimal buffer header */ bp->b_iocmd = BIO_READ; @@ -933,11 +945,22 @@ if ((bp->b_ioflags & BIO_ERROR) != 0) error = EIO; - if (!error) { - if (size != count * PAGE_SIZE) - bzero((caddr_t) kva + size, PAGE_SIZE * count - size); + if (error == 0 && size != count * PAGE_SIZE) { + if ((bp->b_flags & B_UNMAPPED) != 0) { + bp->b_flags &= ~B_UNMAPPED; + pmap_qenter(kva, m, count); + } + bzero((caddr_t)kva + size, PAGE_SIZE * count - size); + } + if ((bp->b_flags & B_UNMAPPED) == 0) + pmap_qremove(kva, count); + if (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMAPPED_BUFS) != 0) { + bp->b_data = (caddr_t)kva; + bp->b_kvabase = (caddr_t)kva; + bp->b_flags &= ~B_UNMAPPED; + for (i = 0; i < count; i++) + bp->b_pages[i] = NULL; } - pmap_qremove(kva, count); /* * free the buffer header back to the swap buffer pool
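Taken as a whole, the read/write side of this change reduces to one recurring idiom, visible in the ffs_read() and ffs_write() hunks earlier in the patch: request the block with GB_UNMAPPED, then copy through bp->b_data only if a mapping actually exists, and through the page array otherwise. A hedged sketch of that idiom follows; it assumes the interfaces introduced by this patch (bread_gb(), GB_UNMAPPED, B_UNMAPPED, vn_io_fault_pgmove()), and the wrapper function itself is illustrative rather than part of the change.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/ucred.h>
#include <sys/uio.h>
#include <sys/vnode.h>
#include <vm/vm.h>
#include <vm/vm_page.h>

/*
 * Illustrative only: read one logical block without requiring a KVA
 * mapping and move xfersize bytes starting at blkoffset into the uio,
 * choosing the copy primitive the same way ffs_read() does above.
 */
static int
read_block_uiomove(struct vnode *vp, daddr_t lbn, int size,
    int blkoffset, int xfersize, struct uio *uio)
{
	struct buf *bp;
	int error;

	error = bread_gb(vp, lbn, size, NOCRED, GB_UNMAPPED, &bp);
	if (error != 0) {
		brelse(bp);
		return (error);
	}
	if ((bp->b_flags & B_UNMAPPED) == 0)
		/* The buffer came back mapped; use the usual KVA copy. */
		error = vn_io_fault_uiomove((char *)bp->b_data + blkoffset,
		    xfersize, uio);
	else
		/* No KVA; copy directly from the buffer's VM pages. */
		error = vn_io_fault_pgmove(bp->b_pages, blkoffset,
		    xfersize, uio);
	bqrelse(bp);
	return (error);
}

Even with GB_UNMAPPED requested, the buffer cache is free to hand back a mapped buffer, which is why both branches are needed. The MNTK_UNMAPPED_BUFS flag that ffs_mountfs() sets above is the separate, per-mount opt-in that lets the VM side (the vnode_pager_generic_getpages() hunk just above) skip pmap_qenter() and pass bare pages to the buffer.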