diff -Naur linux-2.4.22-dm-1/drivers/md/Config.in linux-2.4.22-new-snapshot/drivers/md/Config.in --- linux-2.4.22-dm-1/drivers/md/Config.in 2003-09-30 11:59:23.000000000 -0500 +++ linux-2.4.22-new-snapshot/drivers/md/Config.in 2003-09-30 12:02:51.000000000 -0500 @@ -16,5 +16,8 @@ dep_tristate ' Logical volume manager (LVM) support' CONFIG_BLK_DEV_LVM $CONFIG_MD dep_tristate ' Device-mapper support' CONFIG_BLK_DEV_DM $CONFIG_MD dep_tristate ' Mirror (RAID-1) support' CONFIG_BLK_DEV_DM_MIRROR $CONFIG_BLK_DEV_DM +if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + dep_tristate ' Bad-Block Device Target (NEW)' CONFIG_BLK_DEV_DM_BADBLOCK $CONFIG_BLK_DEV_DM +fi endmenu diff -Naur linux-2.4.22-dm-1/drivers/md/Makefile linux-2.4.22-new-snapshot/drivers/md/Makefile --- linux-2.4.22-dm-1/drivers/md/Makefile 2003-09-30 11:59:22.000000000 -0500 +++ linux-2.4.22-new-snapshot/drivers/md/Makefile 2003-09-30 12:03:07.000000000 -0500 @@ -5,13 +5,13 @@ O_TARGET := mddev.o export-objs := md.o xor.o dm-table.o dm-target.o kcopyd.o dm-daemon.o \ - dm-log.o dm-io.o dm.o + dm-log.o dm-io.o dm.o dm-exception.o list-multi := lvm-mod.o dm-mod.o dm-mirror-mod.o lvm-mod-objs := lvm.o lvm-snap.o lvm-fs.o dm-mod-objs := dm.o dm-table.o dm-target.o dm-ioctl.o \ - dm-linear.o dm-stripe.o dm-snapshot.o dm-exception-store.o \ - kcopyd.o dm-daemon.o dm-io.o + dm-linear.o dm-stripe.o dm-snapshot.o dm-exception.o \ + dm-exception-store.o kcopyd.o dm-daemon.o dm-io.o dm-mirror-mod-objs := dm-raid1.o dm-log.o # Note: link order is important. All raid personalities @@ -30,6 +30,7 @@ obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o obj-$(CONFIG_BLK_DEV_DM_MIRROR) += dm-mirror.o +obj-$(CONFIG_BLK_DEV_DM_BADBLOCK) += dm-badblock.o include $(TOPDIR)/Rules.make diff -Naur linux-2.4.22-dm-1/drivers/md/dm-badblock.c linux-2.4.22-new-snapshot/drivers/md/dm-badblock.c --- linux-2.4.22-dm-1/drivers/md/dm-badblock.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.4.22-new-snapshot/drivers/md/dm-badblock.c 2003-09-30 12:01:28.000000000 -0500 @@ -0,0 +1,346 @@ +/* + * Copyright (c) International Business Machines Corp., 2003 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Bad-Block-Relocation target for Device-Mapper. + */ + +#include +#include +#include +#include +#include + +#include "dm.h" +#include "dm-daemon.h" +#include "dm-exception.h" + +/** + * struct dm_badblock + * + * Private data for each badblock instance. + **/ +struct dm_badblock { + struct exception_table *etable; +}; + +/** + * struct badblock_io + * + * Extra info to attach to each buffer-head. + **/ +struct badblock_io { + struct dm_badblock *bb; + struct buffer_head *bh; + struct list_head list; + sector_t b_rsector; +}; + +/* Memory pool of badblock_io structures. 
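+ * These are global, not per-device: global_init() below allocates
+ * them when the first badblock device is created, and global_exit()
+ * frees them when the last one goes away.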
*/ +static kmem_cache_t *badblock_io_cache; +static mempool_t *badblock_io_pool; + +/* Daemon for processing I/O errors, and + * a list for passing I/Os to the daemon. + */ +static struct dm_daemon badblock_daemon; +static LIST_HEAD(io_list); +static spinlock_t io_list_lock = SPIN_LOCK_UNLOCKED; + +static void process_io_list(void); + +/* + * When the first badblock device is created, initialize the mempools + * and start the badblock-daemon. When the last device is deleted, tear + * everything down. + */ + +static DECLARE_MUTEX(global_init_lock); +static int bbr_devices = 0; + +static int global_init(void) +{ + int rc = 0; + + down(&global_init_lock); + + if (bbr_devices == 0) { + badblock_io_cache = kmem_cache_create("dm_badblock_io", + sizeof(struct badblock_io), + __alignof__(struct badblock_io), + 0, NULL, NULL); + if (!badblock_io_cache) { + DMERR("cannot create bad-block I/O cache."); + rc = -ENOMEM; + goto out; + } + + badblock_io_pool = mempool_create(256, mempool_alloc_slab, + mempool_free_slab, + badblock_io_cache); + if (!badblock_io_pool) { + DMERR("cannot create bad-block I/O mempool."); + kmem_cache_destroy(badblock_io_cache); + rc = -ENOMEM; + goto out; + } + + rc = dm_daemon_start(&badblock_daemon, + "dm_badblock", process_io_list); + if (rc) { + DMERR("cannot start bad-block daemon."); + mempool_destroy(badblock_io_pool); + kmem_cache_destroy(badblock_io_cache); + goto out; + } + } + bbr_devices++; + +out: + up(&global_init_lock); + return rc; +} + +static void global_exit(void) +{ + down(&global_init_lock); + + bbr_devices--; + if (bbr_devices == 0) { + dm_daemon_stop(&badblock_daemon); + mempool_destroy(badblock_io_pool); + kmem_cache_destroy(badblock_io_cache); + } + + up(&global_init_lock); +} + +/* + * Process one I/O that has triggered an error. + */ +static int process_io(struct badblock_io *bb_io) +{ + /* KMC: More to do here. :) */ + return 0; +} + +/* + * Take entries off the I/O list and process each individually. + */ +static void process_io_list(void) +{ + struct badblock_io *bb_io; + unsigned long flags; + int rc; + + while (1) { + spin_lock_irqsave(&io_list_lock, flags); + if (list_empty(&io_list)) { + spin_unlock_irqrestore(&io_list_lock, flags); + break; + } + + bb_io = list_entry(io_list.next, struct badblock_io, list); + list_del_init(&bb_io->list); + spin_unlock_irqrestore(&io_list_lock, flags); + + rc = process_io(bb_io); + } +} + +/* + * Put an I/O request on the daemon's list for further processing. + */ +static void schedule_io(struct badblock_io *bb_io) +{ + unsigned long flags; + spin_lock_irqsave(&io_list_lock, flags); + list_add_tail(&bb_io->list , &io_list); + spin_unlock_irqrestore(&io_list_lock, flags); + dm_daemon_wake(&badblock_daemon); +} + +/** + * badblock_ctr + * + * Build a new bad-block mapping. 
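+ * Takes three arguments: the data device, the replacement device
+ * and the chunk size in sectors (see the argument parsing below).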
+ * arg format: + **/ +static int badblock_ctr(struct dm_target *ti, unsigned int argc, char **argv) +{ + struct dm_badblock *bb; + char *data_path, *repl_path; + unsigned long chunk_size; + int rc; + + if (argc < 3) { + ti->error = "dm-badblock requires exactly 3 arguments"; + rc = -EINVAL; + goto bad1; + } + + data_path = argv[0]; + repl_path = argv[1]; + chunk_size = simple_strtoul(argv[2], NULL, 10); + + rc = global_init(); + if (rc) { + ti->error = "cannot initialize bad-block global pools and daemon"; + goto bad1; + } + + bb = kmalloc(sizeof(*bb), GFP_KERNEL); + if (!bb) { + ti->error = "cannot allocate bad-block private structure"; + rc = -ENOMEM; + goto bad2; + } + + bb->etable = etable_create(ti, data_path, repl_path, chunk_size, 'N'); + if (!bb->etable) { + ti->error = "cannot create bad-block exception table"; + rc = -EINVAL; + goto bad3; + } + + ti->private = bb; + return 0; + +bad3: + kfree(bb); +bad2: + global_exit(); +bad1: + return rc; +} + +/** + * badblock_dtr + * + * Delete a bad-block mapping. + **/ +static void badblock_dtr(struct dm_target *ti) +{ + struct dm_badblock *bb = ti->private; + etable_delete(bb->etable); + kfree(bb); + global_exit(); +} + +/** + * badblock_map + * + * Process an I/O request for a bad-block device. + **/ +static int badblock_map(struct dm_target *ti, struct buffer_head *bh, + int rw, union map_info *map_context) +{ + struct dm_badblock *bb = ti->private; + return -ENOSYS; +} + +/** + * badblock_end_io + * + * Return >0 if we're going to do more processing on this bh. + * Return <0 if there's an error we can't handle. + **/ +static int badblock_end_io(struct dm_target *ti, struct buffer_head *bh, + int rw, int error, union map_info *map_context) +{ + return -ENOSYS; +} + +static void badblock_resume(struct dm_target *ti) +{ + struct dm_badblock *bb = ti->private; + int rc; + + rc = etable_read_metadata(bb->etable); + /* KMC: What do we do if there's an error??? */ +} + +static int badblock_status(struct dm_target *ti, status_type_t type, + char *result, unsigned int maxlen) +{ + struct dm_badblock *bb = ti->private; + struct exception_table *et = bb->etable; + struct dm_dev *dev; + chunk_t numerator, denominator; + char data[16], repl[16]; + int rc; + + switch (type) { + case STATUSTYPE_INFO: + rc = etable_fraction_full(et, &numerator, &denominator); + if (rc) { + snprintf(result, maxlen, "Unknown"); + } else { + snprintf(result, maxlen, + SECTOR_FORMAT "/" SECTOR_FORMAT, + numerator, denominator); + } + break; + + case STATUSTYPE_TABLE: + /* + * dm_kdevname returns a static pointer so we need to + * make private copies if the output is to make sense. 
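+		 * Note that strncpy() will not NUL-terminate the copies
+		 * if a device name fills the 16-byte buffers.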
+	 */
+	dev = etable_src_dev(et);
+	strncpy(data, dm_kdevname(dev->dev), sizeof(data));
+	dev = etable_dest_dev(et);
+	strncpy(repl, dm_kdevname(dev->dev), sizeof(repl));
+	snprintf(result, maxlen, "%s %s %ld", data, repl,
+		 etable_chunk_size(et));
+	break;
+	}
+
+	return 0;
+}
+
+static struct target_type badblock_target = {
+	name:	"badblock",
+	module:	THIS_MODULE,
+	ctr:	badblock_ctr,
+	dtr:	badblock_dtr,
+	map:	badblock_map,
+	end_io:	badblock_end_io,
+	resume:	badblock_resume,
+	status:	badblock_status,
+};
+
+int __init dm_badblock_init(void)
+{
+	int rc = dm_register_target(&badblock_target);
+	if (rc) {
+		DMERR("failed to register badblock target: %d", rc);
+	}
+	return rc;
+}
+
+void __exit dm_badblock_exit(void)
+{
+	int rc = dm_unregister_target(&badblock_target);
+	if (rc) {
+		DMERR("failed to unregister badblock target: %d", rc);
+	}
+}
+
+module_init(dm_badblock_init);
+module_exit(dm_badblock_exit);
+MODULE_LICENSE("GPL");
+
diff -Naur linux-2.4.22-dm-1/drivers/md/dm-exception-store.c linux-2.4.22-new-snapshot/drivers/md/dm-exception-store.c
--- linux-2.4.22-dm-1/drivers/md/dm-exception-store.c	2003-09-30 11:59:23.000000000 -0500
+++ linux-2.4.22-new-snapshot/drivers/md/dm-exception-store.c	2003-09-30 12:01:35.000000000 -0500
@@ -1,42 +1,43 @@
 /*
- * dm-snapshot.c
+ * dm-exception-store.c
  *
  * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
  *
  * This file is released under the GPL.
  */
 
-#include "dm-snapshot.h"
-#include "dm-io.h"
-#include "kcopyd.h"
-
 #include
 #include
 #include
 #include
+#include
 
-/*-----------------------------------------------------------------
- * Persistent snapshots, by persistent we mean that the snapshot
- * will survive a reboot.
- *---------------------------------------------------------------*/
+#include "dm-io.h"
+#include "kcopyd.h"
+#include "dm-exception-table.h"
+
+/*-----------------------------------------------------------------------
+ * Persistent exception store.  By persistent we mean that the destination
+ * device will survive a reboot.
+ *---------------------------------------------------------------------*/
 
 /*
- * We need to store a record of which parts of the origin have
- * been copied to the snapshot device.  The snapshot code
- * requires that we copy exception chunks to chunk aligned areas
- * of the COW store.  It makes sense therefore, to store the
- * metadata in chunk size blocks.
+ * We need to keep a record of which parts of the source device
+ * have been copied to the destination device.  The exception code
+ * requires that we copy exception-chunks to chunk-aligned areas
+ * of the destination device.  It makes sense therefore, to store the
+ * metadata in chunk-size blocks.
  *
- * There is no backward or forward compatibility implemented,
- * snapshots with different disk versions than the kernel will
- * not be usable.  It is expected that "lvcreate" will blank out
- * the start of a fresh COW device before calling the snapshot
+ * There is no backward or forward compatibility implemented.
+ * Exception-stores with different disk versions than the kernel
+ * will not be usable.  It is expected that "lvcreate" will blank out
+ * the start of a fresh destination device before calling the exception
  * constructor.
  *
- * The first chunk of the COW device just contains the header.
+ * The first chunk of the destination device just contains the header.
  * After this there is a chunk filled with exception metadata,
- * followed by as many exception chunks as can fit in the
- * metadata areas.
+ * followed by as many exception chunks as can fit in the metadata + * areas. * * All on disk structures are in little-endian format. The end * of the exceptions info is indicated by an exception with a @@ -45,21 +46,21 @@ */ /* - * Magic for persistent snapshots: "SnAp" - Feeble isn't it. + * Magic for persistent exception-stores: "SnAp" - Feeble isn't it. */ -#define SNAP_MAGIC 0x70416e53 +#define PSTORE_MAGIC 0x70416e53 /* * The on-disk version of the metadata. */ -#define SNAPSHOT_DISK_VERSION 1 +#define PSTORE_DISK_VERSION 1 struct disk_header { uint32_t magic; /* - * Is this snapshot valid. There is no way of recovering - * an invalid snapshot. + * Is this exception-store valid. There is no way of recovering + * once it's invalidated. */ uint32_t valid; @@ -87,10 +88,10 @@ * The top level structure for a persistent exception store. */ struct pstore { - struct dm_snapshot *snap; /* up pointer to my snapshot */ + struct exception_table *etable; /* up pointer to my table */ int version; - int valid; - uint32_t chunk_size; + int valid; /* KMC: Do we need this? Should be available through "etable" */ + uint32_t chunk_size; /* KMC: Do we need this? Should be available through "etable" */ uint32_t exceptions_per_area; /* @@ -184,7 +185,7 @@ struct io_region where; unsigned int bits; - where.dev = ps->snap->cow->dev; + where.dev = ps->etable->dest->dev; where.sector = ps->chunk_size * chunk; where.count = ps->chunk_size; @@ -217,7 +218,7 @@ return area_io(ps, area, WRITE); } -static int read_header(struct pstore *ps, int *new_snapshot) +static int read_header(struct pstore *ps, int *new_store) { int r; struct disk_header *dh; @@ -229,16 +230,16 @@ dh = (struct disk_header *) ps->area; if (le32_to_cpu(dh->magic) == 0) { - *new_snapshot = 1; + *new_store = 1; - } else if (le32_to_cpu(dh->magic) == SNAP_MAGIC) { - *new_snapshot = 0; + } else if (le32_to_cpu(dh->magic) == PSTORE_MAGIC) { + *new_store = 0; ps->valid = le32_to_cpu(dh->valid); ps->version = le32_to_cpu(dh->version); ps->chunk_size = le32_to_cpu(dh->chunk_size); } else { - DMWARN("Invalid/corrupt snapshot"); + DMWARN("Invalid/corrupt exception store"); r = -ENXIO; } @@ -252,7 +253,7 @@ memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT); dh = (struct disk_header *) ps->area; - dh->magic = cpu_to_le32(SNAP_MAGIC); + dh->magic = cpu_to_le32(PSTORE_MAGIC); dh->valid = cpu_to_le32(ps->valid); dh->version = cpu_to_le32(ps->version); dh->chunk_size = cpu_to_le32(ps->chunk_size); @@ -325,8 +326,8 @@ /* * If the new_chunk is pointing at the start of - * the COW device, where the first metadata area - * is we know that we've hit the end of the + * the destination device, where the first metadata + * area is we know that we've hit the end of the * exceptions. Therefore the area is not full. */ if (de.new_chunk == 0LL) { @@ -342,9 +343,9 @@ ps->next_free = de.new_chunk + 1; /* - * Otherwise we add the exception to the snapshot. + * Otherwise we add the exception to the table. 
*/ - r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk); + r = etable_add_exception(ps->etable, de.old_chunk, de.new_chunk); if (r) return r; } @@ -382,8 +383,8 @@ static void persistent_fraction_full(struct exception_store *store, sector_t *numerator, sector_t *denominator) { - *numerator = get_info(store)->next_free * store->snap->chunk_size; - *denominator = get_dev_size(store->snap->cow->dev); + *numerator = get_info(store)->next_free * store->etable->chunk_size; + *denominator = get_dev_size(store->etable->dest->dev); } static void persistent_destroy(struct exception_store *store) @@ -398,20 +399,20 @@ static int persistent_read_metadata(struct exception_store *store) { - int r, new_snapshot; + int r, new_store; struct pstore *ps = get_info(store); /* - * Read the snapshot header. + * Read the exception header. */ - r = read_header(ps, &new_snapshot); + r = read_header(ps, &new_store); if (r) return r; /* - * Do we need to setup a new snapshot ? + * Do we need to setup a new exception store? */ - if (new_snapshot) { + if (new_store) { r = write_header(ps); if (r) { DMWARN("write_header failed"); @@ -427,14 +428,17 @@ } else { /* * Sanity checks. + * + * KMC: Should we compare chunk_size from the disk-header + * against the value sent to etable_create? */ if (!ps->valid) { - DMWARN("snapshot is marked invalid"); + DMWARN("exception store is marked invalid"); return -EINVAL; } - if (ps->version != SNAPSHOT_DISK_VERSION) { - DMWARN("unable to handle snapshot disk version %d", + if (ps->version != PSTORE_DISK_VERSION) { + DMWARN("unable to handle exception store disk version %d", ps->version); return -EINVAL; } @@ -455,10 +459,10 @@ { struct pstore *ps = get_info(store); uint32_t stride; - sector_t size = get_dev_size(store->snap->cow->dev); + sector_t size = get_dev_size(store->etable->dest->dev); /* Is there enough room ? */ - if (size < ((ps->next_free + 1) * store->snap->chunk_size)) + if (size < ((ps->next_free + 1) * store->etable->chunk_size)) return -ENOSPC; e->new_chunk = ps->next_free; @@ -530,7 +534,7 @@ } } -static void persistent_drop(struct exception_store *store) +static void persistent_invalidate(struct exception_store *store) { struct pstore *ps = get_info(store); @@ -539,7 +543,7 @@ DMWARN("write header failed"); } -int dm_create_persistent(struct exception_store *store, uint32_t chunk_size) +int estore_create_persistent(struct exception_store *store, uint32_t chunk_size) { int r; struct pstore *ps; @@ -555,12 +559,12 @@ goto bad; } - ps->snap = store->snap; + ps->etable = store->etable; ps->valid = 1; - ps->version = SNAPSHOT_DISK_VERSION; + ps->version = PSTORE_DISK_VERSION; ps->chunk_size = chunk_size; ps->exceptions_per_area = (chunk_size << SECTOR_SHIFT) / - sizeof(struct disk_exception); + sizeof(struct disk_exception); ps->next_free = 2; /* skipping the header and first area */ ps->current_committed = 0; @@ -585,7 +589,7 @@ store->read_metadata = persistent_read_metadata; store->prepare_exception = persistent_prepare; store->commit_exception = persistent_commit; - store->drop_snapshot = persistent_drop; + store->invalidate = persistent_invalidate; store->fraction_full = persistent_fraction_full; store->context = ps; @@ -603,40 +607,40 @@ } /*----------------------------------------------------------------- - * Implementation of the store for non-persistent snapshots. + * Implementation of the non-persistent exception-store. 
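+ * Exceptions are tracked purely in memory: read_metadata is a no-op
+ * and commit_exception reports success immediately.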
*---------------------------------------------------------------*/ struct transient_c { sector_t next_free; }; -void transient_destroy(struct exception_store *store) +static void transient_destroy(struct exception_store *store) { kfree(store->context); } -int transient_read_metadata(struct exception_store *store) +static int transient_read_metadata(struct exception_store *store) { return 0; } -int transient_prepare(struct exception_store *store, struct exception *e) +static int transient_prepare(struct exception_store *store, struct exception *e) { struct transient_c *tc = (struct transient_c *) store->context; - sector_t size = get_dev_size(store->snap->cow->dev); + sector_t size = get_dev_size(store->etable->dest->dev); - if (size < (tc->next_free + store->snap->chunk_size)) + if (size < (tc->next_free + store->etable->chunk_size)) return -1; - e->new_chunk = sector_to_chunk(store->snap, tc->next_free); - tc->next_free += store->snap->chunk_size; + e->new_chunk = sector_to_chunk(store->etable, tc->next_free); + tc->next_free += store->etable->chunk_size; return 0; } -void transient_commit(struct exception_store *store, - struct exception *e, - void (*callback) (void *, int success), - void *callback_context) +static void transient_commit(struct exception_store *store, + struct exception *e, + void (*callback) (void *, int success), + void *callback_context) { /* Just succeed */ callback(callback_context, 1); @@ -646,11 +650,11 @@ sector_t *numerator, sector_t *denominator) { *numerator = ((struct transient_c *) store->context)->next_free; - *denominator = get_dev_size(store->snap->cow->dev); + *denominator = get_dev_size(store->etable->dest->dev); } -int dm_create_transient(struct exception_store *store, - struct dm_snapshot *s, int blocksize) +int estore_create_transient(struct exception_store *store, + struct exception_table *etable) { struct transient_c *tc; @@ -660,7 +664,7 @@ store->prepare_exception = transient_prepare; store->commit_exception = transient_commit; store->fraction_full = transient_fraction_full; - store->snap = s; + store->etable = etable; tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL); if (!tc) @@ -671,3 +675,4 @@ return 0; } + diff -Naur linux-2.4.22-dm-1/drivers/md/dm-exception-table.h linux-2.4.22-new-snapshot/drivers/md/dm-exception-table.h --- linux-2.4.22-dm-1/drivers/md/dm-exception-table.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.4.22-new-snapshot/drivers/md/dm-exception-table.h 2003-09-30 12:01:35.000000000 -0500 @@ -0,0 +1,179 @@ +/* + * dm-exception-table.h + * + * Copyright (C) 2001-2002 Sistina Software (UK) Limited. + * + * This file is released under the GPL. + */ + +#ifndef DM_EXCEPTION_TABLE_H +#define DM_EXCEPTION_TABLE_H + +#include "dm-exception.h" + +/** + * struct exception + * + * An exception is used where an old chunk of data has been + * replaced by a new one. + **/ +struct exception { + struct list_head hash_list; + chunk_t old_chunk; + chunk_t new_chunk; +}; + +/** + * struct pending_exception + * + * An exception that is in the process of being copied from the + * source device to the destination device. + * + * KMC: We may need to add a ref-count or a lock to this structure. + **/ +struct pending_exception { + struct exception e; + + /* + * I/O buffers waiting for this copy to complete are held + * in a list (using b_reqnext). + * + * KMC: Do we need a third queue for reads to the destination? 
+ */ + struct buffer_head *src_bhs; + struct buffer_head *dest_bhs; + + /* + * Other pending_exceptions that are processing this chunk + * on the source device. When this list is empty, we know + * we can complete the source queue. + */ + struct list_head siblings; + + /* Pointer back to the exception-table. */ + struct exception_table *etable; + + /* 1 indicates the exception has already been sent to kcopyd. */ + int started; + + /* Count of threads accessing this structure. */ + atomic_t count; +}; + +/** + * struct exception_hash + * + * A hash table for fast storage and lookups of exceptions. + * Used for both completed and pending exceptions. + **/ +struct exception_hash { + uint32_t hash_mask; + struct list_head *hash_table; +}; + +/* + * Abstraction to handle the meta/layout of exception stores (the + * destination device). + */ +struct exception_store { + + /* + * Deletes this store from memory when you've finished with it. + */ + void (*destroy) (struct exception_store *store); + + /* + * Read metadata from the exception device and load all existing + * exceptions into memory. Don't perform I/O to the destination + * device until this has been called. + */ + int (*read_metadata) (struct exception_store *store); + + /* + * Find somewhere to store the next exception. + */ + int (*prepare_exception) (struct exception_store *store, + struct exception *e); + + /* + * Update the metadata with this exception. + */ + void (*commit_exception) (struct exception_store *store, + struct exception *e, + void (*callback) (void *, int success), + void *callback_context); + + /* + * The destination device is invalid; note this in the metadata. + */ + void (*invalidate) (struct exception_store *store); + + /* + * Return how full the destination device is. + */ + void (*fraction_full) (struct exception_store *store, + sector_t *numerator, + sector_t *denominator); + + struct exception_table *etable; + void *context; +}; + +/** + * struct exception_table + * + * Describe one exception-handler instance. + **/ +struct exception_table { + struct rw_semaphore lock; + + struct dm_target *dm_target; + struct dm_dev *src; + struct dm_dev *dest; + + /* Size of data blocks saved - must be a power of 2 */ + chunk_t chunk_size; + chunk_t chunk_mask; + chunk_t chunk_shift; + + /* You can't use the destination device if this is 0 (e.g. if full) */ + int valid; + + /* 1 if metadata has already been read from disk. */ + int have_metadata; + + /* Used for display of table. 'P' or 'N'. */ + char type; + + /* The last percentage we notified */ + int last_percent; + + /* Two hash tables. One for in-progress copies, and one + * for completed copies. + */ + struct exception_hash pending; + struct exception_hash complete; + + /* The on disk metadata handler */ + struct exception_store store; + + struct kcopyd_client *kcopyd_client; +}; + +/* + * Constructors for the persistent and transient stores. + */ +int estore_create_persistent(struct exception_store *store, uint32_t chunk_size); +int estore_create_transient(struct exception_store *store, + struct exception_table *etable); + +/* + * Used by the exception stores to load exceptions when + * initialising. 
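+ * The persistent store, for example, calls etable_add_exception()
+ * once for each on-disk entry it finds while scanning the metadata
+ * areas.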
+ */ +int etable_add_exception(struct exception_table *et, chunk_t old, chunk_t new); +chunk_t sector_to_chunk(struct exception_table *etable, sector_t sector); +sector_t chunk_to_sector(struct exception_table *etable, chunk_t chunk); + +#endif + diff -Naur linux-2.4.22-dm-1/drivers/md/dm-exception.c linux-2.4.22-new-snapshot/drivers/md/dm-exception.c --- linux-2.4.22-dm-1/drivers/md/dm-exception.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.4.22-new-snapshot/drivers/md/dm-exception.c 2003-09-30 12:01:35.000000000 -0500 @@ -0,0 +1,932 @@ +/* + * dm-exception-table.c + * + * Copyright (C) 2001-2002 Sistina Software (UK) Limited. + * + * This file is released under the GPL. + */ + +/* KMC: Need to designate which APIs require read and/or write locks. */ + +/* KMC: Need to figure out which APIs are EXPORT'ed. */ + +#include +#include +#include + +#include "dm.h" +#include "dm-exception-table.h" +#include "kcopyd.h" + +/* + * Each exception-table reserves this many pages for io + * FIXME: calculate this + */ +#define EXCEPTION_TABLE_PAGES 256 + +/* + * The percentage increment we will wake up users at + */ +#define WAKE_UP_PERCENT 5 + +/* + * Global caches and pools for exceptions. + */ +static kmem_cache_t *exception_cache; +static kmem_cache_t *pending_cache; +static mempool_t *pending_pool; + +/* + * Simple APIs to return basic info about the exception table. + */ + +struct dm_dev *etable_src_dev(struct exception_table *etable) +{ + return etable->src; +} + +struct dm_dev *etable_dest_dev(struct exception_table *etable) +{ + return etable->dest; +} + +chunk_t etable_chunk_size(struct exception_table *etable) +{ + return etable->chunk_size; +} + +int etable_valid(struct exception_table *etable) +{ + return etable->valid; +} + +char etable_type(struct exception_table *etable) +{ + return etable->type; +} + +int etable_fraction_full(struct exception_table *etable, + sector_t *numerator, sector_t *denominator) +{ + int r = -EINVAL; + + if (etable->store.fraction_full) { + etable->store.fraction_full(&etable->store, + numerator, denominator); + r = 0; + } + + return r; +} + +/* + * Must lock the exception-table before accessing exception info. + */ + +void etable_write_lock(struct exception_table *etable) +{ + down_write(&etable->lock); +} + +void etable_write_unlock(struct exception_table *etable) +{ + up_write(&etable->lock); +} + +void etable_read_lock(struct exception_table *etable) +{ + down_read(&etable->lock); +} + +void etable_read_unlock(struct exception_table *etable) +{ + up_read(&etable->lock); +} + +/* + * Hard coded magic. The number of list_head's that will fit into + * 1/50th of the machine's physical memory. + */ +static int calc_max_buckets(void) +{ + unsigned long mem; + + mem = num_physpages << PAGE_SHIFT; + mem /= 50; + mem /= sizeof(struct list_head); + + return mem; +} + +/* + * Rounds a number down to a power of 2. + * KMC: Should this be in dm.h like dm_round_up()? 
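+ * e.g. round_down(24) == 16 and round_down(64) == 64.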
+ */ +static inline uint32_t round_down(uint32_t n) +{ + while (n & (n - 1)) + n &= (n - 1); + return n; +} + +static int init_exception_hash(struct exception_hash *eh, uint32_t size) +{ + unsigned int i; + + eh->hash_mask = size - 1; + eh->hash_table = vcalloc(size, sizeof(struct list_head)); + if (!eh->hash_table) + return -ENOMEM; + + for (i = 0; i < size; i++) + INIT_LIST_HEAD(eh->hash_table + i); + + return 0; +} + +static void exit_exception_hash(struct exception_hash *eh, kmem_cache_t *mem) +{ + struct list_head *slot, *entry, *temp; + struct exception *ex; + int i, size; + + size = eh->hash_mask + 1; + for (i = 0; i < size; i++) { + slot = eh->hash_table + i; + + list_for_each_safe(entry, temp, slot) { + ex = list_entry(entry, struct exception, hash_list); + kmem_cache_free(mem, ex); + } + } + + vfree(eh->hash_table); +} + +/* + * Allocate room for a suitable hash table. + */ +static int init_hash_tables(struct exception_table *et) +{ + sector_t hash_size, dest_dev_size, src_dev_size, max_buckets; + + /* + * Calculate based on the size of the source or destination devices. + */ + dest_dev_size = get_dev_size(et->dest->dev); + src_dev_size = get_dev_size(et->src->dev); + max_buckets = calc_max_buckets(); + + hash_size = min(src_dev_size, dest_dev_size) / et->chunk_size; + hash_size = min(hash_size, max_buckets); + + /* Round it down to a power of 2 */ + hash_size = round_down(hash_size); + if (init_exception_hash(&et->complete, hash_size)) + return -ENOMEM; + + /* + * Allocate hash table for in-flight exceptions + * Make this smaller than the real hash table + */ + hash_size >>= 3; + if (!hash_size) + hash_size = 64; + + if (init_exception_hash(&et->pending, hash_size)) { + exit_exception_hash(&et->complete, exception_cache); + return -ENOMEM; + } + + return 0; +} + +/** + * etable_create + * + * Create a new exception table. + * chunk_size: Must be a non-zero power-of-2. + * persistent: Must be 'P' or 'N' + **/ +struct exception_table *etable_create(struct dm_target *ti, + char *src_path, + char *dest_path, + unsigned long chunk_size, + char persistent) +{ + struct exception_table *et = NULL; + int blocksize, r; + + if (persistent != 'P' && persistent != 'N') { + DMERR("Persistent flag is not P or N"); + goto out; + } + + if (chunk_size == 0) { + DMERR("Chunk size must be non-zero"); + goto out; + } + + /* + * Chunk size must be multiple of page size. Silently + * round up if it's not. + */ + chunk_size = dm_round_up(chunk_size, PAGE_SIZE / SECTOR_SIZE); + + /* Check the sizes are small enough to fit in one kiovec */ + if (chunk_size > KIO_MAX_SECTORS) { + DMERR("Chunk size is too big"); + goto out; + } + + /* Check chunk_size is a power of 2 */ + /* KMC: Since we rounded-up to PAGE_SIZE, do we need to bother with this? */ + if (chunk_size & (chunk_size - 1)) { + DMERR("Chunk size is not a power of 2"); + goto out; + } + + et = kmalloc(sizeof(*et), GFP_KERNEL); + if (!et) { + DMERR("Cannot allocate new exception table."); + goto out; + } + + r = dm_get_device(ti, src_path, 0, ti->len, FMODE_READ, &et->src); + if (r) { + DMERR("Cannot get source device"); + goto bad1; + } + + r = dm_get_device(ti, dest_path, 0, 0, + FMODE_READ | FMODE_WRITE, &et->dest); + if (r) { + dm_put_device(ti, et->src); + DMERR("Cannot get destination device"); + goto bad1; + } + + /* Validate the chunk size against the dest. 
device's block size */ + blocksize = get_hardsect_size(et->dest->dev); + if (chunk_size % (blocksize / SECTOR_SIZE)) { + DMERR("Chunk size is not a multiple of device blocksize"); + goto bad2; + } + + init_rwsem(&et->lock); + et->dm_target = ti; + et->chunk_size = chunk_size; + et->chunk_mask = chunk_size - 1; + for (et->chunk_shift = 0; chunk_size; + et->chunk_shift++, chunk_size >>= 1) + ; + et->chunk_shift--; + + et->valid = 1; + et->have_metadata = 0; + et->type = persistent; + et->last_percent = 0; + + /* Allocate hash tables. */ + if (init_hash_tables(et)) { + DMERR("Unable to allocate hash table space"); + goto bad2; + } + + /* + * Check the persistent flag - done here because we need the iobuf + * to check the destination-dev header. + */ + et->store.etable = et; + + if (persistent == 'P') + r = estore_create_persistent(&et->store, et->chunk_size); + else + r = estore_create_transient(&et->store, et); + + if (r) { + DMERR("Couldn't create exception store"); + goto bad3; + } + + r = kcopyd_client_create(EXCEPTION_TABLE_PAGES, &et->kcopyd_client); + if (r) { + DMERR("Could not create kcopyd client"); + goto bad4; + } + +out: + return et; + +bad4: + et->store.destroy(&et->store); +bad3: + exit_exception_hash(&et->complete, exception_cache); + exit_exception_hash(&et->pending, pending_cache); +bad2: + dm_put_device(ti, et->dest); + dm_put_device(ti, et->src); +bad1: + kfree(et); + return NULL; +} + +/** + * etable_delete + * + * Delete an exception-table. + **/ +void etable_delete(struct exception_table *etable) +{ + dm_table_event(etable->dm_target->table); + kcopyd_client_destroy(etable->kcopyd_client); + etable->store.destroy(&etable->store); + exit_exception_hash(&etable->complete, exception_cache); + /* KMC: We're free'ing the remaining pending exceptions directly + * back to the pending-cache instead of the pending-mempool. + * Does this matter? Should the pending-hash-table be empty + * at this point anyway? We shouldn't be deleting devices + * unless they're closed (open-count == 0)...and we can't + * close the device until all outstanding I/O's are complete... + */ + exit_exception_hash(&etable->pending, pending_cache); + dm_put_device(etable->dm_target, etable->src); + dm_put_device(etable->dm_target, etable->dest); + kfree(etable); +} + +/** + * etable_read_metadata + * + * If we haven't read the metadata yet, call the exception-store to read the + * metadata and setup the initial exception hash-table. + **/ +int etable_read_metadata(struct exception_table *etable) +{ + int r = 0; + + if (!etable->have_metadata) { + r = etable->store.read_metadata(&etable->store); + if (!r) { + etable->have_metadata = 1; + } + } + + return r; +} + +/* + * Convert LBAs to chunks and vice-versa. + */ + +chunk_t sector_to_chunk(struct exception_table *etable, sector_t sector) +{ + return (sector & ~etable->chunk_mask) >> etable->chunk_shift; +} + +sector_t chunk_to_sector(struct exception_table *etable, chunk_t chunk) +{ + return chunk << etable->chunk_shift; +} + +/* + * Basic hash-table operations. + * FIXME: check how this hash fn is performing. + */ +static inline uint32_t exception_hash(struct exception_hash *eh, chunk_t chunk) +{ + return chunk & eh->hash_mask; +} + +static void insert_exception(struct exception_hash *eh, struct exception *e) +{ + struct list_head *l = &eh->hash_table[exception_hash(eh, e->old_chunk)]; + list_add(&e->hash_list, l); +} + +static inline void remove_exception(struct exception *e) +{ + list_del(&e->hash_list); +} + +/* + * Hash-table lookups. 
Return the exception data + * for a chunk, or NULL if not remapped. + */ +static struct exception *lookup_exception(struct exception_hash *eh, + chunk_t chunk) +{ + struct list_head *slot, *el; + struct exception *e; + + slot = &eh->hash_table[exception_hash(eh, chunk)]; + list_for_each(el, slot) { + e = list_entry(el, struct exception, hash_list); + if (e->old_chunk == chunk) + return e; + } + + return NULL; +} + +/** + * _etable_lookup_exception + * + * Look for a complete exception. Only requires a read-lock, since once + * an exception is added to the "complete" table, it will never be removed. + **/ +struct exception *_etable_lookup_exception(struct exception_table *etable, + struct buffer_head *bh) +{ + return lookup_exception(&etable->complete, + sector_to_chunk(etable, bh->b_rsector)); +} + +/** + * __etable_lookup_pending_exception + * + * Look for a pending exception. Requires a write-lock, since at some point + * the pending exception will be removed from the table and free'd, and no + * other ref-counting is done. This may have to change!!! + * + * KMC: If we add ref-counting to the pending-exceptions, can we call this + * function with just the read-lock? + **/ +struct pending_exception * +__etable_lookup_pending_exception(struct exception_table *etable, + struct buffer_head *bh) +{ + struct exception *e; + struct pending_exception *pe = NULL; + + e = lookup_exception(&etable->pending, + sector_to_chunk(etable, bh->b_rsector)); + if (e) { + /* cast the exception to a pending exception */ + pe = list_entry(e, struct pending_exception, e); + atomic_inc(&pe->count); + } + + return pe; +} + +/** + * etable_remap_exception + * + * Update the buffer-head to redirect it to a remapped chunk on the + * destination device. No lock is required. + **/ +void etable_remap_exception(struct exception_table *et, + struct exception *e, + struct buffer_head *bh) +{ + bh->b_rdev = et->dest->dev; + bh->b_rsector = chunk_to_sector(et, e->new_chunk) + + (bh->b_rsector & et->chunk_mask); +} + +/* + * The pending-exceptions hold lists of buffer_heads, + * using the b_reqnext field. + */ +static void queue_buffer(struct buffer_head **queue, struct buffer_head *bh) +{ + bh->b_reqnext = *queue; + *queue = bh; +} + +/** + * __etable_queue_src_buffer + * + * Queue a buffer-head destined for the source device on the + * pending-exception. Requires a write-lock on the exception table. + **/ +void __etable_queue_src_buffer(struct pending_exception *pe, + struct buffer_head *bh) +{ + queue_buffer(&pe->src_bhs, bh); +} + +/** + * __etable_queue_dest_buffer + * + * Remap the buffer-head to the destination device and queue it on the + * pending-exception. Requires a write-lock on the exception-table. 
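+ * The remap is done up front so that, once the copy completes, the
+ * queued buffers can be submitted to the destination unchanged (see
+ * pending_complete() below).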
+ **/ +void __etable_queue_dest_buffer(struct pending_exception *pe, + struct buffer_head *bh) +{ + etable_remap_exception(pe->etable, &pe->e, bh); + queue_buffer(&pe->dest_bhs, bh); +} + +static inline struct exception *alloc_exception(void) +{ + struct exception *e; + + e = kmem_cache_alloc(exception_cache, GFP_NOIO); + if (!e) + e = kmem_cache_alloc(exception_cache, GFP_ATOMIC); + + return e; +} + +static inline void free_exception(struct exception *e) +{ + kmem_cache_free(exception_cache, e); +} + +static inline struct pending_exception *alloc_pending_exception(void) +{ + return mempool_alloc(pending_pool, GFP_NOIO); +} + +static inline void free_pending_exception(struct pending_exception *pe) +{ + mempool_free(pe, pending_pool); +} + +/** + * __etable_create_pending_exception + * + * Create and prepare new pending exception and add it to the pending + * hash table. Requires a write-lock since we're modifying the table. + **/ +struct pending_exception * +__etable_create_pending_exception(struct exception_table *et, + struct buffer_head *bh) +{ + struct pending_exception *pe; + chunk_t chunk = sector_to_chunk(et, bh->b_rsector); + + pe = alloc_pending_exception(); + pe->e.old_chunk = chunk; + pe->src_bhs = pe->dest_bhs = NULL; + INIT_LIST_HEAD(&pe->siblings); + pe->etable = et; + pe->started = 0; + atomic_set(&pe->count, 2); + + if (et->store.prepare_exception(&et->store, &pe->e)) { + free_pending_exception(pe); + et->valid = 0; + return NULL; + } + + insert_exception(&et->pending, &pe->e); + + return pe; +} + +static void list_merge(struct list_head *l1, struct list_head *l2) +{ + struct list_head *l1_n, *l2_p; + + l1_n = l1->next; + l2_p = l2->prev; + + l1->next = l2; + l2->prev = l1; + + l2_p->next = l1_n; + l1_n->prev = l2_p; +} + +/** + * __etable_merge_pending_exceptions + * + * Join these two pending-exceptions together through their "sibling" fields. + * Requires a write-lock. + * + * KMC: Needs better locking! + **/ +void __etable_merge_pending_exceptions(struct pending_exception *pe1, + struct pending_exception *pe2) +{ + list_merge(&pe1->siblings, &pe2->siblings); +} + +/** + * etable_add_exception + * + * Allocate and initialize an exception and add it to the + * complete-exceptions hash-table. + **/ +int etable_add_exception(struct exception_table *et, chunk_t old, chunk_t new) +{ + struct exception *e; + + e = alloc_exception(); + if (!e) + return -ENOMEM; + + e->old_chunk = old; + e->new_chunk = new; + insert_exception(&et->complete, e); + return 0; +} + +static void queue_buffers(struct buffer_head **queue, struct buffer_head *bhs) +{ + while (*queue) + queue = &((*queue)->b_reqnext); + + *queue = bhs; +} + +/* + * Flush a list of buffers. + */ +static void flush_buffers(struct buffer_head *bh) +{ + struct buffer_head *n; + + while (bh) { + n = bh->b_reqnext; + bh->b_reqnext = NULL; + generic_make_request(WRITE, bh); + bh = n; + } + + run_task_queue(&tq_disk); +} + +/* + * Error a list of buffers. + */ +static void error_buffers(struct buffer_head *bh) +{ + struct buffer_head *n; + + while (bh) { + n = bh->b_reqnext; + bh->b_reqnext = NULL; + buffer_IO_error(bh); + bh = n; + } +} + +/* KMC: I think we need a big spinlock to protect the "sibling" lists. + * Otherwise we need some "anchor" structure for each sibling list + * where we can put a lock to control the list. 
+ */ +static struct buffer_head *__flush_src_bhs(struct pending_exception *pe) +{ + struct pending_exception *sibling; + + if (list_empty(&pe->siblings)) + return pe->src_bhs; + + sibling = list_entry(pe->siblings.next, + struct pending_exception, siblings); + + list_del(&pe->siblings); + + /* FIXME: I think there's a race on SMP machines here, add spin lock */ + queue_buffers(&sibling->src_bhs, pe->src_bhs); + + return NULL; +} + +/** + * etable_put_pending_exception + * + * Decrement the reference count in the pending exception. When the count + * reaches zero, we can flush the I/O queues, and free the structure. + * + * KMC: Not quite done yet! + **/ +void etable_put_pending_exception(struct pending_exception *pe) +{ + struct buffer_head *flush; + + if (atomic_dec_and_test(&pe->count)) { + flush = __flush_src_bhs(pe); + if (flush) { + flush_buffers(flush); + } + free_pending_exception(pe); + } +} + +static void pending_complete(struct pending_exception *pe, int success) +{ + struct exception_table *et = pe->etable; + struct exception *e; + + if (success) { + e = alloc_exception(); + if (!e) { + DMWARN("Unable to allocate exception."); + down_write(&et->lock); + /* KMC: Don't want to invalidate here. Need to return + * control to the caller and let them decide + * what action to take. + */ + et->store.invalidate(&et->store); + et->valid = 0; + remove_exception(&pe->e); + up_write(&et->lock); + + error_buffers(pe->dest_bhs); + goto out; + } + + /* Add a complete-exception and remove the pending-exception. */ + down_write(&et->lock); + memcpy(e, &pe->e, sizeof(*e)); + insert_exception(&et->complete, e); + remove_exception(&pe->e); + up_write(&et->lock); + + /* Submit pending I/Os to the destination device. */ + flush_buffers(pe->dest_bhs); + + /* Notify any interested parties */ + /* KMC: Should this be done here, or should we let + * the caller do this? + */ + if (et->store.fraction_full) { + sector_t numerator, denominator; + int pc; + + et->store.fraction_full(&et->store, &numerator, + &denominator); + pc = numerator * 100 / denominator; + + if (pc >= et->last_percent + WAKE_UP_PERCENT) { + dm_table_event(et->dm_target->table); + et->last_percent = pc - pc % WAKE_UP_PERCENT; + } + } + + } else { + /* Read/write error - snapshot is unusable */ + /* KMC: Don't want to invalidate here. Need to return + * control to the caller and let them decide + * what action to take. + */ + down_write(&et->lock); + if (et->valid) + DMERR("Error reading/writing snapshot"); + et->store.invalidate(&et->store); + et->valid = 0; + remove_exception(&pe->e); + up_write(&et->lock); + + error_buffers(pe->dest_bhs); + + dm_table_event(et->dm_target->table); + DMDEBUG("Exception failed."); + } + + out: + etable_put_pending_exception(pe); +} + +static void commit_callback(void *context, int success) +{ + struct pending_exception *pe = (struct pending_exception *) context; + pending_complete(pe, success); +} + +/* + * Called when the copy I/O has finished. kcopyd actually runs + * this code so don't block. + */ +static void copy_callback(int read_err, unsigned int write_err, void *context) +{ + struct pending_exception *pe = (struct pending_exception *) context; + struct exception_table *et = pe->etable; + + if (read_err || write_err) + pending_complete(pe, 0); + + else + /* Update the metadata if we are persistent */ + et->store.commit_exception(&et->store, &pe->e, + commit_callback, pe); +} + +/** + * __etable_start_copy + * + * Dispatches the copy operation to kcopyd. 
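+ * The source count is clipped to the device size so that a partial
+ * final chunk is copied safely.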
+ * + * KMC: Should we add a "callback" argument to this to notify the caller + * when the copy is complete? Might make pending_complete() a bit + * simpler. + **/ +void __etable_start_copy(struct pending_exception *pe) +{ + struct exception_table *et = pe->etable; + struct io_region src, dest; + kdev_t dev = et->src->dev; + sector_t dev_size; + + if (pe->started) + return; + + pe->started = 1; + + dev_size = get_dev_size(dev); + if (!dev_size) + dev_size = (sector_t) -1; + + src.dev = dev; + src.sector = chunk_to_sector(et, pe->e.old_chunk); + src.count = min(et->chunk_size, dev_size - src.sector); + + dest.dev = et->dest->dev; + dest.sector = chunk_to_sector(et, pe->e.new_chunk); + dest.count = src.count; + + /* Hand over to kcopyd */ + kcopyd_copy(et->kcopyd_client, + &src, 1, &dest, 0, copy_callback, pe); +} + +/** + * etable_start_copies + * + * Start copies on a list of pending-exceptions. Queue the buffer-head + * on the first pending-exception. + */ +void etable_start_copies(struct pending_exception *list) +{ + struct pending_exception *next, *pe = list; + + do { + down_write(&pe->etable->lock); + __etable_start_copy(pe); + up_write(&pe->etable->lock); + + next = list_entry(pe->siblings.next, + struct pending_exception, siblings); + etable_put_pending_exception(pe); + pe = next; + } while (pe != list); +} + +/** + * __etable_invalidate + * + * Mark the exception table invalid (in memory and on disk). Requires + * a write-lock. + **/ +void __etable_invalidate(struct exception_table *etable) +{ + etable->valid = 0; + etable->store.invalidate(&etable->store); +} + +int dm_exception_init(void) +{ + int r; + + exception_cache = kmem_cache_create("dm-exception", + sizeof(struct exception), + __alignof__(struct exception), + 0, NULL, NULL); + if (!exception_cache) { + DMERR("Couldn't create exception cache."); + r = -ENOMEM; + goto bad1; + } + + pending_cache = + kmem_cache_create("dm-pend-exception", + sizeof(struct pending_exception), + __alignof__(struct pending_exception), + 0, NULL, NULL); + if (!pending_cache) { + DMERR("Couldn't create pending cache."); + r = -ENOMEM; + goto bad2; + } + + pending_pool = mempool_create(128, mempool_alloc_slab, + mempool_free_slab, pending_cache); + if (!pending_pool) { + DMERR("Couldn't create pending pool."); + r = -ENOMEM; + goto bad3; + } + + return 0; + +bad3: + kmem_cache_destroy(pending_cache); +bad2: + kmem_cache_destroy(exception_cache); +bad1: + return r; +} + +void dm_exception_exit(void) +{ + mempool_destroy(pending_pool); + kmem_cache_destroy(pending_cache); + kmem_cache_destroy(exception_cache); +} + diff -Naur linux-2.4.22-dm-1/drivers/md/dm-exception.h linux-2.4.22-new-snapshot/drivers/md/dm-exception.h --- linux-2.4.22-dm-1/drivers/md/dm-exception.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.4.22-new-snapshot/drivers/md/dm-exception.h 2003-09-30 12:01:35.000000000 -0500 @@ -0,0 +1,81 @@ +/* + * dm-exception.h + * + * Copyright (C) 2001-2002 Sistina Software (UK) Limited. + * + * This file is released under the GPL. + */ + +#ifndef DM_EXCEPTION_H +#define DM_EXCEPTION_H + +struct exception; +struct pending_exception; +struct exception_table; + +/* + * The exception code deals with largish chunks of the disk at a + * time. Typically 64k - 256k. + */ +typedef sector_t chunk_t; + +/* + * Creating and deleting exception tables. 
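+ * A typical user (the badblock target, for instance) does roughly:
+ *
+ *	et = etable_create(ti, src_path, dest_path, chunk_size, 'N');
+ *	if (!et)
+ *		return -EINVAL;
+ *	...
+ *	etable_delete(et);
+ *
+ * where 'P' selects a persistent store and 'N' a transient one.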
+ */ +struct exception_table *etable_create(struct dm_target *ti, + char *src_path, + char *dest_path, + unsigned long chunk_size, + char persistent); +void etable_delete(struct exception_table *etable); +int etable_read_metadata(struct exception_table *etable); +void __etable_invalidate(struct exception_table *etable); + +/* + * Lock an exception table. + */ +void etable_write_lock(struct exception_table *etable); +void etable_write_unlock(struct exception_table *etable); +void etable_read_lock(struct exception_table *etable); +void etable_read_unlock(struct exception_table *etable); + +/* + * Get basic info about an exception table. + */ +struct dm_dev *etable_src_dev(struct exception_table *etable); +struct dm_dev *etable_dest_dev(struct exception_table *etable); +chunk_t etable_chunk_size(struct exception_table *etable); +int etable_valid(struct exception_table *etable); +char etable_type(struct exception_table *etable); +int etable_fraction_full(struct exception_table *etable, + sector_t *numerator, sector_t *denominator); + +/* + * Access complete-exception information from the tables. + */ +struct exception *_etable_lookup_exception(struct exception_table *etable, + struct buffer_head *bh); +void etable_remap_exception(struct exception_table *et, + struct exception *e, struct buffer_head *bh); + +/* + * Access pending-exception info. + */ +struct pending_exception * +__etable_lookup_pending_exception(struct exception_table *etable, + struct buffer_head *bh); +struct pending_exception * +__etable_create_pending_exception(struct exception_table *et, + struct buffer_head *bh); +void etable_put_pending_exception(struct pending_exception *pe); +void __etable_queue_src_buffer(struct pending_exception *pe, + struct buffer_head *bh); +void __etable_queue_dest_buffer(struct pending_exception *pe, + struct buffer_head *bh); +void __etable_start_copy(struct pending_exception *pe); +void etable_start_copies(struct pending_exception *list); +void __etable_merge_pending_exceptions(struct pending_exception *pe1, + struct pending_exception *pe2); + +#endif + diff -Naur linux-2.4.22-dm-1/drivers/md/dm-snapshot.c linux-2.4.22-new-snapshot/drivers/md/dm-snapshot.c --- linux-2.4.22-dm-1/drivers/md/dm-snapshot.c 2003-09-30 11:59:23.000000000 -0500 +++ linux-2.4.22-new-snapshot/drivers/md/dm-snapshot.c 2003-09-30 12:01:41.000000000 -0500 @@ -6,142 +6,70 @@ * This file is released under the GPL. */ -#include -#include #include -#include -#include -#include +#include #include -#include -#include +#include #include -#include -#include "dm-snapshot.h" -#include "kcopyd.h" +#include "dm.h" +#include "dm-exception.h" -/* - * FIXME: Remove this before release. - */ -#if 0 -#define DMDEBUG(x...) DMWARN( ## x) -#else -#define DMDEBUG(x...) -#endif +/** + * struct snapshot + * + * Describe one snapshot device. + * + * KMC: We may need to add a pointer to a "struct origin". + **/ +struct snapshot { + struct exception_table *etable; -/* - * The percentage increment we will wake up users at - */ -#define WAKE_UP_PERCENT 5 - -/* - * kcopyd priority of snapshot operations - */ -#define SNAPSHOT_COPY_PRIORITY 2 - -/* - * Each snapshot reserves this many pages for io - * FIXME: calculate this - */ -#define SNAPSHOT_PAGES 256 - -struct pending_exception { - struct exception e; - - /* - * Origin buffers waiting for this to complete are held - * in a list (using b_reqnext). - */ - struct buffer_head *origin_bhs; - struct buffer_head *snapshot_bhs; - - /* - * Other pending_exceptions that are processing this - * chunk. 
When this list is empty, we know we can - * complete the origins. - */ - struct list_head siblings; - - /* Pointer back to snapshot context */ - struct dm_snapshot *snap; - - /* - * 1 indicates the exception has already been sent to - * kcopyd. - */ - int started; + /* List of snapshots per origin. */ + struct list_head list; }; -/* - * Hash table mapping origin volumes to lists of snapshots and - * a lock to protect it - */ -static kmem_cache_t *exception_cache; -static kmem_cache_t *pending_cache; -static mempool_t *pending_pool; - -/* - * One of these per registered origin, held in the snapshot_origins hash - */ +/** + * struct origin + * + * One of these per registered origin, held in the _origins list. + * + * KMC: We may need to add a lock to this structure. + **/ struct origin { /* The origin device */ - kdev_t dev; + struct dm_dev *dev; - struct list_head hash_list; + /* List of all known origins */ + struct list_head origins; /* List of snapshots for this origin */ struct list_head snapshots; + + /* Count of snapshots and origins referrencing this structure. */ + unsigned int count; }; /* - * Size of the hash table for origin volumes. If we make this - * the size of the minors list then it should be nearly perfect + * Global list of all origin devices, and a lock to protect it. + * + * Need to read-lock any time we're examining the _origins list or any + * of the snaphots lists within each origin. Need to write-lock any time + * we're adding a new origin or snapshot structure. */ -#define ORIGIN_HASH_SIZE 256 -#define ORIGIN_MASK 0xFF -static struct list_head *_origins; -static struct rw_semaphore _origins_lock; - -static int init_origin_hash(void) -{ - int i; - - _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), - GFP_KERNEL); - if (!_origins) { - DMERR("Device mapper: Snapshot: unable to allocate memory"); - return -ENOMEM; - } - - for (i = 0; i < ORIGIN_HASH_SIZE; i++) - INIT_LIST_HEAD(_origins + i); - init_rwsem(&_origins_lock); - return 0; -} +static LIST_HEAD(_origins); +static DECLARE_RWSEM(_origins_lock); -static void exit_origin_hash(void) +static struct origin *__lookup_origin(struct dm_dev *dev) { - kfree(_origins); -} - -static inline unsigned int origin_hash(kdev_t dev) -{ - return MINOR(dev) & ORIGIN_MASK; -} - -static struct origin *__lookup_origin(kdev_t origin) -{ - struct list_head *slist; - struct list_head *ol; + struct list_head *lh; struct origin *o; - ol = &_origins[origin_hash(origin)]; - list_for_each(slist, ol) { - o = list_entry(slist, struct origin, hash_list); + list_for_each(lh, &_origins) { + o = list_entry(lh, struct origin, origins); - if (o->dev == origin) + if (o->dev == dev) return o; } @@ -150,264 +78,138 @@ static void __insert_origin(struct origin *o) { - struct list_head *sl = &_origins[origin_hash(o->dev)]; - list_add_tail(&o->hash_list, sl); -} - -/* - * Make a note of the snapshot and its origin so we can look it - * up when the origin has a write on it. 
- */ -static int register_snapshot(struct dm_snapshot *snap) -{ - struct origin *o; - kdev_t dev = snap->origin->dev; - - down_write(&_origins_lock); - o = __lookup_origin(dev); - - if (!o) { - /* New origin */ - o = kmalloc(sizeof(*o), GFP_KERNEL); - if (!o) { - up_write(&_origins_lock); - return -ENOMEM; - } - - /* Initialise the struct */ - INIT_LIST_HEAD(&o->snapshots); - o->dev = dev; - - __insert_origin(o); - } - - list_add_tail(&snap->list, &o->snapshots); - - up_write(&_origins_lock); - return 0; + list_add_tail(&o->origins, &_origins); } -static void unregister_snapshot(struct dm_snapshot *s) +static void __remove_origin(struct origin *o) { - struct origin *o; - - down_write(&_origins_lock); - o = __lookup_origin(s->origin->dev); - - list_del(&s->list); - if (list_empty(&o->snapshots)) { - list_del(&o->hash_list); - kfree(o); - } - - up_write(&_origins_lock); + list_del(&o->origins); } -/* - * Implementation of the exception hash tables. - */ -static int init_exception_table(struct exception_table *et, uint32_t size) +static void __insert_snapshot(struct snapshot *s, struct origin *o) { - unsigned int i; - - et->hash_mask = size - 1; - et->table = vcalloc(size, sizeof(struct list_head)); - if (!et->table) - return -ENOMEM; - - for (i = 0; i < size; i++) - INIT_LIST_HEAD(et->table + i); - - return 0; + list_add_tail(&s->list, &o->snapshots); } -static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem) +static void __remove_snapshot(struct snapshot *s) { - struct list_head *slot, *entry, *temp; - struct exception *ex; - int i, size; - - size = et->hash_mask + 1; - for (i = 0; i < size; i++) { - slot = et->table + i; - - list_for_each_safe(entry, temp, slot) { - ex = list_entry(entry, struct exception, hash_list); - kmem_cache_free(mem, ex); - } - } - - vfree(et->table); + list_del(&s->list); } /* - * FIXME: check how this hash fn is performing. + * Allocate and initialize an origin structure. */ -static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk) +static struct origin * __alloc_origin(struct dm_dev *dev) { - return chunk & et->hash_mask; + struct origin *o = kmalloc(sizeof(*o), GFP_KERNEL); + if (o) { + o->dev = dev; + o->count = 0; + INIT_LIST_HEAD(&o->origins); + INIT_LIST_HEAD(&o->snapshots); + __insert_origin(o); + } + return o; } -static void insert_exception(struct exception_table *eh, struct exception *e) +static void __get_origin(struct origin *o) { - struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)]; - list_add(&e->hash_list, l); + o->count++; } -static inline void remove_exception(struct exception *e) +static void __put_origin(struct origin *o) { - list_del(&e->hash_list); + o->count--; + if (o->count == 0) { + __remove_origin(o); + kfree(o); + } } /* - * Return the exception data for a sector, or NULL if not - * remapped. + * Make a note of the snapshot and its origin so we can look it + * up when the origin has a write on it. 
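+ * Origins are ref-counted: the first registration allocates the
+ * struct origin, and __put_origin() frees it once the last snapshot
+ * or registered origin is gone.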
*/ -static struct exception *lookup_exception(struct exception_table *et, - chunk_t chunk) -{ - struct list_head *slot, *el; - struct exception *e; - - slot = &et->table[exception_hash(et, chunk)]; - list_for_each(el, slot) { - e = list_entry(el, struct exception, hash_list); - if (e->old_chunk == chunk) - return e; - } - - return NULL; -} - -static inline struct exception *alloc_exception(void) -{ - struct exception *e; - - e = kmem_cache_alloc(exception_cache, GFP_NOIO); - if (!e) - e = kmem_cache_alloc(exception_cache, GFP_ATOMIC); - - return e; -} - -static inline void free_exception(struct exception *e) +static int register_snapshot(struct snapshot *snap) { - kmem_cache_free(exception_cache, e); -} + struct origin *o; + struct dm_dev *dev = etable_src_dev(snap->etable); -static inline struct pending_exception *alloc_pending_exception(void) -{ - return mempool_alloc(pending_pool, GFP_NOIO); -} + down_write(&_origins_lock); -static inline void free_pending_exception(struct pending_exception *pe) -{ - mempool_free(pe, pending_pool); -} + o = __lookup_origin(dev); + if (!o) { + /* New origin */ + o = __alloc_origin(dev); + if (!o) { + up_write(&_origins_lock); + return -ENOMEM; + } + } -int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new) -{ - struct exception *e; + __get_origin(o); + __insert_snapshot(snap, o); - e = alloc_exception(); - if (!e) - return -ENOMEM; + up_write(&_origins_lock); - e->old_chunk = old; - e->new_chunk = new; - insert_exception(&s->complete, e); return 0; } -/* - * Hard coded magic. - */ -static int calc_max_buckets(void) +static void unregister_snapshot(struct snapshot *s) { - unsigned long mem; - - mem = num_physpages << PAGE_SHIFT; - mem /= 50; - mem /= sizeof(struct list_head); + struct origin *o; - return mem; + down_write(&_origins_lock); + o = __lookup_origin(etable_src_dev(s->etable)); + __remove_snapshot(s); + __put_origin(o); + up_write(&_origins_lock); } -/* - * Rounds a number down to a power of 2. - */ -static inline uint32_t round_down(uint32_t n) +static struct origin *register_origin(struct dm_dev *dev) { - while (n & (n - 1)) - n &= (n - 1); - return n; -} + struct origin *o; -/* - * Allocate room for a suitable hash table. - */ -static int init_hash_tables(struct dm_snapshot *s) -{ - sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets; + down_write(&_origins_lock); - /* - * Calculate based on the size of the original volume or - * the COW volume... - */ - cow_dev_size = get_dev_size(s->cow->dev); - origin_dev_size = get_dev_size(s->origin->dev); - max_buckets = calc_max_buckets(); - - hash_size = min(origin_dev_size, cow_dev_size) / s->chunk_size; - hash_size = min(hash_size, max_buckets); - - /* Round it down to a power of 2 */ - hash_size = round_down(hash_size); - if (init_exception_table(&s->complete, hash_size)) - return -ENOMEM; + o = __lookup_origin(dev); + if (!o) { + /* New origin */ + o = __alloc_origin(dev); + if (!o) { + up_write(&_origins_lock); + return NULL; + } + } - /* - * Allocate hash table for in-flight exceptions - * Make this smaller than the real hash table - */ - hash_size >>= 3; - if (!hash_size) - hash_size = 64; + __get_origin(o); - if (init_exception_table(&s->pending, hash_size)) { - exit_exception_table(&s->complete, exception_cache); - return -ENOMEM; - } + up_write(&_origins_lock); - return 0; + return o; } -/* - * Round a number up to the nearest 'size' boundary. size must - * be a power of 2. 
- */ -static inline ulong round_up(ulong n, ulong size) +static void unregister_origin(struct origin *o) { - size--; - return (n + size) & ~size; + down_write(&_origins_lock); + __put_origin(o); + up_write(&_origins_lock); } /* - * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size> + * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
*/ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) { - struct dm_snapshot *s; + struct snapshot *s; unsigned long chunk_size; int r = -EINVAL; char persistent; char *origin_path; char *cow_path; - char *value; - int blocksize; if (argc < 4) { - ti->error = "dm-snapshot: requires exactly 4 arguments"; + ti->error = "dm-snapshot requires exactly 4 arguments"; r = -EINVAL; goto bad1; } @@ -415,434 +217,63 @@ origin_path = argv[0]; cow_path = argv[1]; persistent = toupper(*argv[2]); - - if (persistent != 'P' && persistent != 'N') { - ti->error = "Persistent flag is not P or N"; - r = -EINVAL; - goto bad1; - } - - chunk_size = simple_strtoul(argv[3], &value, 10); - if (chunk_size == 0 || value == NULL) { - ti->error = "Invalid chunk size"; - r = -EINVAL; - goto bad1; - } + chunk_size = simple_strtoul(argv[3], NULL, 10); s = kmalloc(sizeof(*s), GFP_KERNEL); - if (s == NULL) { - ti->error = "Cannot allocate snapshot context private " - "structure"; + if (!s) { + ti->error = "cannot allocate snapshot private structure"; r = -ENOMEM; goto bad1; } - r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin); - if (r) { - ti->error = "Cannot get origin device"; - goto bad2; - } - - /* FIXME: get cow length */ - r = dm_get_device(ti, cow_path, 0, 0, - FMODE_READ | FMODE_WRITE, &s->cow); - if (r) { - dm_put_device(ti, s->origin); - ti->error = "Cannot get COW device"; - goto bad2; - } - - /* - * Chunk size must be multiple of page size. Silently - * round up if it's not. - */ - chunk_size = round_up(chunk_size, PAGE_SIZE / SECTOR_SIZE); - - /* Validate the chunk size against the device block size */ - blocksize = get_hardsect_size(s->cow->dev); - if (chunk_size % (blocksize / SECTOR_SIZE)) { - ti->error = "Chunk size is not a multiple of device blocksize"; + s->etable = etable_create(ti, origin_path, cow_path, + chunk_size, persistent); + if (!s->etable) { + ti->error = "cannot create exception table"; r = -EINVAL; - goto bad3; - } - - /* Check the sizes are small enough to fit in one kiovec */ - if (chunk_size > KIO_MAX_SECTORS) { - ti->error = "Chunk size is too big"; - r = -EINVAL; - goto bad3; - } - - /* Check chunk_size is a power of 2 */ - if (chunk_size & (chunk_size - 1)) { - ti->error = "Chunk size is not a power of 2"; - r = -EINVAL; - goto bad3; - } - - s->chunk_size = chunk_size; - s->chunk_mask = chunk_size - 1; - s->type = persistent; - for (s->chunk_shift = 0; chunk_size; - s->chunk_shift++, chunk_size >>= 1) - ; - s->chunk_shift--; - - s->valid = 1; - s->have_metadata = 0; - s->last_percent = 0; - init_rwsem(&s->lock); - s->table = ti->table; - - /* Allocate hash table for COW data */ - if (init_hash_tables(s)) { - ti->error = "Unable to allocate hash table space"; - r = -ENOMEM; - goto bad3; - } - - /* - * Check the persistent flag - done here because we need the iobuf - * to check the LV header - */ - s->store.snap = s; - - if (persistent == 'P') - r = dm_create_persistent(&s->store, s->chunk_size); - else - r = dm_create_transient(&s->store, s, blocksize); - - if (r) { - ti->error = "Couldn't create exception store"; - r = -EINVAL; - goto bad4; - } - - r = kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client); - if (r) { - ti->error = "Could not create kcopyd client"; - goto bad5; + goto bad2; } /* Add snapshot to the list of snapshots for this origin */ if (register_snapshot(s)) { r = -EINVAL; - ti->error = "Cannot register snapshot origin"; - goto bad6; + ti->error = "cannot register snapshot device"; + goto bad3; } ti->private = s; 
return 0; - bad6: - kcopyd_client_destroy(s->kcopyd_client); - - bad5: - s->store.destroy(&s->store); - - bad4: - exit_exception_table(&s->pending, pending_cache); - exit_exception_table(&s->complete, exception_cache); - bad3: - dm_put_device(ti, s->cow); - dm_put_device(ti, s->origin); - + etable_delete(s->etable); bad2: kfree(s); - bad1: return r; } static void snapshot_dtr(struct dm_target *ti) { - struct dm_snapshot *s = (struct dm_snapshot *) ti->private; - - dm_table_event(ti->table); - + struct snapshot *s = (struct snapshot *) ti->private; unregister_snapshot(s); - - exit_exception_table(&s->pending, pending_cache); - exit_exception_table(&s->complete, exception_cache); - - /* Deallocate memory used */ - s->store.destroy(&s->store); - - dm_put_device(ti, s->origin); - dm_put_device(ti, s->cow); - kcopyd_client_destroy(s->kcopyd_client); + etable_delete(s->etable); kfree(s); } -/* - * We hold lists of buffer_heads, using the b_reqnext field. - */ -static void queue_buffer(struct buffer_head **queue, struct buffer_head *bh) -{ - bh->b_reqnext = *queue; - *queue = bh; -} - -/* - * FIXME: inefficient. - */ -static void queue_buffers(struct buffer_head **queue, struct buffer_head *bhs) -{ - while (*queue) - queue = &((*queue)->b_reqnext); - - *queue = bhs; -} - -/* - * Flush a list of buffers. - */ -static void flush_buffers(struct buffer_head *bh) -{ - struct buffer_head *n; - - DMDEBUG("begin flush"); - while (bh) { - n = bh->b_reqnext; - bh->b_reqnext = NULL; - DMDEBUG("flushing %p", bh); - generic_make_request(WRITE, bh); - bh = n; - } - - run_task_queue(&tq_disk); -} - -/* - * Error a list of buffers. - */ -static void error_buffers(struct buffer_head *bh) -{ - struct buffer_head *n; - - while (bh) { - n = bh->b_reqnext; - bh->b_reqnext = NULL; - buffer_IO_error(bh); - bh = n; - } -} - -static struct buffer_head *__flush_bhs(struct pending_exception *pe) -{ - struct pending_exception *sibling; - - if (list_empty(&pe->siblings)) - return pe->origin_bhs; - - sibling = list_entry(pe->siblings.next, - struct pending_exception, siblings); - - list_del(&pe->siblings); - - /* FIXME: I think there's a race on SMP machines here, add spin lock */ - queue_buffers(&sibling->origin_bhs, pe->origin_bhs); - - return NULL; -} - -static void pending_complete(struct pending_exception *pe, int success) -{ - struct exception *e; - struct dm_snapshot *s = pe->snap; - struct buffer_head *flush = NULL; - - if (success) { - e = alloc_exception(); - if (!e) { - DMWARN("Unable to allocate exception."); - down_write(&s->lock); - s->store.drop_snapshot(&s->store); - s->valid = 0; - flush = __flush_bhs(pe); - up_write(&s->lock); - - error_buffers(pe->snapshot_bhs); - goto out; - } - - /* - * Add a proper exception, and remove the - * in-flight exception from the list. 
- */ - down_write(&s->lock); - - memcpy(e, &pe->e, sizeof(*e)); - insert_exception(&s->complete, e); - remove_exception(&pe->e); - flush = __flush_bhs(pe); - - /* Submit any pending write BHs */ - up_write(&s->lock); - - flush_buffers(pe->snapshot_bhs); - DMDEBUG("Exception completed successfully."); - - /* Notify any interested parties */ - if (s->store.fraction_full) { - sector_t numerator, denominator; - int pc; - - s->store.fraction_full(&s->store, &numerator, - &denominator); - pc = numerator * 100 / denominator; - - if (pc >= s->last_percent + WAKE_UP_PERCENT) { - dm_table_event(s->table); - s->last_percent = pc - pc % WAKE_UP_PERCENT; - } - } - - } else { - /* Read/write error - snapshot is unusable */ - down_write(&s->lock); - if (s->valid) - DMERR("Error reading/writing snapshot"); - s->store.drop_snapshot(&s->store); - s->valid = 0; - remove_exception(&pe->e); - flush = __flush_bhs(pe); - up_write(&s->lock); - - error_buffers(pe->snapshot_bhs); - - dm_table_event(s->table); - DMDEBUG("Exception failed."); - } - - out: - if (flush) - flush_buffers(flush); - - free_pending_exception(pe); -} - -static void commit_callback(void *context, int success) +static int snapshot_map(struct dm_target *ti, struct buffer_head *bh, + int rw, union map_info *map_context) { - struct pending_exception *pe = (struct pending_exception *) context; - pending_complete(pe, success); -} - -/* - * Called when the copy I/O has finished. kcopyd actually runs - * this code so don't block. - */ -static void copy_callback(int read_err, unsigned int write_err, void *context) -{ - struct pending_exception *pe = (struct pending_exception *) context; - struct dm_snapshot *s = pe->snap; - - if (read_err || write_err) - pending_complete(pe, 0); - - else - /* Update the metadata if we are persistent */ - s->store.commit_exception(&s->store, &pe->e, commit_callback, - pe); -} - -/* - * Dispatches the copy operation to kcopyd. - */ -static inline void start_copy(struct pending_exception *pe) -{ - struct dm_snapshot *s = pe->snap; - struct io_region src, dest; - kdev_t dev = s->origin->dev; - int *sizes = blk_size[major(dev)]; - sector_t dev_size = (sector_t) -1; - - if (pe->started) - return; - - /* this is protected by snap->lock */ - pe->started = 1; - - if (sizes && sizes[minor(dev)]) - dev_size = sizes[minor(dev)] << 1; - - src.dev = dev; - src.sector = chunk_to_sector(s, pe->e.old_chunk); - src.count = min(s->chunk_size, dev_size - src.sector); - - dest.dev = s->cow->dev; - dest.sector = chunk_to_sector(s, pe->e.new_chunk); - dest.count = src.count; - - /* Hand over to kcopyd */ - kcopyd_copy(s->kcopyd_client, - &src, 1, &dest, 0, copy_callback, pe); -} - -/* - * Looks to see if this snapshot already has a pending exception - * for this chunk, otherwise it allocates a new one and inserts - * it into the pending table. - */ -static struct pending_exception *find_pending_exception(struct dm_snapshot *s, - struct buffer_head *bh) -{ - struct exception *e; + struct snapshot *s = (struct snapshot *) ti->private; + struct exception_table *et = s->etable; struct pending_exception *pe; - chunk_t chunk = sector_to_chunk(s, bh->b_rsector); - - /* - * Is there a pending exception for this already ? 
- */ - e = lookup_exception(&s->pending, chunk); - if (e) { - /* cast the exception to a pending exception */ - pe = list_entry(e, struct pending_exception, e); - - } else { - /* Create a new pending exception */ - pe = alloc_pending_exception(); - pe->e.old_chunk = chunk; - pe->origin_bhs = pe->snapshot_bhs = NULL; - INIT_LIST_HEAD(&pe->siblings); - pe->snap = s; - pe->started = 0; - - if (s->store.prepare_exception(&s->store, &pe->e)) { - free_pending_exception(pe); - s->valid = 0; - return NULL; - } - - insert_exception(&s->pending, &pe->e); - } - - return pe; -} - -static inline void remap_exception(struct dm_snapshot *s, struct exception *e, - struct buffer_head *bh) -{ - bh->b_rdev = s->cow->dev; - bh->b_rsector = chunk_to_sector(s, e->new_chunk) + - (bh->b_rsector & s->chunk_mask); -} - -static int snapshot_map(struct dm_target *ti, struct buffer_head *bh, int rw, - union map_info *map_context) -{ struct exception *e; - struct dm_snapshot *s = (struct dm_snapshot *) ti->private; int r = 1; - chunk_t chunk; - struct pending_exception *pe; - - chunk = sector_to_chunk(s, bh->b_rsector); /* Full snapshots are not usable */ - if (!s->valid) + /* KMC: Should this check be inside the lock? */ + if (!etable_valid(et)) { return -1; + } /* * Write to snapshot - higher level takes care of RW/RO @@ -851,29 +282,49 @@ */ if (rw == WRITE) { - down_write(&s->lock); + /* Check for a complete-exception first. If one + * exists we can go ahead with the remap. + */ + etable_read_lock(et); + e = _etable_lookup_exception(et, bh); + if (e) { + etable_read_unlock(et); + etable_remap_exception(et, e, bh); + goto out; + } - /* If the block is already remapped - use that, else remap it */ - e = lookup_exception(&s->complete, chunk); - if (e) - remap_exception(s, e, bh); + etable_read_unlock(et); + etable_write_lock(et); - else { - pe = find_pending_exception(s, bh); + /* Need to check for a complete-exception again since we + * dropped and re-took the lock. If we still don't find + * a complete-exception, we need to create a new remap. + */ + e = _etable_lookup_exception(et, bh); + if (e) { + etable_write_unlock(et); + etable_remap_exception(et, e, bh); + goto out; + } + pe = __etable_lookup_pending_exception(et, bh); + if (!pe) { + pe = __etable_create_pending_exception(et, bh); if (!pe) { - s->store.drop_snapshot(&s->store); - s->valid = 0; + __etable_invalidate(et); + etable_write_unlock(et); r = -EIO; - } else { - remap_exception(s, &pe->e, bh); - queue_buffer(&pe->snapshot_bhs, bh); - start_copy(pe); - r = 0; + goto out; } + + __etable_start_copy(pe); } - up_write(&s->lock); + __etable_queue_dest_buffer(pe, bh); + etable_put_pending_exception(pe); + r = 0; + + etable_write_unlock(et); } else { /* @@ -883,74 +334,82 @@ * situation where this is wrong - ejt. */ + /* KMC: Two issues to consider here. First, what happens when + * we get a snapshot read for a chunk that has a + * pending exception? Second, what happens when we need to + * start a new pending exception for a chunk with + * outstanding snapshot reads? 
+ */ + /* Do reads */ - down_read(&s->lock); + etable_read_lock(et); - /* See if it it has been remapped */ - e = lookup_exception(&s->complete, chunk); - if (e) - remap_exception(s, e, bh); - else - bh->b_rdev = s->origin->dev; + /* See if it has been remapped */ + e = _etable_lookup_exception(et, bh); + if (e) { + etable_remap_exception(et, e, bh); + } else { + struct dm_dev *dev = etable_src_dev(et); + bh->b_rdev = dev->dev; + } - up_read(&s->lock); + etable_read_unlock(et); } +out: return r; } -void snapshot_resume(struct dm_target *ti) +static void snapshot_resume(struct dm_target *ti) { - struct dm_snapshot *s = (struct dm_snapshot *) ti->private; - - if (s->have_metadata) - return; + struct snapshot *s = (struct snapshot *) ti->private; + int r; - if (s->store.read_metadata(&s->store)) { - down_write(&s->lock); - s->valid = 0; - up_write(&s->lock); + r = etable_read_metadata(s->etable); + if (r) { + etable_write_lock(s->etable); + __etable_invalidate(s->etable); + etable_write_unlock(s->etable); } - - s->have_metadata = 1; } static int snapshot_status(struct dm_target *ti, status_type_t type, char *result, unsigned int maxlen) { - struct dm_snapshot *snap = (struct dm_snapshot *) ti->private; - char cow[16]; - char org[16]; + struct snapshot *snap = (struct snapshot *) ti->private; + struct exception_table *et = snap->etable; + sector_t numerator, denominator; + struct dm_dev *dev; + char cow[16], org[16]; + int r; switch (type) { case STATUSTYPE_INFO: - if (!snap->valid) + if (!etable_valid(et)) { snprintf(result, maxlen, "Invalid"); - else { - if (snap->store.fraction_full) { - sector_t numerator, denominator; - snap->store.fraction_full(&snap->store, - &numerator, - &denominator); + } else { + r = etable_fraction_full(et, &numerator, &denominator); + if (r) { + snprintf(result, maxlen, "Unknown"); + } else { snprintf(result, maxlen, SECTOR_FORMAT "/" SECTOR_FORMAT, numerator, denominator); } - else - snprintf(result, maxlen, "Unknown"); } break; case STATUSTYPE_TABLE: /* - * kdevname returns a static pointer so we need - * to make private copies if the output is to - * make sense. + * dm_kdevname returns a static pointer so we need to + * make private copies if the output is to make sense. 
*/ - strncpy(cow, dm_kdevname(snap->cow->dev), sizeof(cow)); - strncpy(org, dm_kdevname(snap->origin->dev), sizeof(org)); + dev = etable_src_dev(et); + strncpy(org, dm_kdevname(dev->dev), sizeof(org)); + dev = etable_dest_dev(et); + strncpy(cow, dm_kdevname(dev->dev), sizeof(cow)); snprintf(result, maxlen, "%s %s %c %ld", org, cow, - snap->type, snap->chunk_size); + etable_type(et), etable_chunk_size(et)); break; } @@ -960,123 +419,109 @@ /*----------------------------------------------------------------- * Origin methods *---------------------------------------------------------------*/ -static void list_merge(struct list_head *l1, struct list_head *l2) -{ - struct list_head *l1_n, *l2_p; - - l1_n = l1->next; - l2_p = l2->prev; - - l1->next = l2; - l2->prev = l1; - - l2_p->next = l1_n; - l1_n->prev = l2_p; -} - static int __origin_write(struct list_head *snapshots, struct buffer_head *bh) { - int r = 1, first = 1; + int r = 1; struct list_head *sl; - struct dm_snapshot *snap; + struct snapshot *snap; + struct exception_table *et; struct exception *e; - struct pending_exception *pe, *last = NULL; - chunk_t chunk; + struct pending_exception *pe, *list = NULL; /* Do all the snapshots on this origin */ list_for_each(sl, snapshots) { - snap = list_entry(sl, struct dm_snapshot, list); + snap = list_entry(sl, struct snapshot, list); + et = snap->etable; /* Only deal with valid snapshots */ - if (!snap->valid) + /* KMC: Should this check be inside the lock? */ + if (!etable_valid(et)) continue; - down_write(&snap->lock); - - /* - * Remember, different snapshots can have - * different chunk sizes. + /* Check for a complete-exception first. If one + * exists we can just go on to the next snapshot. */ - chunk = sector_to_chunk(snap, bh->b_rsector); + etable_read_lock(et); + e = _etable_lookup_exception(et, bh); + if (e) { + etable_read_unlock(et); + continue; + } - /* - * Check exception table to see if block - * is already remapped in this snapshot - * and trigger an exception if not. + etable_read_unlock(et); + etable_write_lock(et); + + /* Need to check for a complete-exception again since we + * dropped and re-took the lock. If we still don't find + * a complete-exception, we need to remap this chunk. */ - e = lookup_exception(&snap->complete, chunk); + e = _etable_lookup_exception(et, bh); if (!e) { - pe = find_pending_exception(snap, bh); + pe = __etable_lookup_pending_exception(et, bh); if (!pe) { - snap->store.drop_snapshot(&snap->store); - snap->valid = 0; + pe = __etable_create_pending_exception(et, bh); + if (!pe) { + __etable_invalidate(et); + etable_write_unlock(et); + continue; + } + } + if (list) { + /* KMC: This looks racy!!! + * Two threads could be merging PEs into the + * same list at the same time. The locks in + * the etables aren't sufficient to protect + * this call. + */ + __etable_merge_pending_exceptions(pe, list); } else { - if (last) - list_merge(&pe->siblings, - &last->siblings); - - last = pe; - r = 0; + /* KMC: Can we queue the buffer on this pe, + * since we know this is the first in the list? + * If we do this, we must make an appropriate + * change to etable_start_copies. + */ + __etable_queue_src_buffer(pe, bh); } + + list = pe; + r = 0; } - up_write(&snap->lock); + etable_write_unlock(et); } + /* KMC: This concerns me. We've locked each individual exception + * table as we've gathered the list of pending-exceptions. But + * now we've dropped all of the locks, but we're still holding + * a list of pending-exceptions. 
What if one/all of those + * complete before we get a chance to call the next function? + * My guess is we need some kind of ref-counting for pending + * exceptions. + */ + /* * Now that we have a complete pe list we can start the copying. */ - if (last) { - pe = last; - do { - down_write(&pe->snap->lock); - if (first) - queue_buffer(&pe->origin_bhs, bh); - start_copy(pe); - up_write(&pe->snap->lock); - first = 0; - pe = list_entry(pe->siblings.next, - struct pending_exception, siblings); - - } while (pe != last); + if (list) { + etable_start_copies(list); } return r; } /* - * Called on a write from the origin driver. - */ -int do_origin(struct dm_dev *origin, struct buffer_head *bh) -{ - struct origin *o; - int r; - - down_read(&_origins_lock); - o = __lookup_origin(origin->dev); - if (!o) - BUG(); - - r = __origin_write(&o->snapshots, bh); - up_read(&_origins_lock); - - return r; -} - -/* * Origin: maps a linear range of a device, with hooks for snapshotting. */ /* * Construct an origin mapping: <dev_path> - * The context for an origin is merely a 'struct dm_dev *' - * pointing to the real device. */ static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) { - int r; struct dm_dev *dev; + struct origin *o; + int r; if (argc != 1) { ti->error = "dm-origin: incorrect number of arguments"; @@ -1086,34 +531,51 @@ r = dm_get_device(ti, argv[0], 0, ti->len, dm_table_get_mode(ti->table), &dev); if (r) { - ti->error = "Cannot get target device"; + ti->error = "cannot get origin device"; return r; } - ti->private = dev; + o = register_origin(dev); + if (!o) { + ti->error = "cannot register origin"; + dm_put_device(ti, dev); + return -ENOMEM; + } + + ti->private = o; return 0; } static void origin_dtr(struct dm_target *ti) { - struct dm_dev *dev = (struct dm_dev *) ti->private; + struct origin *o = (struct origin *) ti->private; + struct dm_dev *dev = o->dev; + unregister_origin(o); dm_put_device(ti, dev); } -static int origin_map(struct dm_target *ti, struct buffer_head *bh, int rw, - union map_info *map_context) +static int origin_map(struct dm_target *ti, struct buffer_head *bh, + int rw, union map_info *map_context) { - struct dm_dev *dev = (struct dm_dev *) ti->private; - bh->b_rdev = dev->dev; + struct origin *o = (struct origin *) ti->private; + int r = 1; + + bh->b_rdev = o->dev->dev; /* Only tell snapshots if this is a write */ - return (rw == WRITE) ?
do_origin(dev, bh) : 1; + if (rw == WRITE) { + down_read(&_origins_lock); + r = __origin_write(&o->snapshots, bh); + up_read(&_origins_lock); + } + + return r; } -static int origin_status(struct dm_target *ti, status_type_t type, char *result, - unsigned int maxlen) +static int origin_status(struct dm_target *ti, status_type_t type, + char *result, unsigned int maxlen) { - struct dm_dev *dev = (struct dm_dev *) ti->private; + struct origin *o = (struct origin *) ti->private; switch (type) { case STATUSTYPE_INFO: @@ -1121,7 +583,7 @@ break; case STATUSTYPE_TABLE: - snprintf(result, maxlen, "%s", dm_kdevname(dev->dev)); + snprintf(result, maxlen, "%s", dm_kdevname(o->dev->dev)); break; } @@ -1153,64 +615,18 @@ r = dm_register_target(&snapshot_target); if (r) { - DMERR("snapshot target register failed %d", r); + DMERR("failed to register snapshot target: %d", r); return r; } r = dm_register_target(&origin_target); - if (r < 0) { - DMERR("Device mapper: Origin: register failed %d\n", r); - goto bad1; - } - - r = init_origin_hash(); if (r) { - DMERR("init_origin_hash failed."); - goto bad2; - } - - exception_cache = kmem_cache_create("dm-snapshot-ex", - sizeof(struct exception), - __alignof__(struct exception), - 0, NULL, NULL); - if (!exception_cache) { - DMERR("Couldn't create exception cache."); - r = -ENOMEM; - goto bad3; - } - - pending_cache = - kmem_cache_create("dm-snapshot-in", - sizeof(struct pending_exception), - __alignof__(struct pending_exception), - 0, NULL, NULL); - if (!pending_cache) { - DMERR("Couldn't create pending cache."); - r = -ENOMEM; - goto bad4; - } - - pending_pool = mempool_create(128, mempool_alloc_slab, - mempool_free_slab, pending_cache); - if (!pending_pool) { - DMERR("Couldn't create pending pool."); - r = -ENOMEM; - goto bad5; + DMERR("failed to register origin target: %d\n", r); + dm_unregister_target(&snapshot_target); + return r; } return 0; - - bad5: - kmem_cache_destroy(pending_cache); - bad4: - kmem_cache_destroy(exception_cache); - bad3: - exit_origin_hash(); - bad2: - dm_unregister_target(&origin_target); - bad1: - dm_unregister_target(&snapshot_target); - return r; } void dm_snapshot_exit(void) @@ -1219,14 +635,10 @@ r = dm_unregister_target(&snapshot_target); if (r) - DMERR("snapshot unregister failed %d", r); + DMERR("failed to unregister snapshot target: %d", r); r = dm_unregister_target(&origin_target); if (r) - DMERR("origin unregister failed %d", r); - - exit_origin_hash(); - mempool_destroy(pending_pool); - kmem_cache_destroy(pending_cache); - kmem_cache_destroy(exception_cache); + DMERR("failed to unregister origin target: %d", r); } + diff -Naur linux-2.4.22-dm-1/drivers/md/dm-snapshot.h linux-2.4.22-new-snapshot/drivers/md/dm-snapshot.h --- linux-2.4.22-dm-1/drivers/md/dm-snapshot.h 2003-09-30 11:59:23.000000000 -0500 +++ linux-2.4.22-new-snapshot/drivers/md/dm-snapshot.h 1969-12-31 18:00:00.000000000 -0600 @@ -1,158 +0,0 @@ -/* - * dm-snapshot.c - * - * Copyright (C) 2001-2002 Sistina Software (UK) Limited. - * - * This file is released under the GPL. - */ - -#ifndef DM_SNAPSHOT_H -#define DM_SNAPSHOT_H - -#include "dm.h" -#include - -struct exception_table { - uint32_t hash_mask; - struct list_head *table; -}; - -/* - * The snapshot code deals with largish chunks of the disk at a - * time. Typically 64k - 256k. - */ -/* FIXME: can we get away with limiting these to a uint32_t ? */ -typedef sector_t chunk_t; - -/* - * An exception is used where an old chunk of data has been - * replaced by a new one. 
- */ -struct exception { - struct list_head hash_list; - - chunk_t old_chunk; - chunk_t new_chunk; -}; - -/* - * Abstraction to handle the meta/layout of exception stores (the - * COW device). - */ -struct exception_store { - - /* - * Destroys this object when you've finished with it. - */ - void (*destroy) (struct exception_store *store); - - /* - * The target shouldn't read the COW device until this is - * called. - */ - int (*read_metadata) (struct exception_store *store); - - /* - * Find somewhere to store the next exception. - */ - int (*prepare_exception) (struct exception_store *store, - struct exception *e); - - /* - * Update the metadata with this exception. - */ - void (*commit_exception) (struct exception_store *store, - struct exception *e, - void (*callback) (void *, int success), - void *callback_context); - - /* - * The snapshot is invalid, note this in the metadata. - */ - void (*drop_snapshot) (struct exception_store *store); - - /* - * Return how full the snapshot is. - */ - void (*fraction_full) (struct exception_store *store, - sector_t *numerator, - sector_t *denominator); - - struct dm_snapshot *snap; - void *context; -}; - -struct dm_snapshot { - struct rw_semaphore lock; - struct dm_table *table; - - struct dm_dev *origin; - struct dm_dev *cow; - - /* List of snapshots per Origin */ - struct list_head list; - - /* Size of data blocks saved - must be a power of 2 */ - chunk_t chunk_size; - chunk_t chunk_mask; - chunk_t chunk_shift; - - /* You can't use a snapshot if this is 0 (e.g. if full) */ - int valid; - int have_metadata; - - /* Used for display of table */ - char type; - - /* The last percentage we notified */ - int last_percent; - - struct exception_table pending; - struct exception_table complete; - - /* The on disk metadata handler */ - struct exception_store store; - - struct kcopyd_client *kcopyd_client; -}; - -/* - * Used by the exception stores to load exceptions hen - * initialising. - */ -int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new); - -/* - * Constructor and destructor for the default persistent - * store. - */ -int dm_create_persistent(struct exception_store *store, uint32_t chunk_size); - -int dm_create_transient(struct exception_store *store, - struct dm_snapshot *s, int blocksize); - -/* - * Return the number of sectors in the device. 
- */ -static inline sector_t get_dev_size(kdev_t dev) -{ - int *sizes; - - sizes = blk_size[MAJOR(dev)]; - if (sizes) - return sizes[MINOR(dev)] << 1; - - return 0; -} - -static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector) -{ - return (sector & ~s->chunk_mask) >> s->chunk_shift; -} - -static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk) -{ - return chunk << s->chunk_shift; -} - -#endif diff -Naur linux-2.4.22-dm-1/drivers/md/dm-table.c linux-2.4.22-new-snapshot/drivers/md/dm-table.c --- linux-2.4.22-dm-1/drivers/md/dm-table.c 2003-09-30 11:59:23.000000000 -0500 +++ linux-2.4.22-new-snapshot/drivers/md/dm-table.c 2003-09-30 12:01:56.000000000 -0500 @@ -319,16 +319,14 @@ */ static int check_device_area(kdev_t dev, sector_t start, sector_t len) { - int *sizes; sector_t dev_size; - if (!(sizes = blk_size[major(dev)]) || !(dev_size = sizes[minor(dev)])) + dev_size = get_dev_size(dev); + if (!dev_size) { /* we don't know the device details, * so give the benefit of the doubt */ return 1; - - /* convert to 512-byte sectors */ - dev_size <<= 1; + } return ((start < dev_size) && (len <= (dev_size - start))); } diff -Naur linux-2.4.22-dm-1/drivers/md/dm.c linux-2.4.22-new-snapshot/drivers/md/dm.c --- linux-2.4.22-dm-1/drivers/md/dm.c 2003-09-30 11:59:23.000000000 -0500 +++ linux-2.4.22-new-snapshot/drivers/md/dm.c 2003-09-30 12:01:52.000000000 -0500 @@ -369,6 +369,7 @@ xx(dm_linear) xx(dm_stripe) xx(dm_snapshot) + xx(dm_exception) xx(dm_interface) #undef xx }; @@ -456,7 +457,7 @@ unsigned int command, unsigned long a) { kdev_t dev = inode->i_rdev; - long size; + sector_t size; switch (command) { case BLKROSET: diff -Naur linux-2.4.22-dm-1/drivers/md/dm.h linux-2.4.22-new-snapshot/drivers/md/dm.h --- linux-2.4.22-dm-1/drivers/md/dm.h 2003-09-30 11:59:23.000000000 -0500 +++ linux-2.4.22-new-snapshot/drivers/md/dm.h 2003-09-30 12:01:53.000000000 -0500 @@ -19,6 +19,12 @@ #define DMERR(f, x...) printk(KERN_ERR DM_NAME ": " f "\n" , ## x) #define DMINFO(f, x...) printk(KERN_INFO DM_NAME ": " f "\n" , ## x) +#if 0 +#define DMDEBUG(x...) DMWARN( ## x) +#else +#define DMDEBUG(x...) +#endif + /* * FIXME: I think this should be with the definition of sector_t * in types.h. @@ -128,6 +134,21 @@ /*----------------------------------------------------------------- * Useful inlines. *---------------------------------------------------------------*/ + +/* + * Return the number of sectors in the device. + */ +static inline sector_t get_dev_size(kdev_t dev) +{ + int *sizes; + + sizes = blk_size[MAJOR(dev)]; + if (sizes) + return sizes[MINOR(dev)] << 1; + + return 0; +} + static inline int array_too_big(unsigned long fixed, unsigned long obj, unsigned long num) { @@ -172,4 +193,7 @@ int dm_snapshot_init(void); void dm_snapshot_exit(void); +int dm_exception_init(void); +void dm_exception_exit(void); + #endif
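
One pattern in the patch worth spelling out: snapshot_map() and __origin_write() both look up a completed exception under the read lock, drop it, retake the table lock for writing, and then repeat the lookup before touching the pending table. The second lookup is not redundant; another writer may complete the exception in the window where no lock is held. Reduced to its skeleton (a sketch built from the etable calls this patch introduces; error paths omitted):

	etable_read_lock(et);
	e = _etable_lookup_exception(et, bh);
	etable_read_unlock(et);
	if (e) {
		/* Fast path: the chunk is already remapped. */
		etable_remap_exception(et, e, bh);
		return 0;
	}

	etable_write_lock(et);
	/*
	 * Re-check under the write lock: the exception may have been
	 * completed while we held no lock at all.
	 */
	e = _etable_lookup_exception(et, bh);
	if (!e) {
		/* Still not remapped - safe to create/reuse a pending
		 * exception now, since we hold the write lock. */
		pe = __etable_lookup_pending_exception(et, bh);
		if (!pe)
			pe = __etable_create_pending_exception(et, bh);
	}
	etable_write_unlock(et);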
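
The KMC comment in __origin_write() about pending exceptions completing while the caller still holds a private list of them points at reference counting, and the etable_put_pending_exception() call the patch already makes in snapshot_map() suggests the etable layer is headed that way. A minimal sketch of how the get/put pair could work; the 'count' field, the get helper, and free_pending_exception() are assumptions for illustration, not part of this patch:

	struct pending_exception {
		struct exception e;
		atomic_t count;		/* one ref per table/list holding us */
		/* ... */
	};

	static inline void etable_get_pending_exception(struct pending_exception *pe)
	{
		atomic_inc(&pe->count);
	}

	void etable_put_pending_exception(struct pending_exception *pe)
	{
		/*
		 * Only the last holder frees the pe, so a copy that
		 * completes while __origin_write() still holds its
		 * private list cannot free it out from under the caller.
		 */
		if (atomic_dec_and_test(&pe->count))
			free_pending_exception(pe);
	}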
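
The "KMC: This looks racy!!!" comment is about two origin writers, each holding the write lock of a *different* etable, calling __etable_merge_pending_exceptions() against the same pending exceptions concurrently. One way to close that race, sketched under the assumption that a single global lock is acceptable (pe_merge_lock is hypothetical; the merge function is from this patch):

	static spinlock_t pe_merge_lock = SPIN_LOCK_UNLOCKED;

	static void merge_pending_exceptions(struct pending_exception *pe,
					     struct pending_exception *list)
	{
		unsigned long flags;

		/* Serialize all sibling-list merges behind one lock. */
		spin_lock_irqsave(&pe_merge_lock, flags);
		__etable_merge_pending_exceptions(pe, list);
		spin_unlock_irqrestore(&pe_merge_lock, flags);
	}

A coarse global lock is the simplest fix; a per-origin lock (the lock the KMC comment on struct origin already anticipates) would scale better, since merges only ever involve snapshots of the same origin.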