
This patch contains:

 - Initial support for SCSI PUNCH
 - Initial support for ATA TRIM
 - Initial support for IDE TRIM
 - Support in ata_ram for TRIM

Changes since last version:

 - Added dwmw2's IDE patch
 - Factored some common code from IDE patch and my ATA work into ata.h
 - Changed how we encode the sector/length

Still to do:

 - Figure out how to tell whether a SCSI device supports PUNCH or not
 - Translate ATA TRIM support bit into the SCSI equivalent
 - Actually try this with a real drive, not just ata_ram.
 - Get an official command from T10 for PUNCH
 - Define a named constant in ata.h for the 0x06 (Data Set Management) opcode (first need to merge some other patches)
 - Add discard support to MD/DM

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 5c99ff8..531c18c 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -324,6 +324,8 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
 		clear_bit(BIO_UPTODATE, &bio->bi_flags);
 	}
 
+	if (bio_has_data(bio))
+		__free_page(bio_page(bio));
 	bio_put(bio);
 }
 
@@ -355,7 +357,7 @@ int blkdev_issue_discard(struct block_device *bdev,
 		return -EOPNOTSUPP;
 
 	while (nr_sects && !ret) {
-		bio = bio_alloc(gfp_mask, 0);
+		bio = bio_alloc(gfp_mask, 1);
 		if (!bio)
 			return -ENOMEM;
 
diff --git a/block/blk-core.c b/block/blk-core.c
index 2d053b5..e92e578 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1069,6 +1069,8 @@ EXPORT_SYMBOL(blk_put_request);
 
 void init_request_from_bio(struct request *req, struct bio *bio)
 {
+	might_sleep();
+
 	req->cpu = bio->bi_comp_cpu;
 	req->cmd_type = REQ_TYPE_FS;
 
@@ -1085,7 +1087,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 		req->cmd_flags |= REQ_DISCARD;
 		if (bio_barrier(bio))
 			req->cmd_flags |= REQ_SOFTBARRIER;
-		req->q->prepare_discard_fn(req->q, req);
+		req->q->prepare_discard_fn(req->q, req, bio);
 	} else if (unlikely(bio_barrier(bio)))
 		req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
 
diff --git a/drivers/ata/ata_ram.c b/drivers/ata/ata_ram.c
index 0405781..59e9c17 100644
--- a/drivers/ata/ata_ram.c
+++ b/drivers/ata/ata_ram.c
@@ -215,6 +215,25 @@ static struct page *ata_ram_alloc_page(struct ata_ram_port *arport,
 	return page;
 }
 
+/*
+ * Free every backing page whose index lies in [pfn, last_pfn], inclusive.
+ * Each pass looks up one page at or beyond pfn and takes its actual index
+ * from page->private.
+ */
+static void ata_ram_free_pages(struct ata_ram_port *arport,
+				unsigned long pfn, unsigned long last_pfn)
+{
+	struct page *pg;
+
+	while (radix_tree_gang_lookup(&arport->root, (void **)&pg, pfn, 1)) {
+		pfn = pg->private;
+		if (pfn > last_pfn)
+			return;
+		radix_tree_delete(&arport->root, pfn);
+		__free_page(pg);
+	}
+}
+
 /*
  * We could steal the pages we need from the requests as they come in, which
  * is what rd.c does.  However, that's not a realistic simulator of how a
@@ -511,6 +530,46 @@ static int ata_ram_write_fpdma(struct ata_queued_cmd *qc)
 	return ata_ram_write(qc, first_block, length, 1);
 }
 
+/* TRIM: free every backing page wholly covered by an LBA range entry. */
+static int ata_ram_trim(struct ata_queued_cmd *qc)
+{
+	struct ata_ram_port *arport = qc->ap->private_data;
+	struct scatterlist *sg;
+	unsigned i;
+
+	if (preallocate)
+		return 0;
+	for_each_sg(qc->sg, sg, qc->n_elem, i) {
+		struct page *sgpage = sg_page(sg);
+		unsigned int offset = sg->offset, left = sg->length;
+		unsigned char *buffer = get_sg_page(sgpage);
+
+		/* Count bytes left, so a non-zero sg->offset is handled */
+		for (; left >= 8; left -= 8, offset += 8) {
+			u64 entry, start, first, end;
+			u32 len;
+
+			if (offset == PAGE_SIZE) {	/* step to next page */
+				offset = 0;
+				put_sg_page(buffer);
+				buffer = get_sg_page(++sgpage);
+			}
+			entry = le64_to_cpup((u64 *)(buffer + offset));
+			start = (entry & 0xffffffffffffULL) * sector_size;
+			len = (entry >> 48) * sector_size;
+			if (len == 0)		/* zero-length entry: stop */
+				break;
+			first = DIV_ROUND_UP(start, PAGE_SIZE);
+			end = (start + len) / PAGE_SIZE;
+			/* page 'end' is partial; last_pfn is inclusive */
+			if (end > first)
+				ata_ram_free_pages(arport, first, end - 1);
+		}
+		put_sg_page(buffer);
+	}
+	return 0;
+}
+
 static void ata_ram_identify(struct ata_queued_cmd *qc)
 {
 	u16 id[256];
@@ -626,6 +685,9 @@ static void ata_ram_execute_command(struct ata_queued_cmd *qc)
 	case ATA_CMD_FPDMA_WRITE:
 		err = ata_ram_write_fpdma(qc);
 		break;
+	case 0x06:
+		err = ata_ram_trim(qc);
+		break;
 	case ATA_CMD_FLUSH:
 	case ATA_CMD_FLUSH_EXT:
 	case ATA_CMD_VERIFY:
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index da60d3d..43720b2 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1687,6 +1687,38 @@ nothing_to_do:
 	return 1;
 }
 
+/* Translate SCSI PUNCH into an ATA DATA SET MANAGEMENT (TRIM) command. */
+static unsigned int ata_scsi_punch_xlat(struct ata_queued_cmd *qc)
+{
+	struct request *req = qc->scsicmd->request;
+	struct ata_taskfile *tf = &qc->tf;
+	char *buffer = bio_data(req->bio);
+	unsigned used, size;
+
+	/*
+	 * Ignore what SCSI has written to the buffer.  Will make it easier
+	 * to implement TRIM when ATA is no longer part of SCSI.
+	 */
+	used = 8 * ata_set_lba_range_entries(buffer, PAGE_SIZE / 8,
+					req->sector, req->nr_sectors);
+	size = ALIGN(used, 512);	/* pad the payload to whole blocks */
+	memset(buffer + used, 0, size - used);
+
+	qc->flags |= ATA_QCFLAG_IO;
+	qc->nbytes = size;
+
+	tf->command = 0x06;		/* Data Set Management */
+	tf->feature = 0x01;		/* TRIM */
+	tf->hob_feature = 0x00;
+	tf->nsect = size / 512;
+	tf->hob_nsect = (size / 512) >> 8;
+	tf->protocol = ATA_PROT_DMA;
+	/* 48-bit command: hob_* are filled in above, so flag it LBA48 */
+	tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE | ATA_TFLAG_LBA48 |
+		     ATA_TFLAG_WRITE;
+	return 0;
+}
+
 static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
@@ -2941,6 +2973,9 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd)
 	case VERIFY_16:
 		return ata_scsi_verify_xlat;
 
+	case PUNCH:
+		return ata_scsi_punch_xlat;
+
 	case ATA_12:
 	case ATA_16:
 		return ata_scsi_pass_thru;
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 3853bde..bee6923 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -433,6 +433,49 @@ static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
 	rq->special = task;
 }
 
+/* Build a DATA SET MANAGEMENT (TRIM) taskfile and payload page for @bio. */
+static int idedisk_prepare_discard(struct request_queue *q, struct request *rq,
+				   struct bio *bio)
+{
+	ide_task_t *task;
+	struct page *page;
+	char *buffer;
+	unsigned used, size;
+
+	/* FIXME: map struct ide_taskfile on rq->cmd[] */
+	task = kzalloc(sizeof(*task), GFP_KERNEL);
+	page = alloc_page(GFP_KERNEL);
+	if (!task || !page)
+		goto fail;
+
+	buffer = page_address(page);
+	used = 8 * ata_set_lba_range_entries(buffer, PAGE_SIZE / 8,
+					bio->bi_sector, bio_sectors(bio));
+	size = ALIGN(used, 512);
+	memset(buffer + used, 0, size - used);
+	/* If the bio has no room, the page is still ours to free */
+	if (bio_add_pc_page(q, bio, page, used, 0) != used)
+		goto fail;
+
+	task->tf.command = 0x06; /* Data Set Management */
+	task->tf.feature = 0x01; /* TRIM */
+	task->tf.hob_feature = 0x00;
+	task->tf.nsect = size / 512;
+	task->tf.hob_nsect = (size / 512) >> 8;
+	task->tf_flags	 = IDE_TFLAG_LBA48 | IDE_TFLAG_OUT_HOB |
+			   IDE_TFLAG_OUT_TF | IDE_TFLAG_OUT_DEVICE |
+			   IDE_TFLAG_DYN;
+	task->data_phase = TASKFILE_OUT_DMA;
+	rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
+	rq->special = task;
+	return 0;
+fail:
+	if (page)
+		__free_page(page);
+	kfree(task);
+	return -ENOMEM;
+}
+
 ide_devset_get(multcount, mult_count);
 
 /*
@@ -550,6 +593,9 @@ static int set_wcache(ide_drive_t *drive, int arg)
 
 	update_ordered(drive);
 
+	if (ata_id_has_trim(drive->id))
+		blk_queue_set_discard(drive->queue, idedisk_prepare_discard);
+
 	return err;
 }
 
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 77c6eae..0f9b49c 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -576,7 +576,12 @@ void ide_map_sg(ide_drive_t *drive, struct request *rq)
 	if (hwif->sg_mapped)	/* needed by ide-scsi */
 		return;
 
-	if (rq->cmd_type != REQ_TYPE_ATA_TASKFILE) {
+	if (blk_discard_rq(rq)) {
+		ide_task_t *task = rq->special;
+		unsigned len = (task->tf.hob_nsect << 8) + task->tf.nsect;
+		sg_init_one(sg, rq->buffer, len * 512);
+		hwif->sg_nents = 1;
+	} else if (rq->cmd_type != REQ_TYPE_ATA_TASKFILE) {
 		hwif->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
 	} else {
 		sg_init_one(sg, rq->buffer, rq->nr_sectors * SECTOR_SIZE);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index a7b53be..a160cbc 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -49,6 +49,7 @@
 #include <linux/mutex.h>
 #include <linux/string_helpers.h>
 #include <asm/uaccess.h>
+#include <asm/unaligned.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
@@ -368,6 +369,55 @@ static void scsi_disk_put(struct scsi_disk *sdkp)
 }
 
 /**
+ * sd_discard_fn - Prepare a discard request for transmission
+ * @q: queue that @rq is being prepared for
+ * @rq: request to turn into a PUNCH command
+ * @bio: discard bio supplying the start sector and sector count
+ *
+ * Allocates a page, attaches its first 24 bytes to @bio and fills them in:
+ * 12 zero bytes, then the 64-bit start sector and the 32-bit sector count,
+ * both big-endian.  @rq becomes a 10-byte REQ_TYPE_BLOCK_PC command whose
+ * opcode is PUNCH and whose bytes 7-8 carry the parameter length.
+ *
+ * Returns 0 on success, -ENOMEM if the page cannot be allocated, or -EIO
+ * if it cannot be attached to @bio.
+ *
+ * XXX: Add support for sending multiple extents to PUNCH.
+ */
+static int sd_discard_fn(struct request_queue *q, struct request *rq,
+							struct bio *bio)
+{
+	const unsigned int param_len = 24;
+	struct page *page;
+	char *punch;
+
+	page = alloc_page(GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
+
+	/*
+	 * bio_add_pc_page() can refuse the page; bio_data() would then not
+	 * point at it, and nothing else would ever free it.
+	 */
+	if (bio_add_pc_page(q, bio, page, param_len, 0) != param_len) {
+		__free_page(page);
+		return -EIO;
+	}
+
+	punch = bio_data(bio);
+	memset(punch, 0, 12);
+	put_unaligned_be64(bio->bi_sector, punch + 12);
+	put_unaligned_be32(bio_sectors(bio), punch + 20);
+
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	rq->cmd_len = 10;
+	rq->cmd[0] = PUNCH;
+	put_unaligned_be16(param_len, rq->cmd + 7);
+
+	return 0;
+}
+
+/**
  *	sd_init_command - build a scsi (read or write) command from
  *	information in the request structure.
  *	@SCpnt: pointer to mid-level's per scsi command structure that
@@ -1901,6 +1928,7 @@ static int sd_probe(struct device *dev)
 	sd_revalidate_disk(gd);
 
 	blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);
+	blk_queue_set_discard(sdp->request_queue, sd_discard_fn);
 
 	gd->driverfs_dev = &sdp->sdev_gendev;
 	gd->flags = GENHD_FL_EXT_DEVT | GENHD_FL_DRIVERFS;
diff --git a/include/linux/ata.h b/include/linux/ata.h
index bc4cf7a..5f5c4bd 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -89,6 +89,7 @@ enum {
 	ATA_ID_DLF		= 128,
 	ATA_ID_CSFO		= 129,
 	ATA_ID_CFA_POWER	= 160,
+	ATA_ID_DATA_SET_MGMT	= 169,
 	ATA_ID_ROT_SPEED	= 217,
 	ATA_ID_PIO4		= (1 << 1),
 
@@ -750,6 +751,14 @@ static inline int ata_id_has_unload(const u16 *id)
 	return 0;
 }
 
+/* 1 if @id has major version >= 7 and bit 0 of word ATA_ID_DATA_SET_MGMT. */
+static inline int ata_id_has_trim(const u16 *id)
+{
+	if (ata_id_major_version(id) < 7)
+		return 0;
+	return (id[ATA_ID_DATA_SET_MGMT] & 1) ? 1 : 0;
+}
+
 static inline int ata_id_current_chs_valid(const u16 *id)
 {
 	/* For ATA-1 devices, if the INITIALIZE DEVICE PARAMETERS command
@@ -885,6 +894,29 @@ static inline void ata_id_to_hd_driveid(u16 *id)
 #endif
 }
 
+/*
+ * Write up to 'max' little-endian LBA Range Entries into _buffer, covering
+ * 'count' sectors from 'sector' on.  Each entry ORs the start LBA with a
+ * length of at most 0xffff sectors in its top 16 bits.  Returns the number
+ * of entries written.  Used for TRIM and ADD LBA(S) TO NV CACHE PINNED SET.
+ */
+static inline unsigned ata_set_lba_range_entries(void *_buffer, unsigned max,
+						u64 sector, unsigned long count)
+{
+	__le64 *slot = _buffer;
+	unsigned n;
+
+	for (n = 0; n < max; sector += 0xffff, count -= 0xffff) {
+		unsigned long chunk = count > 0xffff ? 0xffff : count;
+
+		slot[n++] = __cpu_to_le64(sector | ((u64)chunk << 48));
+		if (chunk == count)
+			break;
+	}
+
+	return n;
+}
+
 static inline int is_multi_taskfile(struct ata_taskfile *tf)
 {
 	return (tf->command == ATA_CMD_READ_MULTI) ||
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a92d9e4..302089a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -255,7 +255,8 @@ typedef void (request_fn_proc) (struct request_queue *q);
 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unplug_fn) (struct request_queue *);
-typedef int (prepare_discard_fn) (struct request_queue *, struct request *);
+typedef int (prepare_discard_fn) (struct request_queue *, struct request *,
+							struct bio *bio);
 
 struct bio_vec;
 struct bvec_merge_data {
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index 192f871..73b5e93 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -93,6 +93,7 @@
 #define WRITE_LONG            0x3f
 #define CHANGE_DEFINITION     0x40
 #define WRITE_SAME            0x41
+#define PUNCH                 0x42	/* XXX: Provisional */
 #define READ_TOC              0x43
 #define LOG_SELECT            0x4c
 #define LOG_SENSE             0x4d

