/*
 * LVM kiobuf-based I/O support (C) 2000, 2001 Silicon Graphics, Inc.
 * Written by Martin K. Petersen <mkp@mkp.net>
 *
 * LVM is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * LVM is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 * License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNU CC; see the file COPYING.  If not, write to
 * the Free Software Foundation, 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 * 
 */

#include <linux/config.h>
#include <linux/version.h>

#include <linux/kernel.h>
#include <linux/vmalloc.h>
#include <linux/smp_lock.h>

#include <linux/devfs_fs_kernel.h>
#include <linux/proc_fs.h>
#include <linux/lvm.h>

#include <linux/fs.h>
#include <linux/iobuf.h>
#include <linux/blkdev.h>

#include "lvm-internal.h"


/*
 * Completion function for kiobuf clone I/O
 *
 * Called when one sub-request (clone) created by the splitters in this
 * file has finished.  The parent kiobuf is recovered from the clone's
 * k_dev_id field.  A non-zero clone errno is copied to the parent, the
 * clone is released, and the parent's io_count is dropped; when it
 * reaches zero the parent's own end_io handler is invoked.
 */

static void 
lvm_kio_end_io (struct kiobuf *kiobuf)
{
	struct kiobuf *parent = (struct kiobuf *)(kiobuf->k_dev_id);

	/* A clone without a parent cannot have been built by us */
	if (!parent)
		BUG();

	/* Print the clone itself, not the address of our local variable */
	printk ("lvm_kio_end_io: %p for %p\n", kiobuf, parent);

	/* Bail out on error. REVISIT */
	if (kiobuf->errno != 0)
		parent->errno = kiobuf->errno;

	/* Commit suicide: the clone must not be touched after this point */
	free_kiovec (1, &kiobuf);

	/* Call parent I/O completion function when done */
	if (atomic_dec_and_test (&parent->io_count))
		parent->end_io (parent);
}


/*
 * Request splitter for concatenated kiobuf based I/O 
 *
 * Walks the parent kiobuf in 512 byte steps, starting at logical sector
 * `sector' of the logical volume identified by `dev'.  Every logical
 * sector is translated through lv_current_pe[]; consecutive sectors
 * that land on the same physical device at consecutive physical
 * sectors are merged into one clone kiobuf, which is then handed to
 * generic_make_request().  Each clone completes through
 * lvm_kio_end_io(), which eventually triggers the parent's end_io.
 */

static void 
lvm_kio_concat (int rw, struct kiobuf *parent, kdev_t dev, ulong sector)
{
	lv_t *lv; 
	ulong pe_size;		/* Extent size; sector / pe_size = extent */
	ulong i = 0;		/* Bytes of the parent already handed out */

	/* We increment the iocount so the parent I/O completion function
	 * won't be called until all sub-requests have been submitted. 
	 */
	atomic_set (&parent->io_count, 1);

	/* Fetch logical volume structure */
	lv = vg[VG_BLK(MINOR(dev))]->lv[LV_BLK(MINOR(dev))];
	pe_size = vg[VG_BLK(MINOR(dev))]->pe_size;

	/* Iterate over the kiobuf with sector granularity */
	while (i < parent->length) {
		ulong extent;	 	  /* Logical extent number */
		ulong extoffset; 	  /* Offset within extent */
		ulong rsector;	 	  /* Physical sector */
		ulong start_sector;	  /* Sub-request phys. start sector */
		ulong prev_sector;	  /* Last sector accepted in this run */
		ulong first_off;	  /* Offset within the first page */
		kdev_t rdev;		  /* Physical device */
		kdev_t start_dev;	  /* Device of this sub-request */
		struct kiobuf *clone; 	  /* Sub-request */

		/* Allocate new sub-request. REVISIT */
		if (alloc_kiovec (1, &clone) != 0)
			BUG();

		/* Assemble clone kiobuf; it shares the parent's page list */
		clone->errno = 0;
		clone->end_io = lvm_kio_end_io;
		clone->k_dev_id = parent;
		clone->maplist = parent->maplist;
		clone->offset = parent->offset + i;
		clone->length = 512;

		/* First sector */
		extent = sector / pe_size;
		extoffset = sector % pe_size;
		start_sector = lv->lv_current_pe[extent].pe + extoffset;
		start_dev = lv->lv_current_pe[extent].dev;
		prev_sector = start_sector;
		i += 512;
		sector++;

		/* Add sectors until we cross a boundary */
		while (i < parent->length) {
			extent = sector / pe_size;
			extoffset = sector % pe_size;
			rsector = lv->lv_current_pe[extent].pe + extoffset;
			rdev = lv->lv_current_pe[extent].dev;

			/* Device change or non-contiguous sector: close this
			 * sub-request.  This has to leave the loop; retrying
			 * the same sector would never make progress.
			 */
			if (rdev != start_dev || rsector != prev_sector + 1)
				break;

			prev_sector = rsector;
			clone->length += 512;
			i += 512;
			sector++;
		}

		/* Number of pages touched by `length' bytes that begin
		 * `first_off' bytes into a page, rounded up.
		 */
		first_off = clone->offset % PAGE_SIZE;
		clone->nr_pages = (first_off + clone->length + ~PAGE_MASK)
				  >> PAGE_SHIFT;

		/* Submit I/O */
		atomic_inc (&parent->io_count);
		generic_make_request (rw, NULL, clone, start_dev, start_sector,
				      512);
	}

	/* Decrement the io_count and call the completion function if all
	 * sub-requests have completed.
	 */
	if (atomic_dec_and_test (&parent->io_count))
		parent->end_io (parent);
}


/*
 * Request splitter for striped kiobuf based I/O 
 */

static void 
lvm_kio_stripe (int rw, struct kiobuf *parent, kdev_t dev, ulong sector)
{
	lv_t *lv; 
	ulong i = 0;

	/* We increment the iocount so the parent I/O completion function
	 * won't be called until all sub-requests have been submitted. 
	 */
	atomic_set (&parent->io_count, 1);

	/* Fetch logical volume structure */
	lv = vg[VG_BLK(MINOR(dev))]->lv[LV_BLK(MINOR(dev))];

	printk ("lvm_stripe_kio: Splitting a %ld byte request\n",
		parent->length);

	/* Iterate over the kiobuf with sector granularity */
	while (i < parent->length) {
		ulong sarea;
                ulong swidth = lv->lv_stripesize;
                ulong stripe;
                ulong sunit;
		ulong rsector;	 	  /* Physical sector */
		ulong start_sector;	  /* Sub-request phys. start sector */
		ulong prev_sector;	  /* Previous physical sector */
		ulong partial;		  /* Partial first page */
		kdev_t rdev = 0;	  /* Physical device */
		kdev_t prev_dev = 0;	  /* Previous physical device */
		struct kiobuf *clone; 	  /* Sub-request */

		/* Allocate new sub-request. REVISIT */
		if (alloc_kiovec (1, &clone) != 0)
			BUG();

		/* Assemble clone kiobuf */
		clone->errno = 0;
		clone->end_io = lvm_kio_end_io;
		clone->k_dev_id = parent;
		clone->maplist = parent->maplist;
		clone->offset = parent->offset + i;
		clone->length = 512;

		/* Number of sectors in all stripes in this group of extents */
                sarea = vg[VG_BLK(MINOR(dev))]->pe_size * lv->lv_stripesize;

                /* Horizontal stripe within the stripe area */
                stripe = (sector % sarea) / swidth;

                /* Select stripe unit in horizontal stripe */
                sunit = sector / sarea + 
                        (stripe % lv->lv_stripes) * 
                        (lv->lv_allocated_le / lv->lv_stripes);

                /* Find physical sector for the I/O */
                rsector = lv->lv_current_pe[sunit].pe +
                        (sector % sarea) -
                        (stripe % lv->lv_stripes) * swidth -
                        stripe / lv->lv_stripes * 
                        (lv->lv_stripes - 1) * swidth;

                /* And device */
                rdev = lv->lv_current_pe[sunit].dev;

		prev_sector = start_sector = rsector;
		i += 512;
		sector++;

		/* Add sectors until we cross a boundary */
		while (i < parent->length) {
			prev_sector = rsector;
			prev_dev = rdev;

			sarea = vg[VG_BLK(MINOR(dev))]->pe_size * 
				lv->lv_stripesize;
			stripe = (sector % sarea) / swidth;
			sunit = sector / sarea + 
				(stripe % lv->lv_stripes) * 
				(lv->lv_allocated_le / lv->lv_stripes);
			rsector = lv->lv_current_pe[sunit].pe +
				(sector % sarea) -
				(stripe % lv->lv_stripes) * swidth -
				stripe / lv->lv_stripes * 
				(lv->lv_stripes - 1) * swidth;
			rdev = lv->lv_current_pe[sunit].dev;
			
			if (rdev != prev_dev || rsector != prev_sector + 1 ||
			    i >= parent->length) 
				goto blah;

			clone->length += 512;
			i += 512;
			sector++;
		}
	blah:

		/* Calculate number of pages */
		partial = PAGE_SIZE - (clone->offset % PAGE_SIZE);

		if (partial)
			clone->nr_pages = 1;
		else
			clone->nr_pages = 0;

		clone->nr_pages += ((clone->length - partial) >> PAGE_SHIFT);

		if ((clone->length - partial) % PAGE_SIZE)
			clone->nr_pages++;

		/* Submit I/O */
		atomic_inc (&parent->io_count);

printk ("lvm_stripe_kio: %d for dev = %x, offset = %ld, length = %ld\n",
	parent->io_count, prev_dev, clone->offset, clone->length);

		generic_make_request (rw, NULL, clone, prev_dev, start_sector,
				      512);
	}

	/* Decrement the io_count and call the completion function if all
	 * sub-requests have completed.
	 */
	if (atomic_dec_and_test (&parent->io_count))
		parent->end_io (parent);
}


/*
 * Mapping function for kiobuf based I/O
 *
 * Validates the request against the logical volume selected by the
 * minor number of `dev' and passes it to the concatenated splitter
 * (lv_stripes < 2) or the striped splitter.
 *
 * Always returns 0.  When the request is rejected, kiobuf->errno is set
 * to -1 and no I/O is submitted; the kiobuf's end_io handler is not
 * called on that path.
 */
int 
lvm_kio_map (struct kiobuf *kiobuf, int rw, kdev_t dev, unsigned int sector, 
	     unsigned int count)
{
	lv_t *lv = NULL;

	/* Sanity checking */
	if (!kiobuf || !dev || !count) {
		printk (KERN_CRIT "lvm_map_kio: Invalid arguments\n");
		/* Record the failure whenever there is a kiobuf to hold it */
		if (kiobuf)
			kiobuf->errno = -1;
		return 0;
	}

	/* Check that this Logical Volume actually exists.  The volume
	 * group slot is tested first so a missing VG is reported as an
	 * error instead of being dereferenced.
	 */
	if (vg[VG_BLK(MINOR(dev))])
		lv = vg[VG_BLK(MINOR(dev))]->lv[LV_BLK(MINOR(dev))];

	if (!lv) {
		printk (KERN_CRIT "lvm_map_kio: %x is not an LV\n", dev);
		goto ioerr;
	}

	/* ...and that it is active... */
	if (! (lv->lv_status & LV_ACTIVE)) {
		printk (KERN_CRIT "lvm_map_kio: I/O on inactive LV %s\n",
			lv->lv_name);
		goto ioerr; 
	}

	/* ...and writable. */
	if ((rw == WRITE) && !(lv->lv_access & LV_WRITE)) {
		printk (KERN_CRIT "lvm_map_kio: Can't write readonly LV %s\n", 
			lv->lv_name);
		goto ioerr;
	}

	/* Running concatenated - the simple case. */
	if (lv->lv_stripes < 2) {
#if 0
		/* NOTE(review): disabled fast path.  The size test below
		 * subtracts extoffset (sectors) from pe_size << 9 (bytes);
		 * the units need fixing before this is re-enabled.
		 */
		ulong extent;	 /* Logical extent */
		ulong extoffset; /* Offset within extent */
		ulong rsector;	 /* Physical sector */
		kdev_t rdev;	 /* Physical device */

		/* In most cases, pe_size >> io size so we have the following
		 * optimization.  If pe_size ~ io size or we're crossing an
		 * extent boundary, we resort to request splitting.
		 */

		extent = sector / vg[VG_BLK(MINOR(dev))]->pe_size;
		extoffset = sector % vg[VG_BLK(MINOR(dev))]->pe_size;

		/* If kiobuf is contained within a single extent,
		 * submit it directly. REVISIT: Measure whether this
		 * optimization is worth it.
		 */
		if ( (vg[VG_BLK(MINOR(dev))]->pe_size << 9) - extoffset 
		     >= kiobuf->length) 
		{
			/* Calculate remapped offset and device */ 
			rsector = lv->lv_current_pe[extent].pe + extoffset;
			rdev = lv->lv_current_pe[extent].dev; 

			/* Submit single I/O to the underlying device */ 
			generic_make_request(rw, NULL, kiobuf, rdev, rsector,
					     512); 
		}
		else
#endif
			/* Do request splitting */ 
			lvm_kio_concat (rw, kiobuf, dev, sector);
	}
	/* Striping */
	else
		lvm_kio_stripe (rw, kiobuf, dev, sector);

	return 0;

 ioerr:
	kiobuf->errno = -1;
	return 0;

}


/* EOF */
