block: Export I/O topology for block devices and partitions Export physical start offset and preferred I/O sizes in sysfs. This allows filesystems to align data structures to RAID stripes. `physical_offset' indicates the physical offset in sectors to the start of the block device. `optimal_block_size' indicates the smallest request that can be submitted without incurring a performance penalty (RAID read-modify-write, 512-byte sector emulation). It is recommended that I/O requests be a multiple of this size. `optimal_io_length' indicates the optimal I/O length for the device (i.e. stripe size). `maximal_io_length' indicates the largest single I/O the device can handle. These values are largely modeled after the SCSI SBC-3 Block Limits VPD. Block device drivers need to initialize the subsystem using disk_topology_register(). Coherent I/O regions can then be added with disk_topology_add_region(). Use disk_topology_set() from drivers to set characteristics for a given region. Signed-off-by: Martin K. Petersen --- diff --git a/block/Makefile b/block/Makefile --- a/block/Makefile +++ b/block/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-co obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \ - cmd-filter.o + cmd-filter.o io-topology.o obj-$(CONFIG_BLK_DEV_BSG) += bsg.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o diff --git a/block/io-topology.c b/block/io-topology.c new file mode 100644 --- /dev/null +++ b/block/io-topology.c @@ -0,0 +1,352 @@ +/* + * io-topology.c - Block device topology functions + * + * Copyright (C) 2008 Oracle Corporation + * Written by: Martin K. Petersen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. 
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ *
+ */
+
+/*
+ * NOTE(review): the four include targets were lost when this patch was
+ * extracted; the list below is reconstructed from the symbols this file
+ * uses (printk/container_of, kzalloc/kfree, kobjects, struct gendisk).
+ * Confirm against the original posting.
+ */
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/kobject.h>
+#include <linux/genhd.h>
+
+/* Pairs a generic sysfs attribute with a show method typed for io_topology. */
+struct io_topology_sysfs_entry {
+	struct attribute attr;
+	ssize_t (*show)(struct io_topology *, char *);
+};
+
+/*
+ * sysfs_ops ->show hook: recover the region from its embedded kobject and
+ * the typed entry from the generic attribute, then call the entry's show().
+ */
+static ssize_t io_topology_attr_show(struct kobject *kobj,
+				     struct attribute *attr, char *page)
+{
+	struct io_topology *iot = container_of(kobj, struct io_topology, kobj);
+	struct io_topology_sysfs_entry *entry =
+		container_of(attr, struct io_topology_sysfs_entry, attr);
+
+	return entry->show(iot, page);
+}
+
+/* Per-attribute show methods: each prints one io_topology field into @page. */
+
+static ssize_t io_topology_region_start_show(struct io_topology *iot, char *page)
+{
+	return sprintf(page, "%lu\n", iot->region_start);
+}
+
+static ssize_t io_topology_region_length_show(struct io_topology *iot, char *page)
+{
+	return sprintf(page, "%lu\n", iot->region_length);
+}
+
+static ssize_t io_topology_phys_off_show(struct io_topology *iot, char *page)
+{
+	return sprintf(page, "%lu\n", iot->phys_off);
+}
+
+static ssize_t io_topology_opt_block_show(struct io_topology *iot, char *page)
+{
+	return sprintf(page, "%u\n", iot->opt_block);
+}
+
+static ssize_t io_topology_opt_length_show(struct io_topology *iot, char *page)
+{
+	return sprintf(page, "%u\n", iot->opt_length);
+}
+
+static ssize_t io_topology_max_length_show(struct io_topology *iot, char *page)
+{
+	return sprintf(page, "%u\n", iot->max_length);
+}
+
+/* Prints the device type as a word; any value without a case prints "unknown". */
+static ssize_t io_topology_dev_type_show(struct io_topology *iot, char *page)
+{
+	switch (iot->dev_type) {
+	case IOT_TYPE_DISK:
+		return sprintf(page, "disk\n");
+	case IOT_TYPE_SSD:
+		return sprintf(page, "ssd\n");
+	case IOT_TYPE_ARRAY:
+		return sprintf(page, "array\n");
+	case IOT_TYPE_LVM:
+		return sprintf(page, "lvm\n");
+	case IOT_TYPE_MD:
+		return sprintf(page, "md\n");
+	}
+
+	return sprintf(page, "unknown\n");
+}
+
+static struct io_topology_sysfs_entry io_topology_region_start_entry = {
+	.attr = { .name = "region_start", .mode = S_IRUGO },
+	.show = io_topology_region_start_show,
+};
+
+static struct io_topology_sysfs_entry io_topology_region_length_entry = {
+	.attr = { .name = "region_length", .mode = S_IRUGO },
+	.show = io_topology_region_length_show,
+};
+
+static struct io_topology_sysfs_entry io_topology_phys_off_entry = {
+	.attr = { .name = "physical_offset", .mode = S_IRUGO },
+	.show = io_topology_phys_off_show,
+};
+
+static struct io_topology_sysfs_entry io_topology_opt_block_entry = {
+	.attr = { .name = "optimal_block_size", .mode = S_IRUGO },
+	.show = io_topology_opt_block_show,
+};
+
+static struct io_topology_sysfs_entry io_topology_opt_length_entry = {
+	.attr = { .name = "optimal_io_length", .mode = S_IRUGO },
+	.show = io_topology_opt_length_show,
+};
+
+static struct io_topology_sysfs_entry io_topology_max_length_entry = {
+	.attr = { .name = "maximal_io_length", .mode = S_IRUGO },
+	.show = io_topology_max_length_show,
+};
+
+static struct io_topology_sysfs_entry io_topology_dev_type_entry = {
+	.attr = { .name = "device_type", .mode = S_IRUGO },
+	.show = io_topology_dev_type_show,
+};
+
+/* Read-only files created in every region directory. */
+static struct attribute *io_topology_attrs[] = {
+	&io_topology_region_start_entry.attr,
+	&io_topology_region_length_entry.attr,
+	&io_topology_phys_off_entry.attr,
+	&io_topology_opt_block_entry.attr,
+	&io_topology_opt_length_entry.attr,
+	&io_topology_max_length_entry.attr,
+	&io_topology_dev_type_entry.attr,
+	NULL,
+};
+
+static struct sysfs_ops io_topology_ops = {
+	.show = &io_topology_attr_show,
+};
+
+/* kobject release hook: the only place a struct io_topology is freed. */
+static void io_topology_release(struct kobject *kobj)
+{
+	struct io_topology *iot =
+		container_of(kobj, struct io_topology, kobj);
+
+	kfree(iot);
+}
+
+static struct kobj_type io_topology_ktype = {
+	.default_attrs	= io_topology_attrs,
+	.sysfs_ops	= &io_topology_ops,
+	.release	= io_topology_release,
+};
+
+/* Take a region off its disk's list and remove its sysfs directory. */
+static void io_topology_unlink(struct io_topology *iot)
+{
+	list_del(&iot->list);
+
+	kobject_uevent(&iot->kobj, KOBJ_REMOVE);
+	kobject_del(&iot->kobj);
+	/*
+	 * io_topology_release() frees the region when the last reference
+	 * goes away, so it must not be kfree()d here as well.
+	 */
+	kobject_put(&iot->kobj);
+}
+
+/**
+ * disk_topology_add_region - create a topology region for a disk
+ * @disk:	disk the region belongs to
+ * @start:	start of the region
+ * @length:	length of the region
+ *
+ * Allocates a region, creates a directory for it under the disk's
+ * "topology" directory (named after the current value of
+ * disk->topology_regions) and appends it to disk->topology_list.
+ *
+ * Returns the new region, or NULL if the disk has no topology directory,
+ * the allocation fails or the kobject cannot be added.
+ */
+struct io_topology *disk_topology_add_region(struct gendisk *disk,
+					     sector_t start,
+					     unsigned long length)
+{
+	struct io_topology *iot;
+
+	/* No parent directory: disk_topology_register() was not run or failed. */
+	if (disk->topology_dir == NULL)
+		return NULL;
+
+	iot = kzalloc(sizeof(*iot), GFP_KERNEL);
+	if (iot == NULL) {
+		printk(KERN_ERR "%s: Could not allocate struct io_topology\n",
+		       disk->disk_name);
+		return NULL;
+	}
+
+	printk(KERN_DEBUG "%s: add iot %p, sector %llu, len %lu\n", __func__,
+	       iot, (unsigned long long)start, length);
+
+	/* Fill the region in before its attributes become readable. */
+	iot->region_start = start;
+	iot->region_length = length;
+	iot->region_number = disk->topology_regions;
+
+	if (kobject_init_and_add(&iot->kobj, &io_topology_ktype,
+				 disk->topology_dir, "%u",
+				 iot->region_number)) {
+		/*
+		 * The kobject is initialised even though the add failed, so
+		 * drop the reference and let io_topology_release() free iot.
+		 */
+		kobject_put(&iot->kobj);
+		return NULL;
+	}
+
+	/* Only link the region once nothing can fail any more. */
+	list_add_tail(&iot->list, &disk->topology_list);
+
+	disk->topology_regions++;
+	printk(KERN_DEBUG "%s: %s topology_regions = %u\n", __func__,
+	       disk->disk_name, disk->topology_regions);
+
+	kobject_uevent(&iot->kobj, KOBJ_ADD);
+
+	return iot;
+}
+
+/**
+ * disk_topology_lookup_offset - find the region containing an offset
+ * @disk:	disk whose regions are searched
+ * @offset:	offset to look up
+ *
+ * A region covers [region_start, region_start + region_length); the end
+ * is exclusive so that the first offset of the following region is not
+ * attributed to this one.
+ *
+ * Returns the first matching region in list order, or NULL.
+ */
+struct io_topology *disk_topology_lookup_offset(struct gendisk *disk,
+						off_t offset)
+{
+	struct io_topology *iot;
+
+	list_for_each_entry(iot, &disk->topology_list, list) {
+		unsigned long start = iot->region_start;
+		unsigned long end = iot->region_start + iot->region_length;
+
+		if (offset >= start && offset < end) {
+			printk(KERN_DEBUG "%s: %p, offset %lu\n", __func__, iot, offset);
+			return iot;
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * disk_topology_lookup_region - find a region by its number
+ * @disk:	disk whose regions are searched
+ * @region:	region number assigned by disk_topology_add_region()
+ *
+ * Returns the region, or NULL if no region has that number.
+ */
+struct io_topology *disk_topology_lookup_region(struct gendisk *disk,
+						unsigned int region)
+{
+	struct io_topology *iot;
+
+	list_for_each_entry(iot, &disk->topology_list, list) {
+		if (iot->region_number == region) {
+			printk(KERN_DEBUG "%s: %p, region %u\n", __func__, iot, region);
+			return iot;
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * disk_topology_remove_region - remove one region from a disk
+ * @disk:	disk the region belongs to
+ * @region:	region number; unknown numbers are ignored
+ */
+void disk_topology_remove_region(struct gendisk *disk, unsigned int region)
+{
+	struct io_topology *iot = disk_topology_lookup_region(disk, region);
+
+	if (iot)
+		io_topology_unlink(iot);
+}
+
+/**
+ * disk_topology_set - set one characteristic of a region
+ * @disk:	disk the region belongs to
+ * @region:	region number
+ * @key:	which field to set (enum io_topology_key)
+ * @value:	new value; it is stored in the field's own, possibly narrower, type
+ *
+ * Returns 0 on success. For an unknown region or key it returns -EINVAL,
+ * which the unsigned return type turns into a large positive value, so
+ * callers should test the result for non-zero.
+ */
+unsigned int disk_topology_set(struct gendisk *disk, unsigned int region,
+			       unsigned int key, u64 value)
+{
+	struct io_topology *iot = disk_topology_lookup_region(disk, region);
+
+	if (iot == NULL)
+		return -EINVAL;
+
+	switch (key) {
+
+	case IOT_REGION_START:
+		iot->region_start = value;
+		break;
+
+	case IOT_REGION_LENGTH:
+		iot->region_length = value;
+		break;
+
+	case IOT_PHYS_OFF:
+		iot->phys_off = value;
+		break;
+
+	case IOT_OPT_BLOCK_SIZE:
+		iot->opt_block = value;
+		break;
+
+	case IOT_OPT_IO_LENGTH:
+		iot->opt_length = value;
+		break;
+
+	case IOT_MAX_IO_LENGTH:
+		iot->max_length = value;
+		break;
+
+	case IOT_DEV_TYPE:
+		iot->dev_type = value;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * disk_topology_get - read one characteristic of a region
+ * @disk:	disk the region belongs to
+ * @region:	region number
+ * @key:	which field to read (enum io_topology_key)
+ *
+ * Returns the field's value. Returns 0 if the region does not exist; an
+ * unknown key triggers a warning and also returns 0, mirroring the way
+ * disk_topology_set() rejects it instead of halting the machine.
+ */
+u64 disk_topology_get(struct gendisk *disk, unsigned int region,
+		      unsigned int key)
+{
+	struct io_topology *iot = disk_topology_lookup_region(disk, region);
+
+	if (iot == NULL)
+		return 0;
+
+	switch (key) {
+
+	case IOT_REGION_START:
+		return iot->region_start;
+
+	case IOT_REGION_LENGTH:
+		return iot->region_length;
+
+	case IOT_PHYS_OFF:
+		return iot->phys_off;
+
+	case IOT_OPT_BLOCK_SIZE:
+		return iot->opt_block;
+
+	case IOT_OPT_IO_LENGTH:
+		return iot->opt_length;
+
+	case IOT_MAX_IO_LENGTH:
+		return iot->max_length;
+
+	case IOT_DEV_TYPE:
+		return iot->dev_type;
+
+	default:
+		WARN_ON(1);
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ * disk_topology_register - create the "topology" directory for a disk
+ * @disk:	disk to set up; must not already have a topology directory
+ *
+ * Initialises disk->topology_list and creates the "topology" directory
+ * under the disk's device kobject. If the directory cannot be created,
+ * disk->topology_dir stays NULL and disk_topology_add_region() will
+ * refuse to add regions.
+ */
+void disk_topology_register(struct gendisk *disk)
+{
+	struct kobject *kobj;
+
+	BUG_ON(disk->topology_dir);
+
+	INIT_LIST_HEAD(&disk->topology_list);
+
+	kobj = kobject_get(&disk->dev.kobj);
+	disk->topology_dir = kobject_create_and_add("topology", kobj);
+	kobject_put(kobj);
+
+	if (disk->topology_dir == NULL)
+		printk(KERN_ERR "%s: Could not create topology directory\n",
+		       disk->disk_name);
+}
+
+/**
+ * disk_topology_unregister - remove all regions and the "topology" directory
+ * @disk:	disk to tear down
+ *
+ * Does nothing beyond logging if the disk has no topology directory.
+ */
+void disk_topology_unregister(struct gendisk *disk)
+{
+	struct io_topology *iot, *next;
+
+	printk(KERN_DEBUG "%s: unreg %s\n", __func__, disk->disk_name);
+
+	/* Never registered, or registration failed: nothing was created. */
+	if (disk->topology_dir == NULL)
+		return;
+
+	list_for_each_entry_safe(iot, next, &disk->topology_list, list) {
+		printk(KERN_DEBUG "%s: unreg %p\n", __func__, iot);
+		io_topology_unlink(iot);
+	}
+
+	kobject_uevent(disk->topology_dir, KOBJ_REMOVE);
+	kobject_del(disk->topology_dir);
+	kobject_put(disk->topology_dir);
+	/* Do not leave a stale pointer for disk_topology_register()'s BUG_ON(). */
+	disk->topology_dir = NULL;
+}
+
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -61,6 +61,42 @@ enum {
 #include
 #include
 #include
+
+enum io_topology_type {
+	IOT_TYPE_UNKNOWN,
+	IOT_TYPE_DISK,		/* Spinning media */
+	IOT_TYPE_SSD,		/* Solid state */
+	IOT_TYPE_ARRAY,		/* Array controller */
+	IOT_TYPE_LVM,		/* Logical Volume Manager */
+	IOT_TYPE_MD,		/* Software RAID */
+};
+
+enum io_topology_key {
+	IOT_REGION_START,
+	IOT_REGION_LENGTH,
+
+	IOT_PHYS_OFF,
+	IOT_OPT_BLOCK_SIZE,
+	IOT_OPT_IO_LENTGH,	/* Misspelled name, kept for compatibility */
+	IOT_OPT_IO_LENGTH = IOT_OPT_IO_LENTGH,
+	IOT_MAX_IO_LENGTH,
+	IOT_DEV_TYPE,
+};
+
+struct io_topology {
+	off_t region_start;		/* Start of region described */
+	unsigned long region_length;	/* Length of region described */
+	unsigned int region_number;	/* Name of the region's sysfs directory */
+
+	off_t phys_off;			/* Offset to physical start sector */
+	unsigned int opt_block;		/* Optimal block size (stripe chunk) */
+	unsigned int opt_length;	/* Optimal length (stripe width) */
+	unsigned int max_length;	/* Max I/O length */
+	unsigned int dev_type;		/* Disk, array, RAID, etc. */
+
+	struct list_head list;		/* Entry in gendisk->topology_list */
+	struct kobject kobj;		/* Freed through its release hook */
+};
 
 struct partition {
 	unsigned char boot_ind;		/* 0x80 - active */
@@ -137,6 +173,10 @@ struct gendisk {
 	struct device dev;
 	struct kobject *holder_dir;
 	struct kobject *slave_dir;
+
+	struct kobject *topology_dir;
+	struct list_head topology_list;
+	unsigned int topology_regions;
 
 	struct timer_rand_state *random;
 	int policy;
@@ -369,9 +409,24 @@ extern void del_gendisk(struct gendisk *
 extern void del_gendisk(struct gendisk *gp);
 extern void unlink_gendisk(struct gendisk *gp);
 extern struct gendisk *get_gendisk(dev_t dev, int *part);
+/* NOTE(review): nothing in this patch defines io_geo or these functions - leftover? */
+extern struct io_geo *io_geo_alloc(struct gendisk *);
+extern void io_geo_free(struct gendisk *);
+extern struct io_geo *io_geo_lookup(struct gendisk *, off_t);
+extern struct io_geo *bdev_geo_lookup(struct block_device *, off_t);
 
 extern void set_device_ro(struct block_device *bdev, int flag);
 extern void set_disk_ro(struct gendisk *disk, int flag);
+
+/* block/io-topology.c */
+extern struct io_topology *disk_topology_add_region(struct gendisk *, sector_t, unsigned long);
+extern struct io_topology *disk_topology_lookup_offset(struct gendisk *, off_t);
+extern struct io_topology *disk_topology_lookup_region(struct gendisk *, unsigned int);
+extern void disk_topology_remove_region(struct gendisk *, unsigned int);
+extern unsigned int disk_topology_set(struct gendisk *, unsigned int, unsigned int, u64);
+extern u64 disk_topology_get(struct gendisk *disk, unsigned int, unsigned int);
+extern void disk_topology_register(struct gendisk *);
+extern void disk_topology_unregister(struct gendisk *);
 
 /* drivers/char/random.c */
 extern void add_disk_randomness(struct gendisk *disk);