Skip to content

Commit

Permalink
add discard support for vhd
Browse files Browse the repository at this point in the history
  • Loading branch information
Alex Lanzano committed Sep 19, 2018
1 parent 278efd0 commit 9520a10
Show file tree
Hide file tree
Showing 21 changed files with 498 additions and 30 deletions.
4 changes: 3 additions & 1 deletion control/tap-ctl-info.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@
#include "tap-ctl.h"

int tap_ctl_info(pid_t pid, unsigned long long *sectors,
unsigned int *sector_size, unsigned int *info, const int minor)
unsigned int *sector_size, unsigned int *info,
bool *discard, const int minor)
{
tapdisk_message_t message;
int err;
Expand All @@ -60,6 +61,7 @@ int tap_ctl_info(pid_t pid, unsigned long long *sectors,
*sectors = message.u.image.sectors;
*sector_size = message.u.image.sector_size;
*info = message.u.image.info;
*discard = message.u.image.discard;
return 0;
} else if (TAPDISK_MESSAGE_ERROR == message.type) {
return -message.u.response.error;
Expand Down
207 changes: 204 additions & 3 deletions drivers/block-vhd.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,19 +107,21 @@ unsigned int SPB;
#endif

/******VHD DEFINES******/
#define VHD_CACHE_SIZE 32
#define VHD_CACHE_SIZE TAPDISK_DATA_REQUESTS

#define VHD_REQS_DATA TAPDISK_DATA_REQUESTS
#define VHD_REQS_META (VHD_CACHE_SIZE + 2)
#define VHD_REQS_TOTAL (VHD_REQS_DATA + VHD_REQS_META)


#define VHD_OP_BAT_WRITE 0
#define VHD_OP_DATA_READ 1
#define VHD_OP_DATA_WRITE 2
#define VHD_OP_BITMAP_READ 3
#define VHD_OP_BITMAP_WRITE 4
#define VHD_OP_ZERO_BM_WRITE 5
#define VHD_OP_REDUNDANT_BM_WRITE 6
#define VHD_OP_DISCARD 7

#define VHD_BM_BAT_LOCKED 0
#define VHD_BM_BAT_CLEAR 1
Expand Down Expand Up @@ -718,6 +720,7 @@ __vhd_open(td_driver_t *driver, const char *name, vhd_flag_t flags)
driver->info.size = s->vhd.footer.curr_size >> VHD_SECTOR_SHIFT;
driver->info.sector_size = VHD_SECTOR_SIZE;
driver->info.info = 0;
driver->info.discard = true;

DBG(TLOG_INFO, "vhd_open: done (sz:%"PRIu64", sct:%lu, inf:%u)\n",
driver->info.size, driver->info.sector_size, driver->info.info);
Expand Down Expand Up @@ -1240,7 +1243,7 @@ read_bitmap_cache(struct vhd_state *s, uint64_t sector, uint8_t op)
return VHD_BM_BAT_CLEAR;
}

if (test_batmap(s, blk)) {
if (test_batmap(s, blk) && op != VHD_OP_DISCARD) {
DBG(TLOG_DBG, "batmap set for 0x%04x\n", blk);
return VHD_BM_BIT_SET;
}
Expand Down Expand Up @@ -1315,6 +1318,8 @@ aio_read(struct vhd_state *s, struct vhd_request *req, uint64_t offset)
{
struct tiocb *tiocb = &req->tiocb;

tiocb->op = TIO_CMD_READ;

td_prep_read(tiocb, s->vhd.fd, req->treq.buf,
vhd_sectors_to_bytes(req->treq.secs),
offset, vhd_complete, req);
Expand All @@ -1331,6 +1336,8 @@ aio_write(struct vhd_state *s, struct vhd_request *req, uint64_t offset)
{
struct tiocb *tiocb = &req->tiocb;

tiocb->op = TIO_CMD_WRITE;

td_prep_write(tiocb, s->vhd.fd, req->treq.buf,
vhd_sectors_to_bytes(req->treq.secs),
offset, vhd_complete, req);
Expand All @@ -1342,6 +1349,22 @@ aio_write(struct vhd_state *s, struct vhd_request *req, uint64_t offset)
TRACE(s);
}

static inline void
aio_discard(struct vhd_state *s, struct vhd_request *req, uint64_t offset)
{
struct tiocb *tiocb = &req->tiocb;

tiocb->op = TIO_CMD_DISCARD;

td_prep_write(tiocb, s->vhd.fd, req->treq.buf,
vhd_sectors_to_bytes(req->treq.secs),
offset, vhd_complete, req);
tiocb->iocb.aio_lio_opcode = IO_CMD_NOOP;
td_queue_tiocb(s->driver, tiocb);
s->queued++;

}

/**
* Reserves a new extent.
*
Expand Down Expand Up @@ -1704,6 +1727,50 @@ schedule_data_write(struct vhd_state *s, td_request_t treq, vhd_flag_t flags)
return 0;
}

static int
schedule_discard(struct vhd_state *s, td_request_t treq, vhd_flag_t flags)
{
uint64_t offset;
uint32_t blk = 0, sec = 0;
struct vhd_bitmap *bm = NULL;
struct vhd_request *req;

blk = treq.sec / s->spb;
sec = treq.sec % s->spb;

offset = bat_entry(s, blk);
offset += s->bm_secs + sec;
offset = vhd_sectors_to_bytes(offset);

req = alloc_vhd_request(s);
if (!req) {
return -EBUSY;
}

req->treq = treq;
req->op = VHD_OP_DISCARD;
req->next = NULL;


if (test_vhd_flag(flags, VHD_FLAG_REQ_UPDATE_BITMAP)) {
bm = get_bitmap(s, blk);
ASSERT(bm && bitmap_valid(bm));
lock_bitmap(bm);

if (bm->tx.closed) {
add_to_tail(&bm->queue, req);
set_vhd_flag(req->flags, VHD_FLAG_REQ_QUEUED);
} else
add_to_transaction(&bm->tx, req);
} else if (sec == 0 && /* first sector inside data block */
s->vhd.footer.type != HD_TYPE_FIXED &&
bat_entry(s, blk) != s->first_db &&
test_batmap(s, blk))
schedule_redundant_bm_write(s, blk);
aio_discard(s, req, offset);
return 0;
}

static int
schedule_bitmap_read(struct vhd_state *s, uint32_t blk)
{
Expand Down Expand Up @@ -1982,6 +2049,92 @@ vhd_queue_write(td_driver_t *driver, td_request_t treq)
}
}

static void
vhd_queue_discard(td_driver_t *driver, td_request_t treq)
{
struct vhd_state *s = (struct vhd_state *)driver->data;

DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", secs: 0x%04x, (seg: %d)\n",
s->vhd.file, treq.sec, treq.secs, treq.sidx);

if (s->vhd.footer.type == HD_TYPE_FIXED)
return;

while (treq.secs) {
int err;
uint8_t flags;
td_request_t clone;

err = 0;
flags = 0;
clone = treq;

switch (read_bitmap_cache(s, clone.sec, VHD_OP_DISCARD)) {
case -EINVAL:
err = -EINVAL;
goto fail;

case VHD_BM_BAT_LOCKED:
err = -EBUSY;
goto fail;

case VHD_BM_BAT_CLEAR:
clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb));
break;

case VHD_BM_BIT_CLEAR:
clone.secs = read_bitmap_cache_span(s, clone.sec, clone.secs, 0);
err = schedule_discard(s, clone, 0);
if (err)
goto fail;
break;

case VHD_BM_BIT_SET:
flags = VHD_FLAG_REQ_UPDATE_BITMAP;
clone.secs = read_bitmap_cache_span(s, clone.sec, clone.secs, 1);
err = schedule_discard(s, clone, flags);
if (err)
goto fail;
break;

case VHD_BM_NOT_CACHED:
clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb));
err = schedule_bitmap_read(s, clone.sec / s->spb);
if (err)
goto fail;

err = __vhd_queue_request(s, VHD_OP_DISCARD, clone);
if (err)
goto fail;
break;

case VHD_BM_READ_PENDING:
clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb));
err = __vhd_queue_request(s, VHD_OP_DISCARD, clone);
if (err)
goto fail;
break;

default:
ASSERT(0);
break;
}

treq.sec += clone.secs;
treq.secs -= clone.secs;
continue;

fail:
DPRINTF("FAIL!");
clone.secs = treq.secs;
td_complete_request(clone, err);
break;
}

//td_complete_request(treq, err);

}

static inline void
signal_completion(struct vhd_request *list, int error)
{
Expand Down Expand Up @@ -2263,12 +2416,15 @@ finish_bitmap_read(struct vhd_request *req)
free_vhd_request(s, r);

ASSERT(tmp.op == VHD_OP_DATA_READ ||
tmp.op == VHD_OP_DATA_WRITE);
tmp.op == VHD_OP_DATA_WRITE ||
tmp.op == VHD_OP_DISCARD);

if (tmp.op == VHD_OP_DATA_READ)
vhd_queue_read(s->driver, tmp.treq);
else if (tmp.op == VHD_OP_DATA_WRITE)
vhd_queue_write(s->driver, tmp.treq);
else if (tmp.op == VHD_OP_DISCARD)
vhd_queue_discard(s->driver, tmp.treq);

r = next;
}
Expand Down Expand Up @@ -2359,6 +2515,46 @@ finish_data_write(struct vhd_request *req)
}
}

static void
finish_discard(struct vhd_request *req)
{
int i;
struct vhd_transaction *tx = req->tx;
struct vhd_state *s = (struct vhd_state *)req->state;

set_vhd_flag(req->flags, VHD_FLAG_REQ_FINISHED);

if (tx) {
uint32_t blk, sec;
struct vhd_bitmap *bm;

blk = req->treq.sec / s->spb;
sec = req->treq.sec % s->spb;
bm = get_bitmap(s, blk);

ASSERT(bm && bitmap_valid(bm) && bitmap_locked(bm));

tx->finished++;

DBG(TLOG_DBG, "lsec: 0x%08"PRIx64", blk: 0x04%"PRIx64", "
"tx->started: %d, tx->finished: %d\n", req->treq.sec,
req->treq.sec / s->spb, tx->started, tx->finished);

if (!req->error)
for (i = 0; i < req->treq.secs; i++)
vhd_bitmap_clear(&s->vhd, bm->shadow, sec + i);

if (transaction_completed(tx))
finish_data_transaction(s, bm);

} else if (!test_vhd_flag(req->flags, VHD_FLAG_REQ_QUEUED)) {
ASSERT(!req->next);
DBG(TLOG_DBG, "lsec: 0x%08"PRIx64", blk: 0x%04"PRIx64"\n",
req->treq.sec, req->treq.sec / s->spb);
signal_completion(req, 0);
}
}

void
vhd_complete(void *arg, struct tiocb *tiocb, int err)
{
Expand Down Expand Up @@ -2407,6 +2603,10 @@ vhd_complete(void *arg, struct tiocb *tiocb, int err)
finish_bat_write(req);
break;

case VHD_OP_DISCARD:
finish_discard(req);
break;

default:
ASSERT(0);
break;
Expand Down Expand Up @@ -2494,6 +2694,7 @@ struct tap_disk tapdisk_vhd = {
.td_close = _vhd_close,
.td_queue_read = vhd_queue_read,
.td_queue_write = vhd_queue_write,
.td_queue_discard = vhd_queue_discard,
.td_get_parent_id = vhd_get_parent_id,
.td_validate_parent = vhd_validate_parent,
.td_debug = vhd_debug,
Expand Down
6 changes: 6 additions & 0 deletions drivers/tapdisk-filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,9 @@ tapdisk_filter_iocbs(struct tfilter *filter, struct iocb **iocbs, int num)
for (i = 0; i < num; i++) {
struct iocb *io = iocbs[i];

if (io->aio_lio_opcode == IO_CMD_NOOP)
continue;

if (filter->mode & TD_INJECT_FAULTS) {
if ((random() % 100) <= TD_FAULT_RATE) {
inject_fault(filter, io);
Expand All @@ -260,6 +263,9 @@ tapdisk_filter_events(struct tfilter *filter, struct io_event *events, int num)
for (i = 0; i < num; i++) {
struct iocb *io = events[i].obj;

if (io->aio_lio_opcode == IO_CMD_NOOP)
continue;

if (filter->mode & TD_INJECT_FAULTS) {
if (fault_injected(filter, io)) {
recover_fault(filter, io);
Expand Down
8 changes: 6 additions & 2 deletions drivers/tapdisk-image.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,17 @@ tapdisk_image_check_td_request(td_image_t *image, td_request_t treq)
info = &image->info;
rdonly = td_flag_test(image->flags, TD_OPEN_RDONLY);

if (treq.op != TD_OP_READ && treq.op != TD_OP_WRITE)
if (treq.op != TD_OP_READ && treq.op != TD_OP_WRITE &&
treq.op != TD_OP_DISCARD)
goto fail;

if (treq.op == TD_OP_WRITE && rdonly) {
err = -EPERM;
goto fail;
}

if (treq.secs <= 0 || treq.sec + treq.secs > info->size)
if ((treq.secs <= 0 || treq.sec + treq.secs > info->size) &&
treq.op != TD_OP_DISCARD)
goto fail;

return 0;
Expand Down Expand Up @@ -153,6 +155,8 @@ tapdisk_image_check_request(td_image_t *image, td_vbd_request_t *vreq)
secs += vreq->iov[i].secs;

switch (vreq->op) {
case TD_OP_DISCARD:
/* falls through */
case TD_OP_WRITE:
if (rdonly) {
err = -EPERM;
Expand Down

0 comments on commit 9520a10

Please sign in to comment.