diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/include/rados.h | 11 | ||||
-rw-r--r-- | src/os/FileStore.cc | 47 | ||||
-rw-r--r-- | src/os/FileStore.h | 1 | ||||
-rw-r--r-- | src/os/ObjectStore.h | 11 | ||||
-rw-r--r-- | src/osd/ReplicatedPG.cc | 18 | ||||
-rw-r--r-- | src/osd/osd_types.cc | 3 | ||||
-rw-r--r-- | src/osdc/Objecter.h | 6 |
7 files changed, 97 insertions, 0 deletions
diff --git a/src/include/rados.h b/src/include/rados.h
index 073ad62bd5f..9a7aa159862 100644
--- a/src/include/rados.h
+++ b/src/include/rados.h
@@ -195,6 +195,8 @@ enum {
 
         CEPH_OSD_OP_WATCH = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15,
 
+        CEPH_OSD_OP_PREALLOC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 16,
+
         /* omap */
         CEPH_OSD_OP_OMAPGETKEYS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 17,
         CEPH_OSD_OP_OMAPGETVALS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 18,
@@ -348,6 +350,11 @@ enum {
         CEPH_OSD_CMPXATTR_MODE_U64 = 2
 };
 
+enum {
+        CEPH_OSD_PREALLOC_FLAG_ONCREATE = 1, /* on creation only */
+        CEPH_OSD_PREALLOC_FLAG_IFEXISTS = 2, /* if exists only */
+};
+
 /*
  * an individual object operation. each may be accompanied by some data
  * payload
@@ -389,6 +396,10 @@ struct ceph_osd_op {
                         __le64 offset, length;
                         __le64 src_offset;
                 } __attribute__ ((packed)) clonerange;
+                struct {
+                        __le64 offset, length;
+                        __le64 flags; /* CEPH_OSD_PREALLOC_FLAG_* */
+                } __attribute__ ((packed)) prealloc;
         };
         __le32 payload_len;
 } __attribute__ ((packed));
diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc
index 2c66a5ea7db..e2bd4988a09 100644
--- a/src/os/FileStore.cc
+++ b/src/os/FileStore.cc
@@ -2351,6 +2351,17 @@ unsigned FileStore::_do_transaction(Transaction& t, uint64_t op_seq, int trans_n
       }
       break;
 
+    case Transaction::OP_FALLOCATE:
+      {
+        coll_t cid = i.get_cid();
+        hobject_t oid = i.get_oid();
+        uint64_t off = i.get_length();  // NOTE(review): offset is read with get_length() too; matches the two uint64_t encodes in Transaction::fallocate -- confirm
+        uint64_t len = i.get_length();
+        if (_check_replay_guard(cid, oid, spos) > 0)
+          r = _fallocate(cid, oid, off, len);
+      }
+      break;
+
     case Transaction::OP_TRIMCACHE:
       {
         i.get_cid();
@@ -2932,6 +2943,42 @@ int FileStore::_zero(coll_t cid, const hobject_t& oid, uint64_t offset, size_t l
   return ret;
 }
 
+int FileStore::_fallocate(coll_t cid, const hobject_t& oid, uint64_t offset, size_t len)
+{
+  dout(15) << "fallocate " << cid << "/" << oid << " " << offset << "~" << len << dendl;
+  int ret = 0;
+
+  int fd = lfn_open(cid, oid, O_WRONLY);  // must be writable: fallocate(2) and ftruncate(2) reject a read-only descriptor
+  if (fd < 0) {
+    ret = -errno;  // NOTE(review): assumes lfn_open leaves the failure code in errno -- confirm
+    goto out;
+  }
+
+  // try the real way
+#ifdef CEPH_HAVE_FALLOCATE
+# if !defined(DARWIN) && !defined(__FreeBSD__)
+  ret = fallocate(fd, 0, offset, len);
+  if (ret < 0)
+    ret = -errno;
+  goto out_close;  // unconditional: the size-only fallback below runs only when this section is compiled out
+# endif
+#endif
+
+  // oh well; just make sure we adjust i_size
+  struct stat st;
+  ret = ::fstat(fd, &st);
+  if (ret == 0 && st.st_size < offset + len)
+    ret = ::ftruncate(fd, offset + len);  // only ever grows the file; a file already this large is left alone
+  if (ret < 0)
+    ret = -errno;
+
+ out_close:
+  TEMP_FAILURE_RETRY(::close(fd));
+ out:
+  dout(20) << "fallocate " << cid << "/" << oid << " " << offset << "~" << len << " = " << ret << dendl;
+  return ret;  // 0 on success, negative errno on failure
+}
+
 int FileStore::_clone(coll_t cid, const hobject_t& oldoid, const hobject_t& newoid,
                       const SequencerPosition& spos)
 {
diff --git a/src/os/FileStore.h b/src/os/FileStore.h
index f18e1f88269..8d300b094e2 100644
--- a/src/os/FileStore.h
+++ b/src/os/FileStore.h
@@ -367,6 +367,7 @@ public:
   int _touch(coll_t cid, const hobject_t& oid);
   int _write(coll_t cid, const hobject_t& oid, uint64_t offset, size_t len, const bufferlist& bl);
   int _zero(coll_t cid, const hobject_t& oid, uint64_t offset, size_t len);
+  int _fallocate(coll_t cid, const hobject_t& oid, uint64_t offset, size_t len);
   int _truncate(coll_t cid, const hobject_t& oid, uint64_t size);
   int _clone(coll_t cid, const hobject_t& oldoid, const hobject_t& newoid,
              const SequencerPosition& spos);
diff --git a/src/os/ObjectStore.h b/src/os/ObjectStore.h
index 439897f273a..3da605494ce 100644
--- a/src/os/ObjectStore.h
+++ b/src/os/ObjectStore.h
@@ -152,6 +152,8 @@ public:
       OP_OMAP_SETKEYS = 32, // cid, attrset
       OP_OMAP_RMKEYS = 33, // cid, keyset
       OP_OMAP_SETHEADER = 34, // cid, header
+
+      OP_FALLOCATE = 35, // cid, oid, offset, length
     };
 
   private:
@@ -344,6 +346,15 @@ public:
       ::encode(len, tbl);
       ops++;
     }
+    void fallocate(coll_t cid, const hobject_t& oid, uint64_t off, uint64_t len) {
+      __u32 op = OP_FALLOCATE;  // encode order below must match the OP_FALLOCATE decode: cid, oid, offset, length
+      ::encode(op, tbl);
+      ::encode(cid, tbl);
+      ::encode(oid, tbl);
+      ::encode(off, tbl);
+      ::encode(len, tbl);
+      ops++;
+    }
     void truncate(coll_t cid, const hobject_t& oid, uint64_t off) {
       __u32 op = OP_TRUNCATE;
       ::encode(op, tbl);
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 76ad5089493..c86a3d41a27 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -2424,6 +2424,24 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
         }
       }
       break;
+
+    case CEPH_OSD_OP_PREALLOC:
+      {
+        if ((op.prealloc.flags & CEPH_OSD_PREALLOC_FLAG_ONCREATE) && obs.exists) {
+          // obj already exists, no-op
+        } else if ((op.prealloc.flags & CEPH_OSD_PREALLOC_FLAG_IFEXISTS) && !obs.exists) {
+          // obj does not exist, no-op
+        } else {
+          if (!obs.exists) {  // create the object first so the store has something to preallocate
+            ctx->delta_stats.num_objects++;
+            t.touch(coll, soid);
+            obs.exists = true;
+          }
+          t.fallocate(coll, soid, op.prealloc.offset, op.prealloc.length);
+        }
+      }
+      break;
+
     case CEPH_OSD_OP_CREATE:
       {
         int flags = le32_to_cpu(op.flags);
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index 4a1b3fcf2ef..329b5d14eb3 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -2747,6 +2747,9 @@ ostream& operator<<(ostream& out, const OSDOp& op)
     case CEPH_OSD_OP_ROLLBACK:
       out << " " << snapid_t(op.op.snap.snapid);
       break;
+    case CEPH_OSD_OP_PREALLOC:
+      out << " " << op.op.prealloc.offset << "~" << op.op.prealloc.length << " flags " << op.op.prealloc.flags;
+      break;
     default:
       out << " " << op.op.extent.offset << "~" << op.op.extent.length;
       if (op.op.extent.truncate_seq)
diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h
index 9a20849d574..3f4c8dde3fe 100644
--- a/src/osdc/Objecter.h
+++ b/src/osdc/Objecter.h
@@ -260,6 +260,12 @@ struct ObjectOperation {
     bufferlist bl;
     add_data(CEPH_OSD_OP_SPARSE_READ, off, len, bl);
   }
+  void prealloc(uint64_t off, uint64_t len, uint64_t flags) {  // flags: CEPH_OSD_PREALLOC_FLAG_* bits
+    OSDOp& osd_op = add_op(CEPH_OSD_OP_PREALLOC);
+    osd_op.op.prealloc.offset = off;
+    osd_op.op.prealloc.length = len;
+    osd_op.op.prealloc.flags = flags;
+  }
   void clone_range(const object_t& src_oid, uint64_t src_offset,
                    uint64_t len, uint64_t dst_offset) {
     add_clone_range(CEPH_OSD_OP_CLONERANGE, dst_offset, len, src_oid, src_offset, CEPH_NOSNAP);