summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/include/rados.h11
-rw-r--r--src/os/FileStore.cc47
-rw-r--r--src/os/FileStore.h1
-rw-r--r--src/os/ObjectStore.h11
-rw-r--r--src/osd/ReplicatedPG.cc18
-rw-r--r--src/osd/osd_types.cc3
-rw-r--r--src/osdc/Objecter.h6
7 files changed, 97 insertions, 0 deletions
diff --git a/src/include/rados.h b/src/include/rados.h
index 073ad62bd5f..9a7aa159862 100644
--- a/src/include/rados.h
+++ b/src/include/rados.h
@@ -195,6 +195,8 @@ enum {
CEPH_OSD_OP_WATCH = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15,
+ CEPH_OSD_OP_PREALLOC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 16,
+
/* omap */
CEPH_OSD_OP_OMAPGETKEYS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 17,
CEPH_OSD_OP_OMAPGETVALS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 18,
@@ -348,6 +350,11 @@ enum {
CEPH_OSD_CMPXATTR_MODE_U64 = 2
};
+enum { /* bits for the prealloc op's flags field */
+ CEPH_OSD_PREALLOC_FLAG_ONCREATE = 1, /* on creation only: no-op if the object already exists */
+ CEPH_OSD_PREALLOC_FLAG_IFEXISTS = 2, /* if exists only: no-op if the object does not exist */
+};
+
/*
* an individual object operation. each may be accompanied by some data
* payload
@@ -389,6 +396,10 @@ struct ceph_osd_op {
__le64 offset, length;
__le64 src_offset;
} __attribute__ ((packed)) clonerange;
+ struct { /* arguments for CEPH_OSD_OP_PREALLOC */
+ __le64 offset, length; /* byte range to preallocate */
+ __le64 flags; /* CEPH_OSD_PREALLOC_FLAG_* */
+ } __attribute__ ((packed)) prealloc;
};
__le32 payload_len;
} __attribute__ ((packed));
diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc
index 2c66a5ea7db..e2bd4988a09 100644
--- a/src/os/FileStore.cc
+++ b/src/os/FileStore.cc
@@ -2351,6 +2351,17 @@ unsigned FileStore::_do_transaction(Transaction& t, uint64_t op_seq, int trans_n
}
break;
+ case Transaction::OP_FALLOCATE: // payload layout: cid, oid, offset, length
+ {
+ coll_t cid = i.get_cid();
+ hobject_t oid = i.get_oid();
+ uint64_t off = i.get_length(); // NOTE(review): offset is read via get_length(); presumably it just decodes a u64 -- confirm
+ uint64_t len = i.get_length();
+ if (_check_replay_guard(cid, oid, spos) > 0) // only apply when the replay guard returns > 0
+ r = _fallocate(cid, oid, off, len);
+ }
+ break;
+
case Transaction::OP_TRIMCACHE:
{
i.get_cid();
@@ -2932,6 +2943,42 @@ int FileStore::_zero(coll_t cid, const hobject_t& oid, uint64_t offset, size_t l
return ret;
}
+/**
+ * Preallocate backing storage for the byte range offset~len of an object.
+ *
+ * Uses fallocate(2) where it is compiled in.  If it is not compiled in, or
+ * the underlying filesystem reports EOPNOTSUPP, fall back to making sure the
+ * file is at least offset+len bytes long.
+ *
+ * @param cid    collection holding the object
+ * @param oid    object to preallocate
+ * @param offset start of the range, in bytes
+ * @param len    length of the range, in bytes
+ * @return 0 on success, negative errno on failure
+ */
+int FileStore::_fallocate(coll_t cid, const hobject_t& oid, uint64_t offset, size_t len)
+{
+  dout(15) << "fallocate " << cid << "/" << oid << " " << offset << "~" << len << dendl;
+  int ret = 0;
+  struct stat st;  // declared up front so no goto jumps across it
+
+  // fallocate(2) and ftruncate(2) both reject descriptors that are not open
+  // for writing, so the file must not be opened O_RDONLY here.
+  int fd = lfn_open(cid, oid, O_WRONLY);
+  if (fd < 0) {
+    ret = -errno;  // NOTE(review): assumes lfn_open leaves errno set on failure -- confirm
+    goto out;
+  }
+
+  // try the real way
+#ifdef CEPH_HAVE_FALLOCATE
+# if !defined(DARWIN) && !defined(__FreeBSD__)
+  ret = fallocate(fd, 0, offset, len);
+  if (ret < 0)
+    ret = -errno;
+  // Only fall through to the i_size adjustment when the filesystem cannot
+  // preallocate at all; success and every other error are final.
+  if (ret != -EOPNOTSUPP)
+    goto out_close;
+# endif
+#endif
+
+  // oh well; just make sure we adjust i_size
+  ret = ::fstat(fd, &st);
+  if (ret == 0 && static_cast<uint64_t>(st.st_size) < offset + len)
+    ret = ::ftruncate(fd, offset + len);
+  if (ret < 0)
+    ret = -errno;
+
+ out_close:
+  TEMP_FAILURE_RETRY(::close(fd));
+ out:
+  dout(20) << "fallocate " << cid << "/" << oid << " " << offset << "~" << len << " = " << ret << dendl;
+  return ret;
+}
+
int FileStore::_clone(coll_t cid, const hobject_t& oldoid, const hobject_t& newoid,
const SequencerPosition& spos)
{
diff --git a/src/os/FileStore.h b/src/os/FileStore.h
index f18e1f88269..8d300b094e2 100644
--- a/src/os/FileStore.h
+++ b/src/os/FileStore.h
@@ -367,6 +367,7 @@ public:
int _touch(coll_t cid, const hobject_t& oid);
int _write(coll_t cid, const hobject_t& oid, uint64_t offset, size_t len, const bufferlist& bl);
int _zero(coll_t cid, const hobject_t& oid, uint64_t offset, size_t len);
+ int _fallocate(coll_t cid, const hobject_t& oid, uint64_t offset, size_t len);
int _truncate(coll_t cid, const hobject_t& oid, uint64_t size);
int _clone(coll_t cid, const hobject_t& oldoid, const hobject_t& newoid,
const SequencerPosition& spos);
diff --git a/src/os/ObjectStore.h b/src/os/ObjectStore.h
index 439897f273a..3da605494ce 100644
--- a/src/os/ObjectStore.h
+++ b/src/os/ObjectStore.h
@@ -152,6 +152,8 @@ public:
OP_OMAP_SETKEYS = 32, // cid, attrset
OP_OMAP_RMKEYS = 33, // cid, keyset
OP_OMAP_SETHEADER = 34, // cid, header
+
+ OP_FALLOCATE = 35, // cid, oid, offset, length
};
private:
@@ -344,6 +346,15 @@ public:
::encode(len, tbl);
ops++;
}
+    /// Queue an OP_FALLOCATE for the byte range off~len of object oid in collection cid.
+    void fallocate(coll_t cid, const hobject_t& oid, uint64_t off, uint64_t len) {
+      __u32 opcode = OP_FALLOCATE;
+      // encoded layout: op, cid, oid, offset, length
+      ::encode(opcode, tbl);
+      ::encode(cid, tbl);
+      ::encode(oid, tbl);
+      ::encode(off, tbl);
+      ::encode(len, tbl);
+      ++ops;
+    }
void truncate(coll_t cid, const hobject_t& oid, uint64_t off) {
__u32 op = OP_TRUNCATE;
::encode(op, tbl);
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 76ad5089493..c86a3d41a27 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -2424,6 +2424,24 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
}
break;
+
+ case CEPH_OSD_OP_PREALLOC: // preallocate op.prealloc.offset~length, subject to the ONCREATE/IFEXISTS flags
+ {
+ if ((op.prealloc.flags & CEPH_OSD_PREALLOC_FLAG_ONCREATE) && obs.exists) {
+ // obj already exists, no-op
+ } else if ((op.prealloc.flags & CEPH_OSD_PREALLOC_FLAG_IFEXISTS) && !obs.exists) {
+ // obj does not exist, no-op
+ } else {
+ if (!obs.exists) { // create the object first and account for it in the stats
+ ctx->delta_stats.num_objects++;
+ t.touch(coll, soid);
+ obs.exists = true;
+ }
+ t.fallocate(coll, soid, op.prealloc.offset, op.prealloc.length); // NOTE(review): no object-size bookkeeping is updated here -- confirm that is intended
+ }
+ }
+ break;
+
case CEPH_OSD_OP_CREATE:
{
int flags = le32_to_cpu(op.flags);
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index 4a1b3fcf2ef..329b5d14eb3 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -2747,6 +2747,9 @@ ostream& operator<<(ostream& out, const OSDOp& op)
case CEPH_OSD_OP_ROLLBACK:
out << " " << snapid_t(op.op.snap.snapid);
break;
+ case CEPH_OSD_OP_PREALLOC: // printed as " <offset>~<length> flags <flags>"
+ out << " " << op.op.prealloc.offset << "~" << op.op.prealloc.length << " flags " << op.op.prealloc.flags;
+ break;
default:
out << " " << op.op.extent.offset << "~" << op.op.extent.length;
if (op.op.extent.truncate_seq)
diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h
index 9a20849d574..3f4c8dde3fe 100644
--- a/src/osdc/Objecter.h
+++ b/src/osdc/Objecter.h
@@ -260,6 +260,12 @@ struct ObjectOperation {
bufferlist bl;
add_data(CEPH_OSD_OP_SPARSE_READ, off, len, bl);
}
+  /// Append a CEPH_OSD_OP_PREALLOC op for the byte range off~len; flags takes CEPH_OSD_PREALLOC_FLAG_* bits.
+  void prealloc(uint64_t off, uint64_t len, uint64_t flags) {
+    OSDOp& entry = add_op(CEPH_OSD_OP_PREALLOC);
+    // fill in the prealloc arm of the op union
+    entry.op.prealloc.offset = off;
+    entry.op.prealloc.length = len;
+    entry.op.prealloc.flags = flags;
+  }
void clone_range(const object_t& src_oid, uint64_t src_offset, uint64_t len, uint64_t dst_offset) {
add_clone_range(CEPH_OSD_OP_CLONERANGE, dst_offset, len, src_oid, src_offset, CEPH_NOSNAP);