diff options
author | Max Noel <mnoel@ludia.com> | 2013-02-25 16:02:06 -0500 |
---|---|---|
committer | Max Noel <mnoel@ludia.com> | 2013-02-25 16:02:06 -0500 |
commit | 2d31dff5f1172c566c799a5fd643f8cc94761bf9 (patch) | |
tree | 577ed7153b6b07e2c816681c017320e2da26e118 | |
parent | d443e9581bd0aaf862d5622b455d9079c9ddb750 (diff) | |
download | boto-2d31dff5f1172c566c799a5fd643f8cc94761bf9.tar.gz |
Added utility scripts to dump/load the contents of DynamoDB tables.
-rwxr-xr-x | bin/dynamodb_dump | 74 | ||||
-rwxr-xr-x | bin/dynamodb_load | 109 |
2 files changed, 183 insertions, 0 deletions
# ===================== bin/dynamodb_dump (new file, mode 100755) =====================
#!/usr/bin/env python

import argparse
import errno
import os

import boto
from boto.compat import json


DESCRIPTION = """Dump the contents of one or more DynamoDB tables to the local filesystem.

Each table is dumped into two files:
 - {table_name}.metadata stores the table's name, schema and provisioned
   throughput.
 - {table_name}.data stores the table's actual contents.

Both files are created in the current directory. To write them somewhere else,
use the --out-dir parameter (the target directory will be created if needed).
"""


def dump_table(table, out_dir):
    """Dump one table into ``out_dir``.

    Writes two files: ``{table.name}.metadata`` (a single JSON object with
    the table's name, schema and provisioned throughput) and
    ``{table.name}.data`` (one JSON object per line, one line per item).
    """
    metadata_file = os.path.join(out_dir, "%s.metadata" % table.name)
    data_file = os.path.join(out_dir, "%s.data" % table.name)

    with open(metadata_file, "w") as metadata_fd:
        json.dump(
            {
                "name": table.name,
                "schema": table.schema.dict,
                "read_units": table.read_units,
                "write_units": table.write_units,
            },
            metadata_fd
        )

    with open(data_file, "w") as data_fd:
        for item in table.scan():
            # JSON can't serialize sets -- convert those to lists.
            data = {}
            for k, v in item.items():
                if isinstance(v, (set, frozenset)):
                    data[k] = list(v)
                else:
                    data[k] = v

            data_fd.write(json.dumps(data))
            data_fd.write("\n")


def main():
    """Parse arguments, create the output directory, dump each table."""
    parser = argparse.ArgumentParser(
        prog="dynamodb_dump",
        description=DESCRIPTION
    )
    parser.add_argument("--out-dir", default=".")
    parser.add_argument("tables", metavar="TABLES", nargs="+")

    namespace = parser.parse_args()

    try:
        os.makedirs(namespace.out_dir)
    except OSError as e:
        # An already-existing target directory is fine; anything else
        # (permissions, a file in the way, ...) is a real error.
        if e.errno != errno.EEXIST:
            raise

    conn = boto.connect_dynamodb()
    for t in namespace.tables:
        dump_table(conn.get_table(t), namespace.out_dir)


if __name__ == "__main__":
    main()


# ===================== bin/dynamodb_load (new file, mode 100755) =====================
#!/usr/bin/env python

import argparse
import os

import boto
from boto.compat import json
from boto.dynamodb.schema import Schema


DESCRIPTION = """Load data into one or more DynamoDB tables.

For each table, data is read from two files:
 - {table_name}.metadata for the table's name, schema and provisioned
   throughput (only required if creating the table).
 - {table_name}.data for the table's actual contents.

Both files are searched for in the current directory. To read them from
somewhere else, use the --in-dir parameter.

This program does not wipe the tables prior to loading data. However, any
items present in the data files will overwrite the table's contents.
"""


def _json_iterload(fd):
    """Lazily yield newline-separated JSON objects from a file-like object.

    An object may span several physical lines; lines are accumulated until
    the buffer parses as a complete JSON value.  Raises ValueError if the
    file ends with trailing non-whitespace that never parses.
    """
    buf = ""
    eof = False
    while not eof:
        try:
            # Pull one more line into the buffer.
            buf += next(fd)
        except StopIteration:
            # Don't let StopIteration bubble up, otherwise the last
            # object in the file would never be decoded.
            eof = True
        try:
            # Try to decode a complete JSON object.
            json_object = json.loads(buf.strip())

            # Success: clear the buffer (everything was decoded).
            buf = ""
        except ValueError:
            if eof and buf.strip():
                # No more lines to load and the buffer contains something
                # other than whitespace: the file is, in fact, malformed.
                raise
            # We couldn't decode a complete JSON object: load more lines.
            continue

        yield json_object


def create_table(metadata_fd, conn=None, name=None):
    """Create a table from a metadata file-like object.

    ``conn`` is an existing DynamoDB connection (a fresh one is opened if
    omitted); ``name`` overrides the table name recorded in the metadata.
    Blocks until the table is ACTIVE and returns it.
    """
    if conn is None:
        conn = boto.connect_dynamodb()
    metadata = json.load(metadata_fd)
    table = conn.create_table(
        name=name or metadata["name"],
        schema=Schema(metadata["schema"]),
        read_units=metadata["read_units"],
        write_units=metadata["write_units"],
    )
    table.refresh(wait_for_active=True)
    return table


def load_table(table, in_fd):
    """Load items into a table from a file-like object.

    ``in_fd`` holds newline-separated JSON objects as written by
    dynamodb_dump.
    """
    for i in _json_iterload(in_fd):
        # Convert lists back to sets (dynamodb_dump stored sets as lists
        # because JSON has no set type).
        data = {}
        for k, v in i.items():
            if isinstance(v, list):
                data[k] = set(v)
            else:
                data[k] = v
        # Write the *converted* dict -- using the raw item here would
        # silently load sets back as lists.
        table.new_item(attrs=data).put()


def main():
    """Parse arguments and load each table's data file, optionally creating
    the tables from their metadata files first."""
    parser = argparse.ArgumentParser(
        prog="dynamodb_load",
        description=DESCRIPTION
    )
    parser.add_argument(
        "--create-tables",
        action="store_true",
        help="Create the tables if they don't exist already (without this flag, attempts to load data into non-existing tables fail)."
    )
    parser.add_argument("--in-dir", default=".")
    parser.add_argument("tables", metavar="TABLES", nargs="+")

    namespace = parser.parse_args()

    conn = boto.connect_dynamodb()
    for t in namespace.tables:
        metadata_file = os.path.join(namespace.in_dir, "%s.metadata" % t)
        data_file = os.path.join(namespace.in_dir, "%s.data" % t)
        if namespace.create_tables:
            with open(metadata_file) as meta_fd:
                table = create_table(meta_fd, conn=conn, name=t)
        else:
            table = conn.get_table(t)

        with open(data_file) as in_fd:
            load_table(table, in_fd)


if __name__ == "__main__":
    main()