diff options
author | Max Noel <mnoel@ludia.com> | 2013-02-25 16:02:06 -0500 |
---|---|---|
committer | Max Noel <mnoel@ludia.com> | 2013-02-25 16:02:06 -0500 |
commit | 2d31dff5f1172c566c799a5fd643f8cc94761bf9 (patch) | |
tree | 577ed7153b6b07e2c816681c017320e2da26e118 | |
parent | d443e9581bd0aaf862d5622b455d9079c9ddb750 (diff) | |
download | boto-2d31dff5f1172c566c799a5fd643f8cc94761bf9.tar.gz |
Added utility scripts to dump/load the contents of DynamoDB tables.
-rwxr-xr-x | bin/dynamodb_dump | 74 | ||||
-rwxr-xr-x | bin/dynamodb_load | 109 |
2 files changed, 183 insertions, 0 deletions
# ===================== bin/dynamodb_dump (new file, mode 100755) =====================
#!/usr/bin/env python

import argparse
import errno
import os

import boto
from boto.compat import json


DESCRIPTION = """Dump the contents of one or more DynamoDB tables to the local filesystem.

Each table is dumped into two files:
 - {table_name}.metadata stores the table's name, schema and provisioned
   throughput.
 - {table_name}.data stores the table's actual contents.

Both files are created in the current directory. To write them somewhere else,
use the --out-dir parameter (the target directory will be created if needed).
"""


def dump_table(table, out_dir):
    """Dump one table into ``out_dir``.

    Writes two files: ``{table.name}.metadata`` (a single JSON object with
    the table's name, schema and provisioned throughput) and
    ``{table.name}.data`` (one JSON object per line, one line per item).
    """
    metadata_file = os.path.join(out_dir, "%s.metadata" % table.name)
    data_file = os.path.join(out_dir, "%s.data" % table.name)

    with open(metadata_file, "w") as metadata_fd:
        json.dump(
            {
                "name": table.name,
                "schema": table.schema.dict,
                "read_units": table.read_units,
                "write_units": table.write_units,
            },
            metadata_fd
        )

    with open(data_file, "w") as data_fd:
        for item in table.scan():
            # JSON can't serialize sets -- convert those to lists.
            data = {}
            for k, v in item.items():
                if isinstance(v, (set, frozenset)):
                    data[k] = list(v)
                else:
                    data[k] = v

            data_fd.write(json.dumps(data))
            data_fd.write("\n")


def main():
    """Parse arguments, create the output directory, dump each table."""
    parser = argparse.ArgumentParser(
        prog="dynamodb_dump",
        description=DESCRIPTION
    )
    parser.add_argument("--out-dir", default=".")
    parser.add_argument("tables", metavar="TABLES", nargs="+")

    namespace = parser.parse_args()

    try:
        os.makedirs(namespace.out_dir)
    except OSError as e:
        # An already-existing target directory is fine; anything else
        # (permissions, a file in the way, ...) is a real error.
        if e.errno != errno.EEXIST:
            raise

    conn = boto.connect_dynamodb()
    for t in namespace.tables:
        dump_table(conn.get_table(t), namespace.out_dir)


if __name__ == "__main__":
    main()


# ===================== bin/dynamodb_load (new file, mode 100755) =====================
#!/usr/bin/env python

import argparse
import os

import boto
from boto.compat import json
from boto.dynamodb.schema import Schema


DESCRIPTION = """Load data into one or more DynamoDB tables.

For each table, data is read from two files:
 - {table_name}.metadata for the table's name, schema and provisioned
   throughput (only required if creating the table).
 - {table_name}.data for the table's actual contents.

Both files are searched for in the current directory. To read them from
somewhere else, use the --in-dir parameter.

This program does not wipe the tables prior to loading data. However, any
items present in the data files will overwrite the table's contents.
"""


def _json_iterload(fd):
    """Lazily yield newline-separated JSON objects from a file-like object.

    An object may span several physical lines; lines are accumulated until
    the buffer parses as a complete JSON value.  Raises ValueError if the
    file ends with trailing non-whitespace that never parses.
    """
    buf = ""
    eof = False
    while not eof:
        try:
            # Pull one more line into the buffer.
            buf += next(fd)
        except StopIteration:
            # Don't let StopIteration bubble up, otherwise the last
            # object in the file would never be decoded.
            eof = True
        try:
            # Try to decode a complete JSON object.
            json_object = json.loads(buf.strip())

            # Success: clear the buffer (everything was decoded).
            buf = ""
        except ValueError:
            if eof and buf.strip():
                # No more lines to load and the buffer contains something
                # other than whitespace: the file is, in fact, malformed.
                raise
            # We couldn't decode a complete JSON object: load more lines.
            continue

        yield json_object


def create_table(metadata_fd, conn=None, name=None):
    """Create a table from a metadata file-like object.

    ``conn`` is an existing DynamoDB connection (a fresh one is opened if
    omitted); ``name`` overrides the table name recorded in the metadata.
    Blocks until the table is ACTIVE and returns it.
    """
    if conn is None:
        conn = boto.connect_dynamodb()
    metadata = json.load(metadata_fd)
    table = conn.create_table(
        name=name or metadata["name"],
        schema=Schema(metadata["schema"]),
        read_units=metadata["read_units"],
        write_units=metadata["write_units"],
    )
    table.refresh(wait_for_active=True)
    return table


def load_table(table, in_fd):
    """Load items into a table from a file-like object.

    ``in_fd`` holds newline-separated JSON objects as written by
    dynamodb_dump.
    """
    for i in _json_iterload(in_fd):
        # Convert lists back to sets (dynamodb_dump stored sets as lists
        # because JSON has no set type).
        data = {}
        for k, v in i.items():
            if isinstance(v, list):
                data[k] = set(v)
            else:
                data[k] = v
        # Write the *converted* dict -- using the raw item here would
        # silently load sets back as lists.
        table.new_item(attrs=data).put()


def main():
    """Parse arguments and load each table's data file, optionally creating
    the tables from their metadata files first."""
    parser = argparse.ArgumentParser(
        prog="dynamodb_load",
        description=DESCRIPTION
    )
    parser.add_argument(
        "--create-tables",
        action="store_true",
        help="Create the tables if they don't exist already (without this flag, attempts to load data into non-existing tables fail)."
    )
    parser.add_argument("--in-dir", default=".")
    parser.add_argument("tables", metavar="TABLES", nargs="+")

    namespace = parser.parse_args()

    conn = boto.connect_dynamodb()
    for t in namespace.tables:
        metadata_file = os.path.join(namespace.in_dir, "%s.metadata" % t)
        data_file = os.path.join(namespace.in_dir, "%s.data" % t)
        if namespace.create_tables:
            with open(metadata_file) as meta_fd:
                table = create_table(meta_fd, conn=conn, name=t)
        else:
            table = conn.get_table(t)

        with open(data_file) as in_fd:
            load_table(table, in_fd)


if __name__ == "__main__":
    main()