From 68f64ff8b49ce5b32b11fe136ac89f0ba838a61c Mon Sep 17 00:00:00 2001 From: Florian Achleitner Date: Wed, 19 Sep 2012 17:21:15 +0200 Subject: Implement a remote helper for svn in C Enable basic fetching from subversion repositories. When processing remote URLs starting with testsvn::, git invokes this remote-helper. It starts svnrdump to extract revisions from the subversion repository in the 'dump file format', and converts them to a git-fast-import stream using the functions of vcs-svn/. Imported refs are created in a private namespace at refs/svn/<remote-name>/master. The revision history is imported linearly (no branch detection) and completely, i.e. from revision 0 to HEAD. The 'bidi-import' capability is used. The remote-helper expects data from fast-import on its stdin. It buffers a batch of 'import' command lines in a string_list before starting to process them. Signed-off-by: Florian Achleitner Acked-by: David Michael Barr Signed-off-by: Junio C Hamano --- remote-testsvn.c | 176 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 remote-testsvn.c (limited to 'remote-testsvn.c') diff --git a/remote-testsvn.c b/remote-testsvn.c new file mode 100644 index 0000000000..c30ffcdb37 --- /dev/null +++ b/remote-testsvn.c @@ -0,0 +1,176 @@ +#include "cache.h" +#include "remote.h" +#include "strbuf.h" +#include "url.h" +#include "exec_cmd.h" +#include "run-command.h" +#include "vcs-svn/svndump.h" +#include "notes.h" +#include "argv-array.h" + +static const char *url; +static const char *private_ref; +static const char *remote_ref = "refs/heads/master"; + +static int cmd_capabilities(const char *line); +static int cmd_import(const char *line); +static int cmd_list(const char *line); + +typedef int (*input_command_handler)(const char *); +struct input_command_entry { + const char *name; + input_command_handler fn; + unsigned char batchable; /* whether the command starts or is part of a batch */ +}; + +static const struct 
input_command_entry input_command_list[] = { + { "capabilities", cmd_capabilities, 0 }, + { "import", cmd_import, 1 }, + { "list", cmd_list, 0 }, + { NULL, NULL } +}; + +static int cmd_capabilities(const char *line) +{ + printf("import\n"); + printf("bidi-import\n"); + printf("refspec %s:%s\n\n", remote_ref, private_ref); + fflush(stdout); + return 0; +} + +static void terminate_batch(void) +{ + /* terminate a current batch's fast-import stream */ + printf("done\n"); + fflush(stdout); +} + +static int cmd_import(const char *line) +{ + int code; + int dumpin_fd; + unsigned int startrev = 0; + struct argv_array svndump_argv = ARGV_ARRAY_INIT; + struct child_process svndump_proc; + + memset(&svndump_proc, 0, sizeof(struct child_process)); + svndump_proc.out = -1; + argv_array_push(&svndump_argv, "svnrdump"); + argv_array_push(&svndump_argv, "dump"); + argv_array_push(&svndump_argv, url); + argv_array_pushf(&svndump_argv, "-r%u:HEAD", startrev); + svndump_proc.argv = svndump_argv.argv; + + code = start_command(&svndump_proc); + if (code) + die("Unable to start %s, code %d", svndump_proc.argv[0], code); + dumpin_fd = svndump_proc.out; + + svndump_init_fd(dumpin_fd, STDIN_FILENO); + svndump_read(url, private_ref); + svndump_deinit(); + svndump_reset(); + + close(dumpin_fd); + code = finish_command(&svndump_proc); + if (code) + warning("%s, returned %d", svndump_proc.argv[0], code); + argv_array_clear(&svndump_argv); + + return 0; +} + +static int cmd_list(const char *line) +{ + printf("? %s\n\n", remote_ref); + fflush(stdout); + return 0; +} + +static int do_command(struct strbuf *line) +{ + const struct input_command_entry *p = input_command_list; + static struct string_list batchlines = STRING_LIST_INIT_DUP; + static const struct input_command_entry *batch_cmd; + /* + * commands can be grouped together in a batch. + * Batches are ended by \n. If no batch is active the program ends. 
+ * During a batch all lines are buffered and passed to the handler function + * when the batch is terminated. + */ + if (line->len == 0) { + if (batch_cmd) { + struct string_list_item *item; + for_each_string_list_item(item, &batchlines) + batch_cmd->fn(item->string); + terminate_batch(); + batch_cmd = NULL; + string_list_clear(&batchlines, 0); + return 0; /* end of the batch, continue reading other commands. */ + } + return 1; /* end of command stream, quit */ + } + if (batch_cmd) { + if (prefixcmp(batch_cmd->name, line->buf)) + die("Active %s batch interrupted by %s", batch_cmd->name, line->buf); + /* buffer batch lines */ + string_list_append(&batchlines, line->buf); + return 0; + } + + for (p = input_command_list; p->name; p++) { + if (!prefixcmp(line->buf, p->name) && (strlen(p->name) == line->len || + line->buf[strlen(p->name)] == ' ')) { + if (p->batchable) { + batch_cmd = p; + string_list_append(&batchlines, line->buf); + return 0; + } + return p->fn(line->buf); + } + } + die("Unknown command '%s'\n", line->buf); + return 0; +} + +int main(int argc, const char **argv) +{ + struct strbuf buf = STRBUF_INIT, url_sb = STRBUF_INIT, + private_ref_sb = STRBUF_INIT; + static struct remote *remote; + const char *url_in; + + git_extract_argv0_path(argv[0]); + setup_git_directory(); + if (argc < 2 || argc > 3) { + usage("git-remote-svn []"); + return 1; + } + + remote = remote_get(argv[1]); + url_in = (argc == 3) ? 
argv[2] : remote->url[0]; + + end_url_with_slash(&url_sb, url_in); + url = url_sb.buf; + + strbuf_addf(&private_ref_sb, "refs/svn/%s/master", remote->name); + private_ref = private_ref_sb.buf; + + while (1) { + if (strbuf_getline(&buf, stdin, '\n') == EOF) { + if (ferror(stdin)) + die("Error reading command stream"); + else + die("Unexpected end of command stream"); + } + if (do_command(&buf)) + break; + strbuf_reset(&buf); + } + + strbuf_release(&buf); + strbuf_release(&url_sb); + strbuf_release(&private_ref_sb); + return 0; +} -- cgit v1.2.1 From f6529de9f4fc66de7679fa1204da1da50f6292fb Mon Sep 17 00:00:00 2001 From: Florian Achleitner Date: Wed, 19 Sep 2012 17:21:23 +0200 Subject: Allow reading svn dumps from files via file:// urls For testing as well as for importing large, already available dumps, it's useful to bypass svnrdump and replay the svndump from a file directly. Add support for file:// urls in the remote url, e.g. svn::file:///path/to/dump When the remote helper finds an url starting with file:// it tries to open that file instead of invoking svnrdump. 
Signed-off-by: Florian Achleitner Acked-by: David Michael Barr Signed-off-by: Junio C Hamano --- remote-testsvn.c | 52 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 19 deletions(-) (limited to 'remote-testsvn.c') diff --git a/remote-testsvn.c b/remote-testsvn.c index c30ffcdb37..67466a9962 100644 --- a/remote-testsvn.c +++ b/remote-testsvn.c @@ -9,6 +9,7 @@ #include "argv-array.h" static const char *url; +static int dump_from_file; static const char *private_ref; static const char *remote_ref = "refs/heads/master"; @@ -54,29 +55,36 @@ static int cmd_import(const char *line) struct argv_array svndump_argv = ARGV_ARRAY_INIT; struct child_process svndump_proc; - memset(&svndump_proc, 0, sizeof(struct child_process)); - svndump_proc.out = -1; - argv_array_push(&svndump_argv, "svnrdump"); - argv_array_push(&svndump_argv, "dump"); - argv_array_push(&svndump_argv, url); - argv_array_pushf(&svndump_argv, "-r%u:HEAD", startrev); - svndump_proc.argv = svndump_argv.argv; - - code = start_command(&svndump_proc); - if (code) - die("Unable to start %s, code %d", svndump_proc.argv[0], code); - dumpin_fd = svndump_proc.out; - + if (dump_from_file) { + dumpin_fd = open(url, O_RDONLY); + if(dumpin_fd < 0) + die_errno("Couldn't open svn dump file %s.", url); + } else { + memset(&svndump_proc, 0, sizeof(struct child_process)); + svndump_proc.out = -1; + argv_array_push(&svndump_argv, "svnrdump"); + argv_array_push(&svndump_argv, "dump"); + argv_array_push(&svndump_argv, url); + argv_array_pushf(&svndump_argv, "-r%u:HEAD", startrev); + svndump_proc.argv = svndump_argv.argv; + + code = start_command(&svndump_proc); + if (code) + die("Unable to start %s, code %d", svndump_proc.argv[0], code); + dumpin_fd = svndump_proc.out; + } svndump_init_fd(dumpin_fd, STDIN_FILENO); svndump_read(url, private_ref); svndump_deinit(); svndump_reset(); close(dumpin_fd); - code = finish_command(&svndump_proc); - if (code) - warning("%s, returned %d", 
svndump_proc.argv[0], code); - argv_array_clear(&svndump_argv); + if (!dump_from_file) { + code = finish_command(&svndump_proc); + if (code) + warning("%s, returned %d", svndump_proc.argv[0], code); + argv_array_clear(&svndump_argv); + } return 0; } @@ -151,8 +159,14 @@ int main(int argc, const char **argv) remote = remote_get(argv[1]); url_in = (argc == 3) ? argv[2] : remote->url[0]; - end_url_with_slash(&url_sb, url_in); - url = url_sb.buf; + if (!prefixcmp(url_in, "file://")) { + dump_from_file = 1; + url = url_decode(url_in + sizeof("file://")-1); + } else { + dump_from_file = 0; + end_url_with_slash(&url_sb, url_in); + url = url_sb.buf; + } strbuf_addf(&private_ref_sb, "refs/svn/%s/master", remote->name); private_ref = private_ref_sb.buf; -- cgit v1.2.1 From 8d7cd8eb3b4cee410ee391f7d59610e2f90adf4f Mon Sep 17 00:00:00 2001 From: Florian Achleitner Date: Wed, 19 Sep 2012 17:21:26 +0200 Subject: remote-svn: Activate import/export-marks for fast-import Enable import and export of a marks file by sending the appropriate feature commands to fast-import before sending data. 
Signed-off-by: Florian Achleitner Acked-by: David Michael Barr Signed-off-by: Junio C Hamano --- remote-testsvn.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'remote-testsvn.c') diff --git a/remote-testsvn.c b/remote-testsvn.c index 67466a9962..45eba9f2d9 100644 --- a/remote-testsvn.c +++ b/remote-testsvn.c @@ -12,6 +12,7 @@ static const char *url; static int dump_from_file; static const char *private_ref; static const char *remote_ref = "refs/heads/master"; +static const char *marksfilename; static int cmd_capabilities(const char *line); static int cmd_import(const char *line); @@ -73,6 +74,10 @@ static int cmd_import(const char *line) die("Unable to start %s, code %d", svndump_proc.argv[0], code); dumpin_fd = svndump_proc.out; } + /* setup marks file import/export */ + printf("feature import-marks-if-exists=%s\n" + "feature export-marks=%s\n", marksfilename, marksfilename); + svndump_init_fd(dumpin_fd, STDIN_FILENO); svndump_read(url, private_ref); svndump_deinit(); @@ -145,7 +150,7 @@ static int do_command(struct strbuf *line) int main(int argc, const char **argv) { struct strbuf buf = STRBUF_INIT, url_sb = STRBUF_INIT, - private_ref_sb = STRBUF_INIT; + private_ref_sb = STRBUF_INIT, marksfilename_sb = STRBUF_INIT; static struct remote *remote; const char *url_in; @@ -171,6 +176,10 @@ int main(int argc, const char **argv) strbuf_addf(&private_ref_sb, "refs/svn/%s/master", remote->name); private_ref = private_ref_sb.buf; + strbuf_addf(&marksfilename_sb, "%s/info/fast-import/remote-svn/%s.marks", + get_git_dir(), remote->name); + marksfilename = marksfilename_sb.buf; + while (1) { if (strbuf_getline(&buf, stdin, '\n') == EOF) { if (ferror(stdin)) @@ -186,5 +195,6 @@ int main(int argc, const char **argv) strbuf_release(&buf); strbuf_release(&url_sb); strbuf_release(&private_ref_sb); + strbuf_release(&marksfilename_sb); return 0; } -- cgit v1.2.1 From 8e43a1d010e22a0e432de440b08cc4389d8370f9 Mon Sep 17 00:00:00 2001 From: Florian 
Achleitner Date: Wed, 19 Sep 2012 17:21:27 +0200 Subject: remote-svn: add incremental import Search for a note attached to the ref to update and read its 'Revision-number:'-line. Start import from the next svn revision. If there is no next revision in the svn repo, svnrdump terminates with a message on stderr and a non-zero return value. This looks a little weird, but there is no other way to know whether there is a new revision in the svn repo. On the start of an incremental import, the parent of the first commit in the fast-import stream is set to the branch name to update. All following commits specify their parent by a mark number. Previous mark files are currently not reused. Signed-off-by: Florian Achleitner Acked-by: David Michael Barr Signed-off-by: Junio C Hamano --- remote-testsvn.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 75 insertions(+), 4 deletions(-) (limited to 'remote-testsvn.c') diff --git a/remote-testsvn.c b/remote-testsvn.c index 45eba9f2d9..b741f6d9a7 100644 --- a/remote-testsvn.c +++ b/remote-testsvn.c @@ -12,7 +12,8 @@ static const char *url; static int dump_from_file; static const char *private_ref; static const char *remote_ref = "refs/heads/master"; -static const char *marksfilename; +static const char *marksfilename, *notes_ref; +struct rev_note { unsigned int rev_nr; }; static int cmd_capabilities(const char *line); static int cmd_import(const char *line); @@ -48,14 +49,79 @@ static void terminate_batch(void) fflush(stdout); } +/* NOTE: 'ref' refers to a git reference, while 'rev' refers to a svn revision. */ +static char *read_ref_note(const unsigned char sha1[20]) +{ + const unsigned char *note_sha1; + char *msg = NULL; + unsigned long msglen; + enum object_type type; + + init_notes(NULL, notes_ref, NULL, 0); + if (!(note_sha1 = get_note(NULL, sha1))) + return NULL; /* note tree not found */ + if (!(msg = read_sha1_file(note_sha1, &type, &msglen))) + error("Empty notes tree. 
%s", notes_ref); + else if (!msglen || type != OBJ_BLOB) { + error("Note contains unusable content. " + "Is something else using this notes tree? %s", notes_ref); + free(msg); + msg = NULL; + } + free_notes(NULL); + return msg; +} + +static int parse_rev_note(const char *msg, struct rev_note *res) +{ + const char *key, *value, *end; + size_t len; + + while (*msg) { + end = strchr(msg, '\n'); + len = end ? end - msg : strlen(msg); + + key = "Revision-number: "; + if (!prefixcmp(msg, key)) { + long i; + char *end; + value = msg + strlen(key); + i = strtol(value, &end, 0); + if (end == value || i < 0 || i > UINT32_MAX) + return -1; + res->rev_nr = i; + } + msg += len + 1; + } + return 0; +} + static int cmd_import(const char *line) { int code; int dumpin_fd; - unsigned int startrev = 0; + char *note_msg; + unsigned char head_sha1[20]; + unsigned int startrev; struct argv_array svndump_argv = ARGV_ARRAY_INIT; struct child_process svndump_proc; + if (read_ref(private_ref, head_sha1)) + startrev = 0; + else { + note_msg = read_ref_note(head_sha1); + if(note_msg == NULL) { + warning("No note found for %s.", private_ref); + startrev = 0; + } else { + struct rev_note note = { 0 }; + if (parse_rev_note(note_msg, &note)) + die("Revision number couldn't be parsed from note."); + startrev = note.rev_nr + 1; + free(note_msg); + } + } + if (dump_from_file) { dumpin_fd = open(url, O_RDONLY); if(dumpin_fd < 0) @@ -79,7 +145,7 @@ static int cmd_import(const char *line) "feature export-marks=%s\n", marksfilename, marksfilename); svndump_init_fd(dumpin_fd, STDIN_FILENO); - svndump_read(url, private_ref); + svndump_read(url, private_ref, notes_ref); svndump_deinit(); svndump_reset(); @@ -150,7 +216,8 @@ static int do_command(struct strbuf *line) int main(int argc, const char **argv) { struct strbuf buf = STRBUF_INIT, url_sb = STRBUF_INIT, - private_ref_sb = STRBUF_INIT, marksfilename_sb = STRBUF_INIT; + private_ref_sb = STRBUF_INIT, marksfilename_sb = STRBUF_INIT, + notes_ref_sb = 
STRBUF_INIT; static struct remote *remote; const char *url_in; @@ -176,6 +243,9 @@ int main(int argc, const char **argv) strbuf_addf(&private_ref_sb, "refs/svn/%s/master", remote->name); private_ref = private_ref_sb.buf; + strbuf_addf(&notes_ref_sb, "refs/notes/%s/revs", remote->name); + notes_ref = notes_ref_sb.buf; + strbuf_addf(&marksfilename_sb, "%s/info/fast-import/remote-svn/%s.marks", get_git_dir(), remote->name); marksfilename = marksfilename_sb.buf; @@ -195,6 +265,7 @@ int main(int argc, const char **argv) strbuf_release(&buf); strbuf_release(&url_sb); strbuf_release(&private_ref_sb); + strbuf_release(&notes_ref_sb); strbuf_release(&marksfilename_sb); return 0; } -- cgit v1.2.1 From 5bfc76b5b275bbf0ddc2cf1a3a33fa6156addbd8 Mon Sep 17 00:00:00 2001 From: Florian Achleitner Date: Wed, 19 Sep 2012 17:21:29 +0200 Subject: remote-svn: add marks-file regeneration fast-import mark files are stored outside the object database and are therefore not fetched, and they can also be lost in other ways. Marks provide a svn revision --> git sha1 mapping, while the notes that are attached to each commit when it is imported provide a git sha1 --> svn revision mapping. If the marks file is not available or not plausible, regenerate it by walking through the notes tree. The plausibility check tests whether the highest revision in the marks file matches the revision of the top ref. It doesn't ensure that the mark file is completely correct. This could only be done with an effort equal to unconditional regeneration. 
Signed-off-by: Florian Achleitner Acked-by: David Michael Barr Signed-off-by: Junio C Hamano --- remote-testsvn.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) (limited to 'remote-testsvn.c') diff --git a/remote-testsvn.c b/remote-testsvn.c index b741f6d9a7..51fba059a2 100644 --- a/remote-testsvn.c +++ b/remote-testsvn.c @@ -96,6 +96,76 @@ static int parse_rev_note(const char *msg, struct rev_note *res) return 0; } +static int note2mark_cb(const unsigned char *object_sha1, + const unsigned char *note_sha1, char *note_path, + void *cb_data) +{ + FILE *file = (FILE *)cb_data; + char *msg; + unsigned long msglen; + enum object_type type; + struct rev_note note; + + if (!(msg = read_sha1_file(note_sha1, &type, &msglen)) || + !msglen || type != OBJ_BLOB) { + free(msg); + return 1; + } + if (parse_rev_note(msg, &note)) + return 2; + if (fprintf(file, ":%d %s\n", note.rev_nr, sha1_to_hex(object_sha1)) < 1) + return 3; + return 0; +} + +static void regenerate_marks(void) +{ + int ret; + FILE *marksfile = fopen(marksfilename, "w+"); + + if (!marksfile) + die_errno("Couldn't create mark file %s.", marksfilename); + ret = for_each_note(NULL, 0, note2mark_cb, marksfile); + if (ret) + die("Regeneration of marks failed, returned %d.", ret); + fclose(marksfile); +} + +static void check_or_regenerate_marks(int latestrev) +{ + FILE *marksfile; + struct strbuf sb = STRBUF_INIT; + struct strbuf line = STRBUF_INIT; + int found = 0; + + if (latestrev < 1) + return; + + init_notes(NULL, notes_ref, NULL, 0); + marksfile = fopen(marksfilename, "r"); + if (!marksfile) { + regenerate_marks(); + marksfile = fopen(marksfilename, "r"); + if (!marksfile) + die_errno("cannot read marks file %s!", marksfilename); + fclose(marksfile); + } else { + strbuf_addf(&sb, ":%d ", latestrev); + while (strbuf_getline(&line, marksfile, '\n') != EOF) { + if (!prefixcmp(line.buf, sb.buf)) { + found++; + break; + } + } + fclose(marksfile); + if (!found) + 
regenerate_marks(); + } + free_notes(NULL); + strbuf_release(&sb); + strbuf_release(&line); +} + static int cmd_import(const char *line) { int code; @@ -121,6 +191,7 @@ static int cmd_import(const char *line) free(note_msg); } } + check_or_regenerate_marks(startrev - 1); if (dump_from_file) { dumpin_fd = open(url, O_RDONLY); -- cgit v1.2.1