diff options
author | Torsten Bögershausen <tboegi@web.de> | 2016-01-16 07:50:02 +0100 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2016-01-18 19:48:43 -0800 |
commit | a7630bd4274a0dff7cff8b92de3d3f064e321359 (patch) | |
tree | 6fe17973ac36c4e9295aa64751dbf16eaadb491b /convert.c | |
parent | 0c83680e9c047170614fb08ef222ea4f460e514d (diff) | |
download | git-a7630bd4274a0dff7cff8b92de3d3f064e321359.tar.gz |
ls-files: add eol diagnosticstb/ls-files-eol
When working in a cross-platform environment, a user may want to
check if text files are stored normalized in the repository and
if .gitattributes are set appropriately.
Make it possible to let Git show the line endings in the index and
in the working tree and the effective text/eol attributes.
The end of line ("eolinfo") are shown like this:
"-text" binary (or with bare CR) file
"none" text file without any EOL
"lf" text file with LF
"crlf" text file with CRLF
"mixed" text file with mixed line endings.
The effective text/eol attribute is one of these:
"", "-text", "text", "text=auto", "text eol=lf", "text eol=crlf"
git ls-files --eol gives an output like this:
i/none w/none attr/text=auto t/t5100/empty
i/-text w/-text attr/-text t/test-binary-2.png
i/lf w/lf attr/text eol=lf t/t5100/rfc2047-info-0007
i/lf w/crlf attr/text eol=crlf doit.bat
i/mixed w/mixed attr/ locale/XX.po
to show what eol convention is used in the data in the index ('i'),
and in the working tree ('w'), and what attribute is in effect,
for each path that is shown.
Add test cases in t0027.
Helped-By: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'convert.c')
-rw-r--r-- | convert.c | 119 |
1 files changed, 91 insertions, 28 deletions
@@ -13,6 +13,11 @@ * translation when the "text" attribute or "auto_crlf" option is set. */ +/* Stat bits: When BIN is set, the txt bits are unset */ +#define CONVERT_STAT_BITS_TXT_LF 0x1 +#define CONVERT_STAT_BITS_TXT_CRLF 0x2 +#define CONVERT_STAT_BITS_BIN 0x4 + enum crlf_action { CRLF_GUESS = -1, CRLF_BINARY = 0, @@ -75,26 +80,75 @@ static void gather_stats(const char *buf, unsigned long size, struct text_stat * /* * The same heuristics as diff.c::mmfile_is_binary() + * We treat files with bare CR as binary */ -static int is_binary(unsigned long size, struct text_stat *stats) +static int convert_is_binary(unsigned long size, const struct text_stat *stats) { - + if (stats->cr != stats->crlf) + return 1; if (stats->nul) return 1; if ((stats->printable >> 7) < stats->nonprintable) return 1; - /* - * Other heuristics? Average line length might be relevant, - * as might LF vs CR vs CRLF counts.. - * - * NOTE! It might be normal to have a low ratio of CRLF to LF - * (somebody starts with a LF-only file and edits it with an editor - * that adds CRLF only to lines that are added..). But do we - * want to support CR-only? Probably not. - */ return 0; } +static unsigned int gather_convert_stats(const char *data, unsigned long size) +{ + struct text_stat stats; + if (!data || !size) + return 0; + gather_stats(data, size, &stats); + if (convert_is_binary(size, &stats)) + return CONVERT_STAT_BITS_BIN; + else if (stats.crlf && stats.crlf == stats.lf) + return CONVERT_STAT_BITS_TXT_CRLF; + else if (stats.crlf && stats.lf) + return CONVERT_STAT_BITS_TXT_CRLF | CONVERT_STAT_BITS_TXT_LF; + else if (stats.lf) + return CONVERT_STAT_BITS_TXT_LF; + else + return 0; +} + +static const char *gather_convert_stats_ascii(const char *data, unsigned long size) +{ + unsigned int convert_stats = gather_convert_stats(data, size); + + if (convert_stats & CONVERT_STAT_BITS_BIN) + return "-text"; + switch (convert_stats) { + case CONVERT_STAT_BITS_TXT_LF: + return "lf"; + case CONVERT_STAT_BITS_TXT_CRLF: + return "crlf"; + case CONVERT_STAT_BITS_TXT_LF | CONVERT_STAT_BITS_TXT_CRLF: + return "mixed"; + default: + return "none"; + } +} + +const char *get_cached_convert_stats_ascii(const char *path) +{ + const char *ret; + unsigned long sz; + void *data = read_blob_data_from_cache(path, &sz); + ret = gather_convert_stats_ascii(data, sz); + free(data); + return ret; +} + +const char *get_wt_convert_stats_ascii(const char *path) +{ + const char *ret = ""; + struct strbuf sb = STRBUF_INIT; + if (strbuf_read_file(&sb, path, 0) >= 0) + ret = gather_convert_stats_ascii(sb.buf, sb.len); + strbuf_release(&sb); + return ret; +} + static enum eol output_eol(enum crlf_action crlf_action) { switch (crlf_action) { @@ -187,18 +241,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len, gather_stats(src, len, &stats); if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) { - /* - * We're currently not going to even try to convert stuff - * that has bare CR characters. Does anybody do that crazy - * stuff? - */ - if (stats.cr != stats.crlf) - return 0; - - /* - * And add some heuristics for binary vs text, of course... - */ - if (is_binary(len, &stats)) + if (convert_is_binary(len, &stats)) return 0; if (crlf_action == CRLF_GUESS) { @@ -277,11 +320,7 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len, return 0; } - /* If we have any bare CR characters, we're not going to touch it */ - if (stats.cr != stats.crlf) - return 0; - - if (is_binary(len, &stats)) + if (convert_is_binary(len, &stats)) return 0; } @@ -777,6 +816,30 @@ int would_convert_to_git_filter_fd(const char *path) return apply_filter(path, NULL, 0, -1, NULL, ca.drv->clean); } +const char *get_convert_attr_ascii(const char *path) +{ + struct conv_attrs ca; + enum crlf_action crlf_action; + + convert_attrs(&ca, path); + crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr); + switch (crlf_action) { + case CRLF_GUESS: + return ""; + case CRLF_BINARY: + return "-text"; + case CRLF_TEXT: + return "text"; + case CRLF_INPUT: + return "text eol=lf"; + case CRLF_CRLF: + return "text=auto eol=crlf"; + case CRLF_AUTO: + return "text=auto"; + } + return ""; +} + int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst, enum safe_crlf checksafe) { |