5 * convert.c - convert a file when checking it out and checking it in.
7 * This should use the pathname to decide on whether it wants to do some
8 * more interesting conversions (automatic gzip/unzip, general format
9 * conversions etc etc), but by default it just does automatic CRLF<->LF
10 * translation when the "auto_crlf" option is set.
13 #define CRLF_GUESS (-1)
19 /* CR, LF and CRLF counts */
20 unsigned cr, lf, crlf;
22 /* These are just approximations! */
23 unsigned printable, nonprintable;
26 static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)
30 memset(stats, 0, sizeof(*stats));
32 for (i = 0; i < size; i++) {
33 unsigned char c = buf[i];
36 if (i+1 < size && buf[i+1] == '\n')
46 stats->nonprintable++;
49 /* BS, HT, ESC and FF */
50 case '\b': case '\t': case '\033': case '\014':
54 stats->nonprintable++;
63 * The same heuristics as diff.c::mmfile_is_binary()
65 static int is_binary(unsigned long size, struct text_stat *stats)
68 if ((stats->printable >> 7) < stats->nonprintable)
71 * Other heuristics? Average line length might be relevant,
72 * as might LF vs CR vs CRLF counts.
74 * NOTE! It might be normal to have a low ratio of CRLF to LF
75 * (somebody starts with an LF-only file and edits it with an editor
76 * that adds CRLF only to lines that are added..). But do we
77 * want to support CR-only? Probably not.
82 static char *crlf_to_git(const char *path, const char *src, unsigned long *sizep, int action)
85 unsigned long size, nsize;
86 struct text_stat stats;
88 if ((action == CRLF_BINARY) || (action == CRLF_GUESS && !auto_crlf))
95 gather_stats(src, size, &stats);
97 /* No CR? Nothing to convert, regardless. */
101 if (action == CRLF_GUESS) {
103 * We're currently not going to even try to convert stuff
104 * that has bare CR characters. Does anybody do that crazy
107 if (stats.cr != stats.crlf)
111 * And add some heuristics for binary vs text, of course...
113 if (is_binary(size, &stats))
118 * Ok, allocate a new buffer, fill it in, and return it
119 * to let the caller know that we switched buffers.
121 nsize = size - stats.crlf;
122 buffer = xmalloc(nsize);
126 if (action == CRLF_GUESS) {
128 * If we guessed, we already know we rejected a file with
129 * lone CR, and we can strip a CR without looking at what
133 unsigned char c = *src++;
139 unsigned char c = *src++;
140 if (! (c == '\r' && (1 < size && *src == '\n')))
148 static char *crlf_to_worktree(const char *path, const char *src, unsigned long *sizep, int action)
151 unsigned long size, nsize;
152 struct text_stat stats;
155 if ((action == CRLF_BINARY) || (action == CRLF_INPUT) ||
156 (action == CRLF_GUESS && auto_crlf <= 0))
163 gather_stats(src, size, &stats);
165 /* No LF? Nothing to convert, regardless. */
169 /* Was it already in CRLF format? */
170 if (stats.lf == stats.crlf)
173 if (action == CRLF_GUESS) {
174 /* If we have any bare CR characters, we're not going to touch it */
175 if (stats.cr != stats.crlf)
178 if (is_binary(size, &stats))
183 * Ok, allocate a new buffer, fill it in, and return it
184 * to let the caller know that we switched buffers.
186 nsize = size + stats.lf - stats.crlf;
187 buffer = xmalloc(nsize);
193 unsigned char c = *src++;
194 if (c == '\n' && last != '\r')
203 static void setup_convert_check(struct git_attr_check *check)
205 static struct git_attr *attr_crlf;
208 attr_crlf = git_attr("crlf", 4);
209 check->attr = attr_crlf;
212 static int git_path_check_crlf(const char *path, struct git_attr_check *check)
214 const char *value = check->value;
216 if (ATTR_TRUE(value))
218 else if (ATTR_FALSE(value))
220 else if (ATTR_UNSET(value))
222 else if (!strcmp(value, "input"))
227 char *convert_to_git(const char *path, const char *src, unsigned long *sizep)
229 struct git_attr_check check[1];
230 int crlf = CRLF_GUESS;
232 setup_convert_check(check);
233 if (!git_checkattr(path, 1, check)) {
234 crlf = git_path_check_crlf(path, check);
236 return crlf_to_git(path, src, sizep, crlf);
239 char *convert_to_working_tree(const char *path, const char *src, unsigned long *sizep)
241 struct git_attr_check check[1];
242 int crlf = CRLF_GUESS;
244 setup_convert_check(check);
245 if (!git_checkattr(path, 1, check)) {
246 crlf = git_path_check_crlf(path, check);
248 return crlf_to_worktree(path, src, sizep, crlf);