5 * convert.c - convert a file when checking it out and checking it in.
7 * This should use the pathname to decide on whether it wants to do some
8 * more interesting conversions (automatic gzip/unzip, general format
9 * conversions etc etc), but by default it just does automatic CRLF<->LF
10 * translation when the "auto_crlf" option is set.
14 /* CR, LF and CRLF counts */
15 unsigned cr, lf, crlf;
17 /* These are just approximations! */
18 unsigned printable, nonprintable;
21 static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)
25 memset(stats, 0, sizeof(*stats));
27 for (i = 0; i < size; i++) {
28 unsigned char c = buf[i];
31 if (i+1 < size && buf[i+1] == '\n')
41 stats->nonprintable++;
44 /* BS, HT, ESC and FF */
45 case '\b': case '\t': case '\033': case '\014':
49 stats->nonprintable++;
58 * The same heuristics as diff.c::mmfile_is_binary()
60 static int is_binary(unsigned long size, struct text_stat *stats)
63 if ((stats->printable >> 7) < stats->nonprintable)
66 * Other heuristics? Average line length might be relevant,
67 * as might LF vs CR vs CRLF counts..
69 * NOTE! It might be normal to have a low ratio of CRLF to LF
70 * (somebody starts with a LF-only file and edits it with an editor
71 * that adds CRLF only to lines that are added..). But do we
72 * want to support CR-only? Probably not.
77 static int crlf_to_git(const char *path, char **bufp, unsigned long *sizep, int guess)
80 unsigned long size, nsize;
81 struct text_stat stats;
83 if (guess && !auto_crlf)
91 gather_stats(buffer, size, &stats);
93 /* No CR? Nothing to convert, regardless. */
99 * We're currently not going to even try to convert stuff
100 * that has bare CR characters. Does anybody do that crazy
103 if (stats.cr != stats.crlf)
107 * And add some heuristics for binary vs text, of course...
109 if (is_binary(size, &stats))
114 * Ok, allocate a new buffer, fill it in, and return true
115 * to let the caller know that we switched buffers on it.
117 nsize = size - stats.crlf;
118 nbuf = xmalloc(nsize);
124 unsigned char c = *buffer++;
130 unsigned char c = *buffer++;
131 if (! (c == '\r' && (1 < size && *buffer == '\n')))
139 static int autocrlf_to_git(const char *path, char **bufp, unsigned long *sizep)
141 return crlf_to_git(path, bufp, sizep, 1);
144 static int forcecrlf_to_git(const char *path, char **bufp, unsigned long *sizep)
146 return crlf_to_git(path, bufp, sizep, 0);
149 static int crlf_to_working_tree(const char *path, char **bufp, unsigned long *sizep, int guess)
152 unsigned long size, nsize;
153 struct text_stat stats;
156 if (guess && auto_crlf <= 0)
164 gather_stats(buffer, size, &stats);
166 /* No LF? Nothing to convert, regardless. */
170 /* Was it already in CRLF format? */
171 if (stats.lf == stats.crlf)
175 /* If we have any bare CR characters, we're not going to touch it */
176 if (stats.cr != stats.crlf)
179 if (is_binary(size, &stats))
184 * Ok, allocate a new buffer, fill it in, and return true
185 * to let the caller know that we switched buffers on it.
187 nsize = size + stats.lf - stats.crlf;
188 nbuf = xmalloc(nsize);
193 unsigned char c = *buffer++;
194 if (c == '\n' && last != '\r')
203 static int autocrlf_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
205 return crlf_to_working_tree(path, bufp, sizep, 1);
208 static int forcecrlf_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
210 return crlf_to_working_tree(path, bufp, sizep, 0);
213 static void setup_crlf_check(struct git_attr_check *check)
215 static struct git_attr *attr_crlf;
218 attr_crlf = git_attr("crlf", 4);
219 check->attr = attr_crlf;
222 static int git_path_check_crlf(const char *path)
224 struct git_attr_check attr_crlf_check;
226 setup_crlf_check(&attr_crlf_check);
228 if (!git_checkattr(path, 1, &attr_crlf_check)) {
229 void *value = attr_crlf_check.value;
230 if (ATTR_TRUE(value))
232 else if (ATTR_FALSE(value))
234 else if (ATTR_UNSET(value))
237 die("unknown value %s given to 'crlf' attribute",
243 int convert_to_git(const char *path, char **bufp, unsigned long *sizep)
245 switch (git_path_check_crlf(path)) {
249 return forcecrlf_to_git(path, bufp, sizep);
251 return autocrlf_to_git(path, bufp, sizep);
255 int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
257 switch (git_path_check_crlf(path)) {
261 return forcecrlf_to_working_tree(path, bufp, sizep);
263 return autocrlf_to_working_tree(path, bufp, sizep);