Merge branch 'jc/cocci-preincr' into maint
[git] / ws.c
1 /*
2  * Whitespace rules
3  *
4  * Copyright (c) 2007 Junio C Hamano
5  */
6
7 #include "cache.h"
8 #include "attr.h"
9
10 static struct whitespace_rule {
11         const char *rule_name;
12         unsigned rule_bits;
13         unsigned loosens_error:1,
14                 exclude_default:1;
15 } whitespace_rule_names[] = {
16         { "trailing-space", WS_TRAILING_SPACE, 0 },
17         { "space-before-tab", WS_SPACE_BEFORE_TAB, 0 },
18         { "indent-with-non-tab", WS_INDENT_WITH_NON_TAB, 0 },
19         { "cr-at-eol", WS_CR_AT_EOL, 1 },
20         { "blank-at-eol", WS_BLANK_AT_EOL, 0 },
21         { "blank-at-eof", WS_BLANK_AT_EOF, 0 },
22         { "tab-in-indent", WS_TAB_IN_INDENT, 0, 1 },
23 };
24
25 unsigned parse_whitespace_rule(const char *string)
26 {
27         unsigned rule = WS_DEFAULT_RULE;
28
29         while (string) {
30                 int i;
31                 size_t len;
32                 const char *ep;
33                 int negated = 0;
34
35                 string = string + strspn(string, ", \t\n\r");
36                 ep = strchrnul(string, ',');
37                 len = ep - string;
38
39                 if (*string == '-') {
40                         negated = 1;
41                         string++;
42                         len--;
43                 }
44                 if (!len)
45                         break;
46                 for (i = 0; i < ARRAY_SIZE(whitespace_rule_names); i++) {
47                         if (strncmp(whitespace_rule_names[i].rule_name,
48                                     string, len))
49                                 continue;
50                         if (negated)
51                                 rule &= ~whitespace_rule_names[i].rule_bits;
52                         else
53                                 rule |= whitespace_rule_names[i].rule_bits;
54                         break;
55                 }
56                 if (strncmp(string, "tabwidth=", 9) == 0) {
57                         unsigned tabwidth = atoi(string + 9);
58                         if (0 < tabwidth && tabwidth < 0100) {
59                                 rule &= ~WS_TAB_WIDTH_MASK;
60                                 rule |= tabwidth;
61                         }
62                         else
63                                 warning("tabwidth %.*s out of range",
64                                         (int)(len - 9), string + 9);
65                 }
66                 string = ep;
67         }
68
69         if (rule & WS_TAB_IN_INDENT && rule & WS_INDENT_WITH_NON_TAB)
70                 die("cannot enforce both tab-in-indent and indent-with-non-tab");
71         return rule;
72 }
73
74 unsigned whitespace_rule(const char *pathname)
75 {
76         static struct attr_check *attr_whitespace_rule;
77         const char *value;
78
79         if (!attr_whitespace_rule)
80                 attr_whitespace_rule = attr_check_initl("whitespace", NULL);
81
82         git_check_attr(&the_index, pathname, attr_whitespace_rule);
83         value = attr_whitespace_rule->items[0].value;
84         if (ATTR_TRUE(value)) {
85                 /* true (whitespace) */
86                 unsigned all_rule = ws_tab_width(whitespace_rule_cfg);
87                 int i;
88                 for (i = 0; i < ARRAY_SIZE(whitespace_rule_names); i++)
89                         if (!whitespace_rule_names[i].loosens_error &&
90                             !whitespace_rule_names[i].exclude_default)
91                                 all_rule |= whitespace_rule_names[i].rule_bits;
92                 return all_rule;
93         } else if (ATTR_FALSE(value)) {
94                 /* false (-whitespace) */
95                 return ws_tab_width(whitespace_rule_cfg);
96         } else if (ATTR_UNSET(value)) {
97                 /* reset to default (!whitespace) */
98                 return whitespace_rule_cfg;
99         } else {
100                 /* string */
101                 return parse_whitespace_rule(value);
102         }
103 }
104
105 /* The returned string should be freed by the caller. */
106 char *whitespace_error_string(unsigned ws)
107 {
108         struct strbuf err = STRBUF_INIT;
109         if ((ws & WS_TRAILING_SPACE) == WS_TRAILING_SPACE)
110                 strbuf_addstr(&err, "trailing whitespace");
111         else {
112                 if (ws & WS_BLANK_AT_EOL)
113                         strbuf_addstr(&err, "trailing whitespace");
114                 if (ws & WS_BLANK_AT_EOF) {
115                         if (err.len)
116                                 strbuf_addstr(&err, ", ");
117                         strbuf_addstr(&err, "new blank line at EOF");
118                 }
119         }
120         if (ws & WS_SPACE_BEFORE_TAB) {
121                 if (err.len)
122                         strbuf_addstr(&err, ", ");
123                 strbuf_addstr(&err, "space before tab in indent");
124         }
125         if (ws & WS_INDENT_WITH_NON_TAB) {
126                 if (err.len)
127                         strbuf_addstr(&err, ", ");
128                 strbuf_addstr(&err, "indent with spaces");
129         }
130         if (ws & WS_TAB_IN_INDENT) {
131                 if (err.len)
132                         strbuf_addstr(&err, ", ");
133                 strbuf_addstr(&err, "tab in indent");
134         }
135         return strbuf_detach(&err, NULL);
136 }
137
138 /* If stream is non-NULL, emits the line after checking. */
139 static unsigned ws_check_emit_1(const char *line, int len, unsigned ws_rule,
140                                 FILE *stream, const char *set,
141                                 const char *reset, const char *ws)
142 {
143         unsigned result = 0;
144         int written = 0;
145         int trailing_whitespace = -1;
146         int trailing_newline = 0;
147         int trailing_carriage_return = 0;
148         int i;
149
150         /* Logic is simpler if we temporarily ignore the trailing newline. */
151         if (len > 0 && line[len - 1] == '\n') {
152                 trailing_newline = 1;
153                 len--;
154         }
155         if ((ws_rule & WS_CR_AT_EOL) &&
156             len > 0 && line[len - 1] == '\r') {
157                 trailing_carriage_return = 1;
158                 len--;
159         }
160
161         /* Check for trailing whitespace. */
162         if (ws_rule & WS_BLANK_AT_EOL) {
163                 for (i = len - 1; i >= 0; i--) {
164                         if (isspace(line[i])) {
165                                 trailing_whitespace = i;
166                                 result |= WS_BLANK_AT_EOL;
167                         }
168                         else
169                                 break;
170                 }
171         }
172
173         if (trailing_whitespace == -1)
174                 trailing_whitespace = len;
175
176         /* Check indentation */
177         for (i = 0; i < trailing_whitespace; i++) {
178                 if (line[i] == ' ')
179                         continue;
180                 if (line[i] != '\t')
181                         break;
182                 if ((ws_rule & WS_SPACE_BEFORE_TAB) && written < i) {
183                         result |= WS_SPACE_BEFORE_TAB;
184                         if (stream) {
185                                 fputs(ws, stream);
186                                 fwrite(line + written, i - written, 1, stream);
187                                 fputs(reset, stream);
188                                 fwrite(line + i, 1, 1, stream);
189                         }
190                 } else if (ws_rule & WS_TAB_IN_INDENT) {
191                         result |= WS_TAB_IN_INDENT;
192                         if (stream) {
193                                 fwrite(line + written, i - written, 1, stream);
194                                 fputs(ws, stream);
195                                 fwrite(line + i, 1, 1, stream);
196                                 fputs(reset, stream);
197                         }
198                 } else if (stream) {
199                         fwrite(line + written, i - written + 1, 1, stream);
200                 }
201                 written = i + 1;
202         }
203
204         /* Check for indent using non-tab. */
205         if ((ws_rule & WS_INDENT_WITH_NON_TAB) && i - written >= ws_tab_width(ws_rule)) {
206                 result |= WS_INDENT_WITH_NON_TAB;
207                 if (stream) {
208                         fputs(ws, stream);
209                         fwrite(line + written, i - written, 1, stream);
210                         fputs(reset, stream);
211                 }
212                 written = i;
213         }
214
215         if (stream) {
216                 /*
217                  * Now the rest of the line starts at "written".
218                  * The non-highlighted part ends at "trailing_whitespace".
219                  */
220
221                 /* Emit non-highlighted (middle) segment. */
222                 if (trailing_whitespace - written > 0) {
223                         fputs(set, stream);
224                         fwrite(line + written,
225                             trailing_whitespace - written, 1, stream);
226                         fputs(reset, stream);
227                 }
228
229                 /* Highlight errors in trailing whitespace. */
230                 if (trailing_whitespace != len) {
231                         fputs(ws, stream);
232                         fwrite(line + trailing_whitespace,
233                             len - trailing_whitespace, 1, stream);
234                         fputs(reset, stream);
235                 }
236                 if (trailing_carriage_return)
237                         fputc('\r', stream);
238                 if (trailing_newline)
239                         fputc('\n', stream);
240         }
241         return result;
242 }
243
/*
 * Write the line to stream with its whitespace errors highlighted;
 * see ws_check_emit_1() for the meaning of the arguments.  The set of
 * violations found is deliberately not reported to the caller.
 */
void ws_check_emit(const char *line, int len, unsigned ws_rule,
		   FILE *stream, const char *set,
		   const char *reset, const char *ws)
{
	(void)ws_check_emit_1(line, len, ws_rule,
			      stream, set, reset, ws);
}
250
/*
 * Check the line against ws_rule without emitting anything and return
 * the WS_* bits of the violations found.
 */
unsigned ws_check(const char *line, int len, unsigned ws_rule)
{
	FILE *no_stream = NULL;

	return ws_check_emit_1(line, len, ws_rule, no_stream, NULL, NULL, NULL);
}
255
/*
 * Return 1 when the first len bytes of line consist solely of
 * whitespace (which is trivially the case when len is not positive),
 * and 0 otherwise.
 *
 * ws_rule is currently not consulted: a CR could arguably be treated
 * differently from other whitespace when WS_CR_AT_EOL is in effect,
 * but for now this simple definition is used.
 */
int ws_blank_line(const char *line, int len, unsigned ws_rule)
{
	int pos;

	for (pos = 0; pos < len; pos++)
		if (!isspace(line[pos]))
			return 0;
	return 1;
}
270
271 /* Copy the line onto the end of the strbuf while fixing whitespaces */
272 void ws_fix_copy(struct strbuf *dst, const char *src, int len, unsigned ws_rule, int *error_count)
273 {
274         /*
275          * len is number of bytes to be copied from src, starting
276          * at src.  Typically src[len-1] is '\n', unless this is
277          * the incomplete last line.
278          */
279         int i;
280         int add_nl_to_tail = 0;
281         int add_cr_to_tail = 0;
282         int fixed = 0;
283         int last_tab_in_indent = -1;
284         int last_space_in_indent = -1;
285         int need_fix_leading_space = 0;
286
287         /*
288          * Strip trailing whitespace
289          */
290         if (ws_rule & WS_BLANK_AT_EOL) {
291                 if (0 < len && src[len - 1] == '\n') {
292                         add_nl_to_tail = 1;
293                         len--;
294                         if (0 < len && src[len - 1] == '\r') {
295                                 add_cr_to_tail = !!(ws_rule & WS_CR_AT_EOL);
296                                 len--;
297                         }
298                 }
299                 if (0 < len && isspace(src[len - 1])) {
300                         while (0 < len && isspace(src[len-1]))
301                                 len--;
302                         fixed = 1;
303                 }
304         }
305
306         /*
307          * Check leading whitespaces (indent)
308          */
309         for (i = 0; i < len; i++) {
310                 char ch = src[i];
311                 if (ch == '\t') {
312                         last_tab_in_indent = i;
313                         if ((ws_rule & WS_SPACE_BEFORE_TAB) &&
314                             0 <= last_space_in_indent)
315                             need_fix_leading_space = 1;
316                 } else if (ch == ' ') {
317                         last_space_in_indent = i;
318                         if ((ws_rule & WS_INDENT_WITH_NON_TAB) &&
319                             ws_tab_width(ws_rule) <= i - last_tab_in_indent)
320                                 need_fix_leading_space = 1;
321                 } else
322                         break;
323         }
324
325         if (need_fix_leading_space) {
326                 /* Process indent ourselves */
327                 int consecutive_spaces = 0;
328                 int last = last_tab_in_indent + 1;
329
330                 if (ws_rule & WS_INDENT_WITH_NON_TAB) {
331                         /* have "last" point at one past the indent */
332                         if (last_tab_in_indent < last_space_in_indent)
333                                 last = last_space_in_indent + 1;
334                         else
335                                 last = last_tab_in_indent + 1;
336                 }
337
338                 /*
339                  * between src[0..last-1], strip the funny spaces,
340                  * updating them to tab as needed.
341                  */
342                 for (i = 0; i < last; i++) {
343                         char ch = src[i];
344                         if (ch != ' ') {
345                                 consecutive_spaces = 0;
346                                 strbuf_addch(dst, ch);
347                         } else {
348                                 consecutive_spaces++;
349                                 if (consecutive_spaces == ws_tab_width(ws_rule)) {
350                                         strbuf_addch(dst, '\t');
351                                         consecutive_spaces = 0;
352                                 }
353                         }
354                 }
355                 while (0 < consecutive_spaces--)
356                         strbuf_addch(dst, ' ');
357                 len -= last;
358                 src += last;
359                 fixed = 1;
360         } else if ((ws_rule & WS_TAB_IN_INDENT) && last_tab_in_indent >= 0) {
361                 /* Expand tabs into spaces */
362                 int start = dst->len;
363                 int last = last_tab_in_indent + 1;
364                 for (i = 0; i < last; i++) {
365                         if (src[i] == '\t')
366                                 do {
367                                         strbuf_addch(dst, ' ');
368                                 } while ((dst->len - start) % ws_tab_width(ws_rule));
369                         else
370                                 strbuf_addch(dst, src[i]);
371                 }
372                 len -= last;
373                 src += last;
374                 fixed = 1;
375         }
376
377         strbuf_add(dst, src, len);
378         if (add_cr_to_tail)
379                 strbuf_addch(dst, '\r');
380         if (add_nl_to_tail)
381                 strbuf_addch(dst, '\n');
382         if (fixed && error_count)
383                 (*error_count)++;
384 }