1 // detector.c written by Mitchell Foral. mitchell<att>caladbolg.net.
2 // See COPYING for license information.
12 #include "languages.h"
15 #include "hash/cppheader_hash.h"
16 #include "hash/disambiguatefunc_hash.h"
17 #include "hash/extension_hash.h"
18 #include "hash/filename_hash.h"
// Language-table values carry a one-byte tag in their first character:
// '\1' tags a binary entry and '\2' tags an ambiguous entry. For an ambiguous
// entry, DISAMBIGUATEWHAT yields the text after the tag, which is the id used
// to look up a disambiguation function.
// Every argument is parenthesized so that pointer expressions such as
// `base + 1` expand correctly.
#define ISBINARY(x) ((x)[0] == '\1')
#define ISAMBIGUOUS(x) ((x)[0] == '\2')
#define DISAMBIGUATEWHAT(x) (&(x)[1])
// Stand-in for mkstemp(): _mktemp() fills in the template `p`, then _open()
// creates that file with the create, short-lived and exclusive flags.
// NOTE(review): _open is called without a permission-mode argument although
// _O_CREAT is set -- confirm against the CRT documentation.
26 # define mkstemp(p) _open(_mktemp(p), _O_CREAT | _O_SHORT_LIVED | _O_EXCL)
29 /* Parse the output of libmagic and return a language, if any.
30 * The contents of string `line` will be destroyed.
// Maps one line of libmagic output to a language name.
// The line is lower-cased in place and then matched, in order, against
// "script text ... for <name>", "<name> script" and "xml".
// Returns the `name` field of the LanguageMap entry found for <name>, or
// LANG_XML for the xml case.
// NOTE(review): tolower/isalnum receive plain `char` values here; confirm the
// input is ASCII or cast to unsigned char.
32 const char *magic_parse(char *line) {
34 char *eol = line + strlen(line);
// Lower-case the whole line so the substring searches are case-insensitive.
39 for (p = line; p < eol; p++) *p = tolower(*p);
40 p = strstr(line, "script text"); // Example: "script text executable for perl -w,"
// The interpreter name is the alphanumeric run that follows "for ".
42 p = strstr(line, "for ");
46 while (isalnum(*pe)) pe++;
48 strncpy(buf, p, length);
50 struct LanguageMap *rl = ohcount_hash_language_from_name(buf, length);
51 if (rl) return(rl->name);
55 p = strstr(line, "script"); // Example: "PHP script, ASCII text"
// Walk backwards from "script" over blanks, then over one alphanumeric word;
// repeat while that word is introduced by '-'.
60 while (*p == ' ') p--;
61 while (p != line && isalnum(*(p - 1))) p--;
62 if (p != line && *(p - 1) == '-') p--;
63 } while (*p == '-'); // Skip over any switches.
65 strncpy(buf, p, length);
67 struct LanguageMap *rl = ohcount_hash_language_from_name(buf, length);
68 if (rl) return(rl->name);
69 } else if (strstr(line, "xml")) return(LANG_XML);
74 /* Use libmagic to detect file language
// Asks libmagic to describe the file and passes that description to
// magic_parse().
// The description comes from magic_file() when sourcefile->diskpath is set and
// from magic_buffer() over the in-memory contents otherwise. libmagic failures
// are reported on stderr with a "libmagic: " prefix.
// NOTE(review): confirm the cookie is released with magic_close() on every
// return path.
76 const char *detect_language_magic(SourceFile *sourcefile) {
79 magic_t cookie = magic_open(MAGIC_NONE);
81 fprintf(stderr, "libmagic: %s\n", magic_error(cookie));
// A NULL filename asks libmagic to load its default database.
84 if (magic_load(cookie, NULL) != 0) {
85 fprintf(stderr, "libmagic: %s\n", magic_error(cookie));
90 if (sourcefile->diskpath) {
91 const char *magic = magic_file(cookie, sourcefile->diskpath);
93 fprintf(stderr, "libmagic: %s\n", magic_error(cookie));
// Copy into a local buffer and force termination, since strncpy does not
// terminate on truncation and magic_parse() modifies its argument.
97 strncpy(line, magic, sizeof(line));
98 line[sizeof(line)-1] = '\0';
100 char *p = ohcount_sourcefile_get_contents(sourcefile);
// NOTE(review): strlen(p) stops at the first NUL byte, so only the leading
// text portion of the contents is examined -- confirm this is intended.
103 const char *magic = magic_buffer(cookie, p, strlen(p));
105 fprintf(stderr, "libmagic: %s\n", magic_error(cookie));
109 strncpy(line, magic, sizeof(line));
110 line[sizeof(line)-1] = '\0';
115 return magic_parse(line);
118 /* Use all available means to detect file language
// Determines the language of `sourcefile` by trying several sources in turn:
// the file extension (as given, then lower-cased), an Emacs "-*- mode -*-"
// line, the filename table, and finally libmagic.
// A result tagged as ambiguous is handed to the disambiguation function
// registered for its id; a result tagged as binary is turned into NULL.
120 const char *ohcount_detect_language(SourceFile *sourcefile) {
121 const char *language = NULL;
125 // Attempt to detect based on file extension.
126 length = strlen(sourcefile->ext);
127 struct ExtensionMap *re = ohcount_hash_language_from_ext(sourcefile->ext,
129 if (re) language = re->value;
131 // Try the lower-case version of this extension.
// The buffer holds `length` characters plus the terminator.
132 char lowerext[length + 1];
133 strncpy(lowerext, sourcefile->ext, length);
134 lowerext[length] = '\0';
135 for (p = lowerext; p < lowerext + length; p++) *p = tolower(*p);
136 struct ExtensionMap *re = ohcount_hash_language_from_ext(lowerext, length);
137 if (re) language = re->value;
140 // Attempt to detect using Emacs mode line (/^-\*-\s*mode[\s:]*\w/i).
142 char line[81] = { '\0' }, buf[81];
143 p = ohcount_sourcefile_get_contents(sourcefile);
145 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
147 // Get the contents of the first line.
148 while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
// NOTE(review): this clamp permits length == sizeof(line); if a terminator is
// then stored at line[length] it lands one past the array -- confirm whether
// the bound should be sizeof(line) - 1.
149 length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
150 strncpy(line, p, length);
152 if (*line == '#' && *(line + 1) == '!') {
153 // First line was sh-bang; loop to get contents of second line.
154 while (*pe == '\r' || *pe == '\n') pe++;
158 p = strstr(line, "-*-");
161 while (*p == ' ' || *p == '\t') p++;
162 // detect "mode" (any capitalization)
163 if (strncasecmp(p, "mode", 4) == 0) {
165 while (*p == ' ' || *p == '\t' || *p == ':') p++;
// The mode name ends at whitespace, at ';', or where the closing "-*-" starts.
168 while (!isspace(*pe) && *pe != ';' && pe != strstr(pe, "-*-")) pe++;
// NOTE(review): same clamp as above, here against sizeof(buf).
169 length = (pe - p <= sizeof(buf)) ? pe - p : sizeof(buf);
170 strncpy(buf, p, length);
173 // Special case for "c" or "C" emacs mode header: always means C, not C++
174 if (strcasecmp(buf, "c") == 0) {
178 // First try it with the language name.
179 struct LanguageMap *rl = ohcount_hash_language_from_name(buf, length);
180 if (rl) language = rl->name;
182 // Then try it with the extension table.
183 struct ExtensionMap *re = ohcount_hash_language_from_ext(buf, length);
184 if (re) language = re->value;
187 // Try the lower-case version of this modeline.
188 for (pe = buf; pe < buf+length; pe++) *pe = tolower(*pe);
189 // First try it with the language name.
190 rl = ohcount_hash_language_from_name(buf, length);
191 if (rl) language = rl->name;
194 // Then try it with the extension table.
195 struct ExtensionMap *re = ohcount_hash_language_from_ext(buf, length);
196 if (re) language = re->value;
201 // Attempt to detect based on filename.
203 length = strlen(sourcefile->filename);
204 struct FilenameMap *rf =
205 ohcount_hash_language_from_filename(sourcefile->filename, length);
206 if (rf) language = rf->value;
209 // Attempt to detect based on Unix 'file' command.
211 language = detect_language_magic(sourcefile);
// ISAMBIGUOUS and ISBINARY read language[0], so `language` must be non-NULL
// by the time this point is reached.
215 if (ISAMBIGUOUS(language)) {
216 // Call the appropriate function for disambiguation.
217 length = strlen(DISAMBIGUATEWHAT(language));
218 struct DisambiguateFuncsMap *rd =
219 ohcount_hash_disambiguate_func_from_id(DISAMBIGUATEWHAT(language),
221 if (rd) language = rd->value(sourcefile);
222 } else language = ISBINARY(language) ? NULL : language;
// Inspects "<%@ ... %>" directives in the contents, looking for a Page
// directive whose lower-cased text contains language="vb".
227 const char *disambiguate_aspx(SourceFile *sourcefile) {
228 char *p = ohcount_sourcefile_get_contents(sourcefile);
229 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
230 for (; p < eof; p++) {
231 // /<%@\s*Page[^>]+Language="VB"[^>]+%>/
232 p = strstr(p, "<%@");
235 char *pe = strstr(p, "%>");
238 const int length = pe - p;
240 strncpy(buf, p, length);
242 char *eol = buf + strlen(buf);
// NOTE(review): from here on `p` walks the local copy `buf`, not the file
// contents. If it is not restored before the next pass of the outer loop,
// the `p < eof` test compares pointers into unrelated objects -- confirm.
243 for (p = buf; p < eol; p++) *p = tolower(*p);
245 while (*p == ' ' || *p == '\t') p++;
246 if (strncmp(p, "page", 4) == 0) {
248 if (strstr(p, "language=\"vb\""))
256 // 6502 assembly or XML-based Advanced Stream Redirector ?
// Scans the contents from the start. Both return statements visible here
// yield LANG_ASSEMBLER, the second one when only blanks were seen.
257 const char *disambiguate_asx(SourceFile *sourcefile) {
258 char *p = ohcount_sourcefile_get_contents(sourcefile);
259 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
260 for (; p < eof; p++) {
275 return LANG_ASSEMBLER;
278 return LANG_ASSEMBLER; // only blanks - not valid XML, may be valid asm
// Looks through the contents for the keyword shapes listed in the regular
// expression comment below; when the decision is not made here it is
// delegated to disambiguate_basic().
281 const char *disambiguate_b(SourceFile *sourcefile) {
282 char *p = ohcount_sourcefile_get_contents(sourcefile);
283 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
285 // /(implement[ \t])|(include[ \t]+"[^"]*";)|
286 // ((return|break|continue).*;|(pick|case).*\{)/
287 if (strncmp(p, "implement", 9) == 0 &&
288 (*(p + 9) == ' ' || *(p + 9) == '\t'))
290 else if (strncmp(p, "include", 7) == 0 &&
291 (*(p + 7) == ' ' || *(p + 7) == '\t')) {
293 while (*p == ' ' || *p == '\t') p++;
// NOTE(review): *p is read before the `p < eof` bound is tested; swapping the
// operands would avoid reading at eof.
295 while (*p != '"' && p < eof) p++;
// An include counts only when the closing quote is directly followed by ';'.
296 if (*p == '"' && *(p + 1) == ';')
299 } else if (strncmp(p, "return", 6) == 0 ||
300 strncmp(p, "break", 5) == 0 ||
301 strncmp(p, "continue", 8) == 0) {
304 } else if (strncmp(p, "pick", 4) == 0 ||
305 strncmp(p, "case", 4) == 0) {
311 return disambiguate_basic(sourcefile);
// Chooses between the BASIC dialects.
// Contents are read a line at a time; a line made of digits followed by
// blanks can yield LANG_CLASSIC_BASIC. Otherwise the sibling filenames are
// checked for the extensions frm, frx, vba, vbp and vbs, which yield
// LANG_VISUALBASIC. The fallback is LANG_STRUCTURED_BASIC.
314 const char *disambiguate_basic(SourceFile *sourcefile) {
318 // Attempt to detect based on file contents.
320 p = ohcount_sourcefile_get_contents(sourcefile);
322 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
324 // Get a line at a time.
325 while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
// NOTE(review): the clamp permits length == sizeof(line); confirm that a
// terminator written at line[length] cannot land past the array.
326 length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
327 strncpy(line, p, length);
// Skip a leading run of digits, then require blank space after it.
335 while (isdigit(*p)) p++;
336 if (*p == ' ' || *p == '\t') {
338 while (*p == ' ' || *p == '\t') p++;
340 return LANG_CLASSIC_BASIC;
// Step over the line terminator(s) to reach the next line.
346 while (*pe == '\r' || *pe == '\n') pe++;
350 // Attempt to detect from associated VB files in file context.
351 char **filenames = sourcefile->filenames;
// The filenames array is walked until its NULL terminator.
354 for (i = 0; filenames[i] != NULL; i++) {
355 pe = filenames[i] + strlen(filenames[i]);
// Back up to just after the last '.', i.e. the start of the extension.
357 while (p > filenames[i] && *(p - 1) != '.') p--;
360 (strncmp(p, "frm", length) == 0 ||
361 strncmp(p, "frx", length) == 0 ||
362 strncmp(p, "vba", length) == 0 ||
363 strncmp(p, "vbp", length) == 0 ||
364 strncmp(p, "vbs", length) == 0)) {
365 return LANG_VISUALBASIC;
370 return LANG_STRUCTURED_BASIC;
// Returns LANG_CLEARSILVER_TEMPLATE when the contents are available and
// contain the "<?cs" marker.
373 const char *disambiguate_cs(SourceFile *sourcefile) {
374 // Attempt to detect based on file contents.
375 char *contents = ohcount_sourcefile_get_contents(sourcefile);
// The `contents &&` guard keeps strstr from being handed a NULL pointer.
376 if (contents && strstr(contents, "<?cs"))
377 return LANG_CLEARSILVER_TEMPLATE;
// Scans the contents for evidence of a Modula-2 definition module: a comment
// opener whose second character is '*', or the word DEFINITION. Returns NULL
// when the file is not Modula-2 or holds only blanks.
382 const char *disambiguate_def(SourceFile *sourcefile) {
383 char *p = ohcount_sourcefile_get_contents(sourcefile);
384 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
385 for (; p < eof; p++) {
393 if (p[1] == '*') // Modula-2 comment
// The keyword comparison is case-sensitive.
397 if (strncmp(p, "DEFINITION", 10) == 0) // Modula-2 "DEFINITION MODULE"
401 return NULL; // not Modula-2
404 return NULL; // only blanks
// Decides between fixed-form and free-form Fortran.
// The contents are checked line by line against fixed-form layout rules; the
// first violation returns LANG_FORTRANFREE, and LANG_FORTRANFIXED is returned
// when no line violates them. `i` is the column counter used by the column
// tests below.
// NOTE(review): several loops read *p before testing `p < eof`; confirm the
// contents are NUL-terminated or reorder the operands.
407 const char *disambiguate_fortran(SourceFile *sourcefile) {
410 p = ohcount_sourcefile_get_contents(sourcefile);
411 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
413 // Try the assumption of a fixed formatted source code, and return free
414 // format if anything opposes this assumption.
415 // Rules based on the Fortran standard, page 47:
416 // ftp://ftp.nag.co.uk/sc22wg5/N1801-N1850/N1830.pdf
420 // Process a single line; tabulators are not valid in Fortran code
421 // but some compilers accept them to skip the first 5 columns.
422 if (*p == ' ' || *p == '\t' || isdigit(*p)) {
423 // Only consider lines starting with a blank or digit
424 // (non-comment in fixed)
425 if (*p == '\t') i = 5;
// blanklabel stays true only while every label-field character is a blank.
426 blanklabel = (*p == ' ' || *p == '\t');
427 while (*p != '\r' && *p != '\n' && p < eof) {
430 blanklabel = blanklabel && (*p == ' ');
431 if ( !isdigit(*p) && *p != ' ' && *p != '!')
432 // Non-digit, non-blank, non-comment character in the label field
433 // definitely not valid fixed formatted code!
434 return LANG_FORTRANFREE;
436 if ((i == 6) && !blanklabel && *p != ' ' && *p != '0')
437 // Fixed format continuation line with non-blank label field
438 // not allowed, assume free format:
439 return LANG_FORTRANFREE;
440 // Ignore comments (a ! character in column 6 is a continuation in
442 if (*p == '!' && i != 6) {
443 while (*p != '\r' && *p != '\n' && p < eof) p++;
// Skip a double-quoted string up to its closing quote or the end of the line.
447 if (p < eof) {p++; i++;}
448 while (*p != '"' && *p != '\r' && *p != '\n' && p < eof) {
// Same for a single-quoted string.
453 if (p < eof) {p++; i++;}
454 while (*p != '\'' && *p != '\r' && *p != '\n' && p < eof) {
458 // Check for free format line continuation
459 if (i > 6 && i <= 72 && *p == '&')
460 // Found an unquoted free format continuation character in the fixed
461 // format code section. This has to be free format.
462 return LANG_FORTRANFREE;
466 // Not a statement line in fixed format...
467 if (*p != 'C' && *p != 'c' && *p != '*' && *p != '!')
468 // Not a valid fixed form comment, has to be free formatted source
469 return LANG_FORTRANFREE;
470 // Comment in fixed form, ignore this line
471 while (*p != '\r' && *p != '\n' && p < eof) p++;
473 // Skip all line ends
474 while ((*p == '\r' || *p == '\n') && p < eof) p++;
476 // Assume fixed format if none of the lines broke the assumptions
477 return LANG_FORTRANFIXED;
// Classifies a header file.
// For a file whose extension is exactly "h", a sibling with the same name but
// ending in 'm' yields LANG_OBJECTIVE_C. Otherwise each line is examined for
// #include targets known as C++ headers or carrying a C++ extension, and for
// the keywords class, namespace, template and typename.
480 const char *disambiguate_h(SourceFile *sourcefile) {
484 // If the directory contains a matching *.m file, likely Objective C.
485 length = strlen(sourcefile->filename);
486 if (strcmp(sourcefile->ext, "h") == 0) {
488 strncpy(path, sourcefile->filename, length);
// Overwrite the final character (the 'h') to form the candidate ".m" name.
490 *(path + length - 1) = 'm';
491 char **filenames = sourcefile->filenames;
494 for (i = 0; filenames[i] != NULL; i++)
495 if (strcmp(path, filenames[i]) == 0)
496 return LANG_OBJECTIVE_C;
500 // Attempt to detect based on file contents.
501 char line[81], buf[81];
502 bof = ohcount_sourcefile_get_contents(sourcefile);
505 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
507 // Get a line at a time.
508 while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
// NOTE(review): the clamp permits length == sizeof(line); confirm that a
// terminator written at line[length] cannot land past the array.
509 length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
510 strncpy(line, p, length);
512 char *eol = line + strlen(line);
515 // Look for C++ headers.
518 while (*p == ' ' || *p == '\t') p++;
519 if (strncmp(p, "include", 7) == 0 &&
520 (*(p + 7) == ' ' || *(p + 7) == '\t')) {
521 // /^#\s*include\s+[<"][^>"]+[>"]/
523 while (*p == ' ' || *p == '\t') p++;
524 if (*p == '<' || *p == '"') {
525 // Is the header file a C++ header file?
528 while (pe < eol && *pe != '>' && *pe != '"') pe++;
530 strncpy(buf, p, length);
532 if (ohcount_hash_is_cppheader(buf, length))
534 // Is the extension for the header file a C++ file?
536 while (p > line && *(p - 1) != '.') p--;
538 strncpy(buf, p, length);
540 struct ExtensionMap *re = ohcount_hash_language_from_ext(buf, length);
541 if (re && strcmp(re->value, LANG_CPP) == 0)
547 // Look for C++ keywords.
// NOTE(review): the start-of-buffer guard here is `p != bof`, while the
// matching scan in disambiguate_m uses `p != line`. If p walks `line` at
// this point, `*(p - 1)` can read before the buffer on its first
// character -- confirm.
550 if (islower(*p) && p != bof && !isalnum(*(p - 1)) && *(p - 1) != '_') {
552 while (islower(*pe)) pe++;
553 if (!isalnum(*pe) && *pe != '_') {
555 strncpy(buf, p, length);
557 if (strcmp(buf, "class") == 0 ||
558 strcmp(buf, "namespace") == 0 ||
559 strcmp(buf, "template") == 0 ||
560 strcmp(buf, "typename") == 0)
569 while (*pe == '\r' || *pe == '\n') pe++;
573 // Nothing to suggest C++.
// Resolves a "*.in" file by detecting the language of the same file with the
// trailing ".in" removed from its path.
// A temporary SourceFile is created for the shortened path, given the
// original diskpath (if any), contents and sibling filenames, queried for its
// language, and freed again.
577 const char *disambiguate_in(SourceFile *sourcefile) {
580 const char *language = NULL;
582 p = sourcefile->filepath;
// pe marks where the three-character ".in" suffix starts.
583 pe = p + strlen(p) - 3;
// NOTE(review): strstr returns NULL when the path has no '.', and comparing
// NULL with `<=` against pe is not a meaningful ordering; it also
// misbehaves for paths shorter than three characters -- confirm callers
// always pass a path ending in ".in".
584 if (strstr(p, ".") <= pe) {
585 // Only if the filename has an extension prior to the .in
588 strncpy(buf, p, length);
590 p = ohcount_sourcefile_get_contents(sourcefile);
595 // A SourceFile's filepath and diskpath need not be the same.
596 // Here, we'll take advantage of this to set up a new SourceFile
597 // whose filepath does not have the *.in extension, but whose
598 // diskpath still points back to the original file on disk (if any).
599 SourceFile *undecorated = ohcount_sourcefile_new(buf);
600 if (sourcefile->diskpath) {
601 ohcount_sourcefile_set_diskpath(undecorated, sourcefile->diskpath);
603 ohcount_sourcefile_set_contents(undecorated, p);
// The filenames array is shared with the original, not copied.
604 undecorated->filenames = sourcefile->filenames;
605 language = ohcount_sourcefile_get_language(undecorated);
606 ohcount_sourcefile_free(undecorated);
// Examines the contents of a ".inc" file; one of the checks recognises the
// "?php" sequence.
611 const char *disambiguate_inc(SourceFile *sourcefile) {
612 char *p = ohcount_sourcefile_get_contents(sourcefile);
// The end of the text is taken from strlen here rather than from the
// recorded contents size, so scanning stops at the first NUL byte.
614 char *eof = p + strlen(p);
618 else if (*p == '?' && strncmp(p + 1, "php", 3) == 0)
// Classifies a ".m" file with a scoring heuristic.
// Sibling filenames contribute first: ".h" headers with no C/C++ source files
// alongside add 5 to the Objective C score. Each content line is then checked
// for tell-tale constructs, Octave keywords and Limbo declarations. The
// highest score decides; when the Matlab side wins, LANG_OCTAVE is returned
// if Octave-only syntax was seen and LANG_MATLAB otherwise.
626 const char *disambiguate_m(SourceFile *sourcefile) {
630 // Attempt to detect based on a weighted heuristic of file contents.
631 int matlab_score = 0;
632 int objective_c_score = 0;
634 int octave_syntax_detected = 0;
636 int i, has_h_headers = 0, has_c_files = 0;
637 char **filenames = sourcefile->filenames;
639 for (i = 0; filenames[i] != NULL; i++) {
// Four-character suffixes: .cpp, .c++ and .cxx.
643 if (*(pe - 4) == '.' && *(pe - 3) == 'c' &&
644 ((*(pe - 2) == 'p' && *(pe - 1) == 'p') ||
645 (*(pe - 2) == '+' && *(pe - 1) == '+') ||
646 (*(pe - 2) == 'x' && *(pe - 1) == 'x'))) {
648 break; // short circuit
650 } else if (pe - p >= 3) {
// Three-character suffix: .cc
651 if (*(pe - 3) == '.' && *(pe - 2) == 'c' && *(pe - 1) == 'c') {
653 break; // short circuit
655 } else if (pe - p >= 2) {
// Two-character suffixes: .h, .c and .C
656 if (*(pe - 2) == '.') {
657 if (*(pe - 1) == 'h')
659 else if (*(pe - 1) == 'c' || *(pe - 1) == 'C') {
661 break; // short circuit
667 if (has_h_headers && !has_c_files)
668 objective_c_score += 5;
670 char line[81], buf[81];
671 p = ohcount_sourcefile_get_contents(sourcefile);
673 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
675 // Get a line at a time.
676 while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
// NOTE(review): the clamp permits length == sizeof(line); confirm that a
// terminator written at line[length] cannot land past the array.
677 length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
678 strncpy(line, p, length);
680 char *eol = line + strlen(line);
683 // Look for tell-tale lines.
685 while (*p == ' ' || *p == '\t') p++;
686 if (*p == '%') { // Matlab comment
688 } else if (*p == '#' && strncmp(p, "#import", 7) == 0) { // Objective C
690 } else if (*p == '#') { // Limbo or Octave comment
691 while (*p == '#') p++;
692 if (*p == ' ' || *p == '\t') {
695 octave_syntax_detected = 1;
// NOTE(review): && binds tighter than ||, so this condition is also true for
// any line whose second character is '*', whatever the first one is. The
// comment test presumably wants the || part parenthesized -- confirm.
697 } else if (*p == '/' && *(p + 1) == '/' || *(p + 1) == '*') {
698 objective_c_score++; // Objective C comment
699 } else if (*p == '+' || *p == '-') { // Objective C method signature
// NOTE(review): the '#' alternative cannot be reached here; every line that
// starts with '#' is taken by one of the two '#' branches above.
701 } else if (*p == '@' || *p == '#') { // Objective C method signature
702 if (strncmp(p, "@implementation", 15) == 0 ||
703 strncmp(p, "@interface", 10) == 0)
705 } else if (strncmp(p, "function", 8) == 0) { // Matlab or Octave function
707 while (*p == ' ' || *p == '\t') p++;
710 } else if (strncmp(p, "include", 7) == 0) { // Limbo include
711 // /^include[ \t]+"[^"]+\.m";/
713 if (*p == ' ' || *p == '\t') {
714 while (*p == ' ' || *p == '\t') p++;
716 while (*p != '"' && p < eol) p++;
// The quoted include target must end in ".m".
717 if (*p == '"' && *(p - 2) == '.' && *(p - 1) == 'm')
723 // Look for Octave keywords.
// A candidate word starts at a lower-case letter that is not at the start of
// the line and is not preceded by an alphanumeric character.
726 if (islower(*p) && p != line && !isalnum(*(p - 1))) {
728 while (islower(*pe) || *pe == '_') pe++;
731 strncpy(buf, p, length);
733 if (strcmp(buf, "end_try_catch") == 0 ||
734 strcmp(buf, "end_unwind_protect") == 0 ||
735 strcmp(buf, "endfunction") == 0 ||
736 strcmp(buf, "endwhile") == 0)
737 octave_syntax_detected = 1;
743 // Look for Limbo declarations
746 if (*p == ':' && (*(p + 1) == ' ' || *(p + 1) == '\t')) {
747 // /:[ \t]+(module|adt|fn ?\(|con[ \t])/
// This condition relies on && binding tighter than ||: each keyword test is
// paired with its own trailing-character test.
749 if (strncmp(p, "module", 6) == 0 && !isalnum(*(p + 6)) ||
750 strncmp(p, "adt", 3) == 0 && !isalnum(*(p + 3)) ||
751 strncmp(p, "fn", 2) == 0 &&
752 (*(p + 2) == ' ' && *(p + 3) == '(' || *(p + 2) == '(') ||
753 strncmp(p, "con", 3) == 0 &&
754 (*(p + 3) == ' ' || *(p + 3) == '\t'))
761 while (*pe == '\r' || *pe == '\n') pe++;
// Limbo wins only with a strictly higher score than both other candidates.
765 if (limbo_score > objective_c_score && limbo_score > matlab_score)
767 else if (objective_c_score > matlab_score)
768 return LANG_OBJECTIVE_C;
770 return octave_syntax_detected ? LANG_OCTAVE : LANG_MATLAB;
// strnlen is not available on OS X, so we roll our own.
// Returns the length of the string at `begin`, looking at no more than
// `maxlen` bytes; when no terminator lies within that window the result is
// `maxlen`. A NULL `begin` yields 0 rather than being passed to memchr.
size_t mystrnlen(const char *begin, size_t maxlen) {
  if (begin == NULL)
    return 0;
  const char *end = memchr(begin, '\0', maxlen);
  return end ? (size_t)(end - begin) : maxlen;
}
// Tests the first 10000 bytes of the contents (at most) against two
// multi-line regular expressions: one for "keyword =>" attribute lines and one
// for define / class / node / import constructs.
// NOTE(review): confirm that the pcre_compile results are checked for NULL
// and released with pcre_free.
783 const char *disambiguate_pp(SourceFile *sourcefile) {
784 char *p = ohcount_sourcefile_get_contents(sourcefile);
789 /* prepare regular expressions */
793 /* try harder with optional spaces */
795 keyword = pcre_compile("^\\s*(ensure|content|notify|require|source)\\s+=>",
796 PCRE_MULTILINE, &error, &erroffset, NULL);
// pcre_exec reports failure with a negative value, so "> -1" means a match.
798 if (pcre_exec(keyword, NULL, p, mystrnlen(p, 10000), 0, 0, NULL, 0) > -1)
801 /* check for standard puppet constructs */
803 construct = pcre_compile("^\\s*(define\\s+[\\w:-]+\\s*\\(|class\\s+[\\w:-]+(\\s+inherits\\s+[\\w:-]+)?\\s*{|node\\s+\\'?[\\w:\\.-]+\\'?\\s*{|import\\s+\")",
804 PCRE_MULTILINE, &error, &erroffset, NULL);
806 if (pcre_exec(construct, NULL, p, mystrnlen(p, 10000), 0, 0, NULL, 0) > -1)
// Distinguishes between the languages sharing the ".pl" extension: a
// case-insensitive "#!...perl" match anchored at the start of the contents is
// tried first, then the Prolog ":-" rule operator followed by a space or a
// newline.
// NOTE(review): confirm that the compiled pattern is checked for NULL and
// released with pcre_free.
812 const char *disambiguate_pl(SourceFile *sourcefile) {
813 char *contents = ohcount_sourcefile_get_contents(sourcefile);
817 // Check for a perl shebang on first line of file
820 pcre *re = pcre_compile("#![^\\n]*perl", PCRE_CASELESS, &error, &erroffset, NULL);
// Only the first 100 bytes (at most) are searched.
821 if (pcre_exec(re, NULL, contents, mystrnlen(contents, 100), 0, PCRE_ANCHORED, NULL, 0) > -1)
824 // Check for prolog :- rules
825 if (strstr(contents, ":- ") || strstr(contents, ":-\n"))
// Assignment prefixes that disambiguate_pro() searches for, each with and
// without a space before "+=".
832 #define QMAKE_SOURCES_SPACE "SOURCES +="
833 #define QMAKE_SOURCES "SOURCES+="
834 #define QMAKE_CONFIG_SPACE "CONFIG +="
835 #define QMAKE_CONFIG "CONFIG+="
837 const char *disambiguate_pro(SourceFile *sourcefile) {
838 char *p = ohcount_sourcefile_get_contents(sourcefile);
839 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
840 for (; p < eof; p++) {
841 if (strncmp(p, QMAKE_SOURCES_SPACE, strlen(QMAKE_SOURCES_SPACE)) == 0 ||
842 strncmp(p, QMAKE_SOURCES, strlen(QMAKE_SOURCES)) == 0 ||
843 strncmp(p, QMAKE_CONFIG_SPACE, strlen(QMAKE_CONFIG_SPACE)) == 0 ||
844 strncmp(p, QMAKE_CONFIG, strlen(QMAKE_CONFIG)) == 0)
845 return LANG_MAKE; // really QMAKE
847 return LANG_IDL_PVWAVE;
// Searches the contents, ignoring case, for the word "rebol".
850 const char *disambiguate_r(SourceFile *sourcefile) {
851 char *contents = ohcount_sourcefile_get_contents(sourcefile);
855 char *eof = contents + ohcount_sourcefile_get_contents_size(sourcefile);
857 // Detect REBOL by looking for the occurrence of "rebol" in the contents
858 // (case-insensitive). Correct REBOL scripts have a "REBOL [...]" header
860 char *needle = "rebol";
861 int len = strlen(needle);
// NOTE(review): with `<` the last start offset tried is eof - len - 1, so a
// match that ends exactly at eof is never tested -- confirm whether `<=`
// was intended.
862 for (; contents < eof - len; ++contents)
// The first-character comparison is a cheap filter before strncasecmp.
863 if (tolower(*contents) == *needle &&
864 !strncasecmp(contents, needle, len))
// Looks for a "#pragma version" line, either at the very start of the
// contents or at the start of any later line.
870 const char *disambiguate_rs(SourceFile *sourcefile) {
871 // .rs is normally Rust, but it might be RenderScript. RenderScript is
872 // expected to have a "#pragma version(1)" line at the very start, possibly
873 // after comments. To help with supporting future versions of RenderScript,
874 // we'll skip the number part.
875 // As RenderScript is not implemented in ohcount yet, it's returned as NULL.
876 char *contents = ohcount_sourcefile_get_contents(sourcefile);
// The needle starts with '\n' so that in-file matches begin on a new line;
// the start-of-file test below skips that first character.
881 char *needle = "\n#pragma version";
882 int len = strlen(needle);
// This test ignores case, while the scan further down is case-sensitive.
883 if (strncasecmp(contents, needle + 1, len - 1) == 0) {
884 // "#pragma version" at the very start of the file is RenderScript.
888 char *eof = contents + ohcount_sourcefile_get_contents_size(sourcefile);
890 for (; contents < eof - len; ++contents) {
891 if (!strncmp(contents, needle, len)) {
// Scans the contents a line at a time for three markers -- an assignment, a
// block start and a block end ("]" followed by ".") -- and returns
// LANG_SMALLTALK once all three have been seen. The flags accumulate across
// lines.
899 const char *disambiguate_st(SourceFile *sourcefile) {
903 // Attempt to detect based on file contents.
904 int found_assignment = 0, found_block_start = 0, found_block_end = 0;
907 p = ohcount_sourcefile_get_contents(sourcefile);
909 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
911 // Get a line at a time.
912 while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
// NOTE(review): the clamp permits length == sizeof(line); confirm that a
// terminator written at line[length] cannot land past the array.
913 length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
914 strncpy(line, p, length);
916 char *eol = line + strlen(line);
919 for (p = line; p < eol; p++) {
922 while (p < eol && (*p == ' ' || *p == '\t')) p++;
924 found_assignment = 1;
926 found_block_start = 1;
927 } else if (*p == ']' && *(p + 1) == '.') found_block_end = 1;
928 if (found_assignment && found_block_start && found_block_end)
929 return LANG_SMALLTALK;
// Step over the line terminator(s) to reach the next line.
934 while (*pe == '\r' || *pe == '\n') pe++;
941 int ohcount_is_binary_filename(const char *filename) {
942 char *p = (char *)filename + strlen(filename);
943 while (p > filename && *(p - 1) != '.') p--;
945 struct ExtensionMap *re;
946 int length = strlen(p);
947 re = ohcount_hash_language_from_ext(p, length);
948 if (re) return ISBINARY(re->value);
949 // Try the lower-case version of this extension.
950 char lowerext[length];
951 strncpy(lowerext, p, length);
952 lowerext[length] = '\0';
953 for (p = lowerext; p < lowerext + length; p++) *p = tolower(*p);
954 re = ohcount_hash_language_from_ext(lowerext, length);
955 if (re) return ISBINARY(re->value);