1 // detector.c written by Mitchell Foral. mitchell<att>caladbolg.net.
2 // See COPYING for license information.
12 #include "languages.h"
15 #include "hash/cppheader_hash.h"
16 #include "hash/disambiguatefunc_hash.h"
17 #include "hash/extension_hash.h"
18 #include "hash/filename_hash.h"
// Values stored in the language hashes may start with a marker byte:
// '\1' tags a binary file type, '\2' tags an ambiguous one. For an ambiguous
// value, the text after the marker is the id passed to the disambiguation
// function lookup.
// The argument and the whole expansion are parenthesized so that expressions
// such as ISBINARY(p + 1) or *DISAMBIGUATEWHAT(v) expand as intended.
#define ISBINARY(x) ((x)[0] == '\1')
#define ISAMBIGUOUS(x) ((x)[0] == '\2')
#define DISAMBIGUATEWHAT(x) (&(x)[1])
// Replacement for mkstemp(): turns the template into a name with _mktemp()
// and opens it with _open(), requesting exclusive creation
// (_O_CREAT | _O_EXCL) plus the _O_SHORT_LIVED hint.
// NOTE(review): presumably only intended for Windows builds - confirm the
// enclosing platform guard.
26 # define mkstemp(p) _open(_mktemp(p), _O_CREAT | _O_SHORT_LIVED | _O_EXCL)
/* Parse the output of libmagic and return a language, if any.
 * The contents of string `line` will be destroyed.
 */
// Maps a libmagic description string to a language name.
// `line` is lower-cased in place, then matched against three patterns:
//   1. the text starts with "script text": the word after "for " is looked up;
//   2. "script text" appears later: the word preceding it is looked up;
//   3. otherwise, any mention of "xml" yields LANG_XML.
// Lookups go through ohcount_hash_language_from_name(); on a hit the
// entry's `name` is returned.
32 const char *magic_parse(char *line) {
34 char *eol = line + strlen(line);
// Lower-case the whole description so the literal matches below are
// case-insensitive.
// NOTE(review): *p is a plain char; <ctype.h> functions expect a value
// representable as unsigned char (or EOF) - consider a cast.
39 for (p = line; p < eol; p++) *p = tolower(*p);
40 p = strstr(line, "script text");
41 if (p && p == line) { // /^script text(?: executable)? for \w/
42 p = strstr(line, "for ");
// Advance over the alphanumeric word.
46 while (isalnum(*pe)) pe++;
48 strncpy(buf, p, length);
50 struct LanguageMap *rl = ohcount_hash_language_from_name(buf, length);
51 if (rl) return(rl->name);
53 } else if (p) { // /(\w+)(?: -\w+)* script text/
// Walk backwards from "script text" to the start of the preceding word.
// NOTE(review): this backward scan over spaces has no lower bound
// against `line`, unlike the two scans that follow it.
57 while (*p == ' ') p--;
58 while (p != line && isalnum(*(p - 1))) p--;
59 if (p != line && *(p - 1) == '-') p--;
60 } while (*p == '-'); // Skip over any switches.
62 strncpy(buf, p, length);
64 struct LanguageMap *rl = ohcount_hash_language_from_name(buf, length);
65 if (rl) return(rl->name);
66 } else if (strstr(line, "xml")) return(LANG_XML);
/* Use libmagic to detect the file's language. */
// Asks libmagic to describe the file and hands that description to
// magic_parse(). The on-disk file is used when sourcefile->diskpath is set;
// otherwise the in-memory contents are classified.
// libmagic failures are reported on stderr with a "libmagic: " prefix.
73 const char *detect_language_magic(SourceFile *sourcefile) {
// Open a libmagic handle with default flags.
76 magic_t cookie = magic_open(MAGIC_NONE);
// NOTE(review): presumably the magic_open() failure path - confirm that
// magic_error() is safe to call with the cookie in that state.
78 fprintf(stderr, "libmagic: %s\n", magic_error(cookie));
// A NULL path asks libmagic to load its default database.
81 if (magic_load(cookie, NULL) != 0) {
82 fprintf(stderr, "libmagic: %s\n", magic_error(cookie));
87 if (sourcefile->diskpath) {
88 const char *magic = magic_file(cookie, sourcefile->diskpath);
90 fprintf(stderr, "libmagic: %s\n", magic_error(cookie));
// strncpy() does not terminate on truncation, so the last byte is forced
// to NUL on the next line.
94 strncpy(line, magic, sizeof(line));
95 line[sizeof(line)-1] = '\0';
97 char *p = ohcount_sourcefile_get_contents(sourcefile);
// NOTE(review): the buffer length comes from strlen(), so classification
// stops at the first NUL byte; other functions in this file use
// ohcount_sourcefile_get_contents_size() for the length.
100 const char *magic = magic_buffer(cookie, p, strlen(p));
102 fprintf(stderr, "libmagic: %s\n", magic_error(cookie));
106 strncpy(line, magic, sizeof(line));
107 line[sizeof(line)-1] = '\0';
// magic_parse() modifies `line` in place.
112 return magic_parse(line);
/* Use all available means to detect the file's language. */
// Detects the language of `sourcefile` by trying, in source order: an Emacs
// mode line, the file extension, the file name, and finally libmagic.
// A result tagged as ambiguous is resolved by the disambiguation function
// registered for it; a result tagged as binary is reported as NULL.
117 const char *ohcount_detect_language(SourceFile *sourcefile) {
118 const char *language = NULL;
122 // Attempt to detect using Emacs mode line (/^-\*-\s*mode[\s:]*\w/i).
// `line` is zero-initialised; `buf` is not.
123 char line[81] = { '\0' }, buf[81];
124 p = ohcount_sourcefile_get_contents(sourcefile);
126 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
128 // Get the contents of the first line.
129 while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
// NOTE(review): the clamp allows length == sizeof(line), in which case
// strncpy() fills the whole buffer and leaves no room for a terminator.
130 length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
131 strncpy(line, p, length);
133 if (*line == '#' && *(line + 1) == '!') {
134 // First line was sh-bang; loop to get contents of second line.
135 while (*pe == '\r' || *pe == '\n') pe++;
139 p = strstr(line, "-*-");
142 while (*p == ' ' || *p == '\t') p++;
143 // detect "mode" (any capitalization)
144 if (strncasecmp(p, "mode", 4) == 0) {
146 while (*p == ' ' || *p == '\t' || *p == ':') p++;
// The mode name ends at whitespace, ';' or the closing "-*-".
// strstr() is re-evaluated on every iteration of this scan.
149 while (!isspace(*pe) && *pe != ';' && pe != strstr(pe, "-*-")) pe++;
// NOTE(review): same clamp as above - length may equal sizeof(buf).
150 length = (pe - p <= sizeof(buf)) ? pe - p : sizeof(buf);
151 strncpy(buf, p, length);
154 // Special case for "c" or "C" emacs mode header: always means C, not C++
155 if (strcasecmp(buf, "c") == 0) {
159 // First try it with the language name.
160 struct LanguageMap *rl = ohcount_hash_language_from_name(buf, length);
161 if (rl) language = rl->name;
163 // Then try it with the extension table.
164 struct ExtensionMap *re = ohcount_hash_language_from_ext(buf, length);
165 if (re) language = re->value;
168 // Try the lower-case version of this modeline.
169 for (pe = buf; pe < buf+length; pe++) *pe = tolower(*pe);
170 // First try it with the language name.
171 rl = ohcount_hash_language_from_name(buf, length);
172 if (rl) language = rl->name;
175 // Then try it with the extension table.
176 struct ExtensionMap *re = ohcount_hash_language_from_ext(buf, length);
177 if (re) language = re->value;
181 // Attempt to detect based on file extension.
183 length = strlen(sourcefile->ext);
184 struct ExtensionMap *re = ohcount_hash_language_from_ext(sourcefile->ext,
186 if (re) language = re->value;
188 // Try the lower-case version of this extension.
// One extra byte is reserved for the terminator written below.
189 char lowerext[length + 1];
190 strncpy(lowerext, sourcefile->ext, length);
191 lowerext[length] = '\0';
192 for (p = lowerext; p < lowerext + length; p++) *p = tolower(*p);
193 struct ExtensionMap *re = ohcount_hash_language_from_ext(lowerext, length);
194 if (re) language = re->value;
198 // Attempt to detect based on filename.
200 length = strlen(sourcefile->filename);
201 struct FilenameMap *rf =
202 ohcount_hash_language_from_filename(sourcefile->filename, length);
203 if (rf) language = rf->value;
206 // Attempt to detect based on Unix 'file' command.
208 language = detect_language_magic(sourcefile);
// Hash values may carry a marker byte; see ISAMBIGUOUS and ISBINARY.
212 if (ISAMBIGUOUS(language)) {
213 // Call the appropriate function for disambiguation.
214 length = strlen(DISAMBIGUATEWHAT(language));
215 struct DisambiguateFuncsMap *rd =
216 ohcount_hash_disambiguate_func_from_id(DISAMBIGUATEWHAT(language),
218 if (rd) language = rd->value(sourcefile);
// Binary file types are reported as "no language".
219 } else language = ISBINARY(language) ? NULL : language;
// Inspects the "<%@ ... %>" directives of an ASPX file. Each directive is
// copied and lower-cased, and a "page" directive containing language="vb"
// is what the visible checks look for.
224 const char *disambiguate_aspx(SourceFile *sourcefile) {
225 char *p = ohcount_sourcefile_get_contents(sourcefile);
226 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
227 for (; p < eof; p++) {
228 // /<%@\s*Page[^>]+Language="VB"[^>]+%>/
229 p = strstr(p, "<%@");
232 char *pe = strstr(p, "%>");
235 const int length = pe - p;
237 strncpy(buf, p, length);
239 char *eol = buf + strlen(buf);
// NOTE(review): the scan cursor `p` is reused to walk `buf` here, so from
// this point it points into the copy, not the file contents. Confirm it
// is restored before the outer loop evaluates `p < eof` again.
240 for (p = buf; p < eol; p++) *p = tolower(*p);
242 while (*p == ' ' || *p == '\t') p++;
243 if (strncmp(p, "page", 4) == 0) {
245 if (strstr(p, "language=\"vb\""))
253 // 6502 assembly or XML-based Advanced Stream Redirector ?
// Scans the contents byte by byte. Both visible outcomes are
// LANG_ASSEMBLER, including the fall-through when the loop ends.
254 const char *disambiguate_asx(SourceFile *sourcefile) {
255 char *p = ohcount_sourcefile_get_contents(sourcefile);
256 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
257 for (; p < eof; p++) {
272 return LANG_ASSEMBLER;
275 return LANG_ASSEMBLER; // only blanks - not valid XML, may be valid asm
// Looks for the keywords named in the regex comment below ("implement",
// "include", "return"/"break"/"continue", "pick"/"case"). When none of the
// checks decides, the decision is delegated to disambiguate_basic().
278 const char *disambiguate_b(SourceFile *sourcefile) {
279 char *p = ohcount_sourcefile_get_contents(sourcefile);
280 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
282 // /(implement[ \t])|(include[ \t]+"[^"]*";)|
283 // ((return|break|continue).*;|(pick|case).*\{)/
284 if (strncmp(p, "implement", 9) == 0 &&
285 (*(p + 9) == ' ' || *(p + 9) == '\t'))
287 else if (strncmp(p, "include", 7) == 0 &&
288 (*(p + 7) == ' ' || *(p + 7) == '\t')) {
290 while (*p == ' ' || *p == '\t') p++;
// NOTE(review): *p is read before the `p < eof` bound is tested.
292 while (*p != '"' && p < eof) p++;
// A closing quote immediately followed by ';'.
293 if (*p == '"' && *(p + 1) == ';')
296 } else if (strncmp(p, "return", 6) == 0 ||
297 strncmp(p, "break", 5) == 0 ||
298 strncmp(p, "continue", 8) == 0) {
301 } else if (strncmp(p, "pick", 4) == 0 ||
302 strncmp(p, "case", 4) == 0) {
// Nothing matched above: fall back to the BASIC dialect heuristics.
308 return disambiguate_basic(sourcefile);
// Chooses between BASIC dialects.
//   - A content line made of digits followed by blanks can yield
//     LANG_CLASSIC_BASIC (the intermediate checks should be confirmed).
//   - A sibling file in sourcefile->filenames with a frm/frx/vba/vbp/vbs
//     extension yields LANG_VISUALBASIC.
//   - Otherwise the result is LANG_STRUCTURED_BASIC.
311 const char *disambiguate_basic(SourceFile *sourcefile) {
315 // Attempt to detect based on file contents.
317 p = ohcount_sourcefile_get_contents(sourcefile);
319 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
321 // Get a line at a time.
322 while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
// NOTE(review): the clamp allows length == sizeof(line), in which case
// strncpy() fills the whole buffer and leaves no room for a terminator.
323 length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
324 strncpy(line, p, length);
// Skip the leading digits, then require at least one blank after them.
332 while (isdigit(*p)) p++;
333 if (*p == ' ' || *p == '\t') {
335 while (*p == ' ' || *p == '\t') p++;
337 return LANG_CLASSIC_BASIC;
// Step over the line terminator(s) to reach the next line.
343 while (*pe == '\r' || *pe == '\n') pe++;
347 // Attempt to detect from associated VB files in file context.
348 char **filenames = sourcefile->filenames;
// The list is terminated by a NULL entry.
351 for (i = 0; filenames[i] != NULL; i++) {
352 pe = filenames[i] + strlen(filenames[i]);
// Back up to just after the last '.', or to the start of the name.
354 while (p > filenames[i] && *(p - 1) != '.') p--;
// NOTE(review): these are prefix comparisons limited to `length`
// characters; confirm `length` is the extension length here.
357 (strncmp(p, "frm", length) == 0 ||
358 strncmp(p, "frx", length) == 0 ||
359 strncmp(p, "vba", length) == 0 ||
360 strncmp(p, "vbp", length) == 0 ||
361 strncmp(p, "vbs", length) == 0)) {
362 return LANG_VISUALBASIC;
367 return LANG_STRUCTURED_BASIC;
// Returns LANG_CLEARSILVER_TEMPLATE when the contents contain the "<?cs"
// marker. The contents pointer is checked for NULL before it is searched.
370 const char *disambiguate_cs(SourceFile *sourcefile) {
371 // Attempt to detect based on file contents.
372 char *contents = ohcount_sourcefile_get_contents(sourcefile);
373 if (contents && strstr(contents, "<?cs"))
374 return LANG_CLEARSILVER_TEMPLATE;
// Decides whether a .def file is Modula-2, judging by a "(*"-style comment
// or the "DEFINITION" keyword. Both visible fall-through paths return NULL.
379 const char *disambiguate_def(SourceFile *sourcefile) {
380 char *p = ohcount_sourcefile_get_contents(sourcefile);
381 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
382 for (; p < eof; p++) {
// NOTE(review): p[1] is read here; confirm p + 1 is still inside the
// contents at this point.
390 if (p[1] == '*') // Modula-2 comment
// Case-sensitive match on the upper-case keyword.
394 if (strncmp(p, "DEFINITION", 10) == 0) // Modula-2 "DEFINITION MODULE"
398 return NULL; // not Modula-2
401 return NULL; // only blanks
// Distinguishes fixed-form from free-form Fortran. Fixed form is assumed,
// and LANG_FORTRANFREE is returned as soon as a line violates a fixed-form
// rule; LANG_FORTRANFIXED is returned when no line does.
// `i` appears to track the current column and `blanklabel` whether the
// label field has been blank so far - confirm against their declarations.
// NOTE(review): most scanning loops below read *p before testing
// `p < eof`.
404 const char *disambiguate_fortran(SourceFile *sourcefile) {
407 p = ohcount_sourcefile_get_contents(sourcefile);
408 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
410 // Try the assumption of a fixed formatted source code, and return free
411 // format if anything opposes this assumption.
412 // Rules based on the Fortran standard, page 47:
413 // ftp://ftp.nag.co.uk/sc22wg5/N1801-N1850/N1830.pdf
417 // Process a single line; tabulators are not valid in Fortran code
418 // but some compilers accept them to skip the first 5 columns.
419 if (*p == ' ' || *p == '\t' || isdigit(*p)) {
420 // Only consider lines starting with a blank or digit
421 // (non-comment in fixed)
// A leading tab counts as having consumed the first five columns.
422 if (*p == '\t') i = 5;
423 blanklabel = (*p == ' ' || *p == '\t');
424 while (*p != '\r' && *p != '\n' && p < eof) {
427 blanklabel = blanklabel && (*p == ' ');
428 if ( !isdigit(*p) && *p != ' ' && *p != '!')
429 // Non-digit, non-blank, non-comment character in the label field
430 // definitely not valid fixed formatted code!
431 return LANG_FORTRANFREE;
433 if ((i == 6) && !blanklabel && *p != ' ' && *p != '0')
434 // Fixed format continuation line with non-blank label field
435 // not allowed, assume free format:
436 return LANG_FORTRANFREE;
437 // Ignore comments (a ! character in column 6 is a continuation marker, not a comment)
439 if (*p == '!' && i != 6) {
440 while (*p != '\r' && *p != '\n' && p < eof) p++;
// Skip a double-quoted string up to its closing quote or the line end.
444 if (p < eof) {p++; i++;}
445 while (*p != '"' && *p != '\r' && *p != '\n' && p < eof) {
// Same for a single-quoted string.
450 if (p < eof) {p++; i++;}
451 while (*p != '\'' && *p != '\r' && *p != '\n' && p < eof) {
455 // Check for free format line continuation
456 if (i > 6 && i <= 72 && *p == '&')
457 // Found an unquoted free format continuation character in the fixed
458 // format code section. This has to be free format.
459 return LANG_FORTRANFREE;
463 // Not a statement line in fixed format...
464 if (*p != 'C' && *p != 'c' && *p != '*' && *p != '!')
465 // Not a valid fixed form comment, has to be free formatted source
466 return LANG_FORTRANFREE;
467 // Comment in fixed form, ignore this line
468 while (*p != '\r' && *p != '\n' && p < eof) p++;
470 // Skip all line ends
471 while ((*p == '\r' || *p == '\n') && p < eof) p++;
473 // Assume fixed format if none of the lines broke the assumptions
474 return LANG_FORTRANFIXED;
// Classifies a header file.
//   1. If the extension is "h" and a sibling file with the same name but a
//      trailing 'm' exists in sourcefile->filenames: LANG_OBJECTIVE_C.
//   2. Otherwise the contents are scanned line by line for #include
//      directives naming a known C++ header (or a header whose extension
//      maps to LANG_CPP) and for the keywords class, namespace, template
//      and typename.
477 const char *disambiguate_h(SourceFile *sourcefile) {
481 // If the directory contains a matching *.m file, likely Objective C.
482 length = strlen(sourcefile->filename);
483 if (strcmp(sourcefile->ext, "h") == 0) {
485 strncpy(path, sourcefile->filename, length);
// Turn "name.h" into "name.m" by overwriting the last character.
487 *(path + length - 1) = 'm';
488 char **filenames = sourcefile->filenames;
// The list is terminated by a NULL entry.
491 for (i = 0; filenames[i] != NULL; i++)
492 if (strcmp(path, filenames[i]) == 0)
493 return LANG_OBJECTIVE_C;
497 // Attempt to detect based on file contents.
498 char line[81], buf[81];
499 bof = ohcount_sourcefile_get_contents(sourcefile);
502 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
504 // Get a line at a time.
505 while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
// NOTE(review): the clamp allows length == sizeof(line), in which case
// strncpy() fills the whole buffer and leaves no room for a terminator.
506 length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
507 strncpy(line, p, length);
509 char *eol = line + strlen(line);
512 // Look for C++ headers.
515 while (*p == ' ' || *p == '\t') p++;
516 if (strncmp(p, "include", 7) == 0 &&
517 (*(p + 7) == ' ' || *(p + 7) == '\t')) {
518 // /^#\s*include\s+[<"][^>"]+[>"]/
520 while (*p == ' ' || *p == '\t') p++;
521 if (*p == '<' || *p == '"') {
522 // Is the header file a C++ header file?
// Find the closing '>' or '"' of the header name.
525 while (pe < eol && *pe != '>' && *pe != '"') pe++;
527 strncpy(buf, p, length);
529 if (ohcount_hash_is_cppheader(buf, length))
531 // Is the extension for the header file a C++ file?
// Back up to just after the last '.', or to the start of the line.
533 while (p > line && *(p - 1) != '.') p--;
535 strncpy(buf, p, length);
537 struct ExtensionMap *re = ohcount_hash_language_from_ext(buf, length);
538 if (re && strcmp(re->value, LANG_CPP) == 0)
544 // Look for C++ keywords.
// A keyword candidate starts at a lower-case letter that is not preceded
// by an alphanumeric character or '_'.
// NOTE(review): the lower bound here is `bof`, whereas the equivalent
// scan in disambiguate_m uses `p != line`; confirm which buffer `p`
// walks at this point.
547 if (islower(*p) && p != bof && !isalnum(*(p - 1)) && *(p - 1) != '_') {
549 while (islower(*pe)) pe++;
// The word must also end at a non-identifier character.
550 if (!isalnum(*pe) && *pe != '_') {
552 strncpy(buf, p, length);
554 if (strcmp(buf, "class") == 0 ||
555 strcmp(buf, "namespace") == 0 ||
556 strcmp(buf, "template") == 0 ||
557 strcmp(buf, "typename") == 0)
// Step over the line terminator(s) to reach the next line.
566 while (*pe == '\r' || *pe == '\n') pe++;
570 // Nothing to suggest C++.
// Detects the language of a "*.in" file by detecting it as if the ".in"
// suffix were absent: a temporary SourceFile is created for the undecorated
// path, given the same disk path / contents and filename list, asked for
// its language, and then freed.
574 const char *disambiguate_in(SourceFile *sourcefile) {
577 const char *language = NULL;
579 p = sourcefile->filepath;
// `pe` points three characters before the end of the path.
580 pe = p + strlen(p) - 3;
// NOTE(review): strstr() returns NULL when the path has no '.', and the
// NULL result is then compared against `pe`. It also finds the FIRST '.'
// anywhere in the path - confirm this is the intended test.
581 if (strstr(p, ".") <= pe) {
582 // Only if the filename has an extension prior to the .in
585 strncpy(buf, p, length);
587 p = ohcount_sourcefile_get_contents(sourcefile);
592 // A SourceFile's filepath and diskpath need not be the same.
593 // Here, we'll take advantage of this to set up a new SourceFile
594 // whose filepath does not have the *.in extension, but whose
595 // diskpath still points back to the original file on disk (if any).
596 SourceFile *undecorated = ohcount_sourcefile_new(buf);
597 if (sourcefile->diskpath) {
598 ohcount_sourcefile_set_diskpath(undecorated, sourcefile->diskpath);
600 ohcount_sourcefile_set_contents(undecorated, p);
// NOTE(review): the filename list is shared, not copied; confirm that
// ohcount_sourcefile_free() does not release it.
601 undecorated->filenames = sourcefile->filenames;
602 language = ohcount_sourcefile_get_language(undecorated);
603 ohcount_sourcefile_free(undecorated);
// Inspects the contents of a .inc file; the visible check looks for "?php".
608 const char *disambiguate_inc(SourceFile *sourcefile) {
609 char *p = ohcount_sourcefile_get_contents(sourcefile);
// The end is taken from strlen(), so the scan stops at the first NUL byte.
611 char *eof = p + strlen(p);
// '?' followed by "php".
615 else if (*p == '?' && strncmp(p + 1, "php", 3) == 0)
// Classifies a .m file as Limbo, Objective C, Octave or Matlab using
// per-language scores.
//   - File context: .h headers among the sibling files with no C/C++
//     sources adds 5 to the Objective C score.
//   - Contents: each line is checked for tell-tale prefixes, Octave-only
//     keywords and Limbo declarations.
//   - Result: the highest score wins; for the Matlab family, Octave is
//     chosen when Octave-specific syntax was seen.
623 const char *disambiguate_m(SourceFile *sourcefile) {
627 // Attempt to detect based on a weighted heuristic of file contents.
628 int matlab_score = 0;
629 int objective_c_score = 0;
631 int octave_syntax_detected = 0;
633 int i, has_h_headers = 0, has_c_files = 0;
634 char **filenames = sourcefile->filenames;
// The list is terminated by a NULL entry.
636 for (i = 0; filenames[i] != NULL; i++) {
// Four-character suffixes: .cpp, .c++, .cxx
// NOTE(review): confirm the name is at least four characters long before
// *(pe - 4) is read.
640 if (*(pe - 4) == '.' && *(pe - 3) == 'c' &&
641 ((*(pe - 2) == 'p' && *(pe - 1) == 'p') ||
642 (*(pe - 2) == '+' && *(pe - 1) == '+') ||
643 (*(pe - 2) == 'x' && *(pe - 1) == 'x'))) {
645 break; // short circuit
647 } else if (pe - p >= 3) {
// Three-character suffix: .cc
648 if (*(pe - 3) == '.' && *(pe - 2) == 'c' && *(pe - 1) == 'c') {
650 break; // short circuit
652 } else if (pe - p >= 2) {
// Two-character suffixes: .h, .c, .C
653 if (*(pe - 2) == '.') {
654 if (*(pe - 1) == 'h')
656 else if (*(pe - 1) == 'c' || *(pe - 1) == 'C') {
658 break; // short circuit
664 if (has_h_headers && !has_c_files)
665 objective_c_score += 5;
667 char line[81], buf[81];
668 p = ohcount_sourcefile_get_contents(sourcefile);
670 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
672 // Get a line at a time.
673 while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
// NOTE(review): the clamp allows length == sizeof(line), in which case
// strncpy() fills the whole buffer and leaves no room for a terminator.
674 length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
675 strncpy(line, p, length);
677 char *eol = line + strlen(line);
680 // Look for tell-tale lines.
682 while (*p == ' ' || *p == '\t') p++;
683 if (*p == '%') { // Matlab comment
685 } else if (*p == '#' && strncmp(p, "#import", 7) == 0) { // Objective C
687 } else if (*p == '#') { // Limbo or Octave comment
688 while (*p == '#') p++;
689 if (*p == ' ' || *p == '\t') {
692 octave_syntax_detected = 1;
// NOTE(review): `&&` binds tighter than `||`, so this condition is true
// for ANY line whose second character is '*', not only for "/*".
// The intent was probably: *p == '/' && (next is '/' or '*').
694 } else if (*p == '/' && *(p + 1) == '/' || *(p + 1) == '*') {
695 objective_c_score++; // Objective C comment
696 } else if (*p == '+' || *p == '-') { // Objective C method signature
// NOTE(review): the '#' alternative cannot be reached here; lines starting
// with '#' are consumed by the two '#' branches above.
698 } else if (*p == '@' || *p == '#') { // Objective C method signature
699 if (strncmp(p, "@implementation", 15) == 0 ||
700 strncmp(p, "@interface", 10) == 0)
702 } else if (strncmp(p, "function", 8) == 0) { // Matlab or Octave function
704 while (*p == ' ' || *p == '\t') p++;
707 } else if (strncmp(p, "include", 7) == 0) { // Limbo include
708 // /^include[ \t]+"[^"]+\.m";/
710 if (*p == ' ' || *p == '\t') {
711 while (*p == ' ' || *p == '\t') p++;
// NOTE(review): *p is read before the `p < eol` bound is tested.
713 while (*p != '"' && p < eol) p++;
// The quoted name must end in ".m".
714 if (*p == '"' && *(p - 2) == '.' && *(p - 1) == 'm')
720 // Look for Octave keywords.
// A keyword candidate starts at a lower-case letter that is not at the
// start of the line and not preceded by an alphanumeric character.
723 if (islower(*p) && p != line && !isalnum(*(p - 1))) {
725 while (islower(*pe) || *pe == '_') pe++;
728 strncpy(buf, p, length);
730 if (strcmp(buf, "end_try_catch") == 0 ||
731 strcmp(buf, "end_unwind_protect") == 0 ||
732 strcmp(buf, "endfunction") == 0 ||
733 strcmp(buf, "endwhile") == 0)
734 octave_syntax_detected = 1;
740 // Look for Limbo declarations
743 if (*p == ':' && (*(p + 1) == ' ' || *(p + 1) == '\t')) {
744 // /:[ \t]+(module|adt|fn ?\(|con[ \t])/
// The condition below relies on `&&` binding tighter than `||`: each
// keyword test is paired with the delimiter test that follows it.
746 if (strncmp(p, "module", 6) == 0 && !isalnum(*(p + 6)) ||
747 strncmp(p, "adt", 3) == 0 && !isalnum(*(p + 3)) ||
748 strncmp(p, "fn", 2) == 0 &&
749 (*(p + 2) == ' ' && *(p + 3) == '(' || *(p + 2) == '(') ||
750 strncmp(p, "con", 3) == 0 &&
751 (*(p + 3) == ' ' || *(p + 3) == '\t'))
// Step over the line terminator(s) to reach the next line.
758 while (*pe == '\r' || *pe == '\n') pe++;
// Limbo wins only with a strictly higher score than both others;
// Objective C wins only with a strictly higher score than Matlab.
762 if (limbo_score > objective_c_score && limbo_score > matlab_score)
764 else if (objective_c_score > matlab_score)
765 return LANG_OBJECTIVE_C;
767 return octave_syntax_detected ? LANG_OCTAVE : LANG_MATLAB;
// Local stand-in for strnlen(), which OS X does not provide.
// Returns the length of the string at `begin`, never more than `maxlen`:
// if no terminator is found within the first `maxlen` bytes, `maxlen`
// itself is returned.
size_t mystrnlen(const char *begin, size_t maxlen) {
  const char *terminator = memchr(begin, '\0', maxlen);
  if (terminator == NULL) {
    return maxlen;
  }
  return terminator - begin;
}
// Inspects a .pp file using three groups of checks at each position of the
// contents: "$include"/"$INCLUDE"/"end." markers, "keyword =>" attribute
// assignments (or "include "), and a regular expression for "define" and
// "class" declarations.
// NOTE(review): which language each group selects should be confirmed
// against the return statements.
778 const char *disambiguate_pp(SourceFile *sourcefile) {
779 char *p = ohcount_sourcefile_get_contents(sourcefile);
780 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
782 /* prepare regular expressions */
// NOTE(review): "class \\s+" is a literal space followed by \s+, so a
// class declaration needs at least two whitespace characters after
// "class" to match. Confirm whether the literal space is intended.
786 re = pcre_compile("(define\\s+\\w+\\s*\\(|class \\s+\\w+\\s*{)", 0, &error, &erroffset, NULL);
788 for (; p < eof; p++) {
789 if (strncmp(p, "$include", 8) == 0 ||
790 strncmp(p, "$INCLUDE", 8) == 0 ||
791 strncmp(p, "end.", 4) == 0)
793 if (strncmp(p, "enable =>", 9) == 0 ||
794 strncmp(p, "ensure =>", 9) == 0 ||
795 strncmp(p, "content =>", 10) == 0 ||
796 strncmp(p, "source =>", 9) == 0 ||
797 strncmp(p, "include ", 8) == 0)
800 /* regexp for checking for define and class declarations */
// The subject is limited to the next 100 bytes (or up to the terminator),
// and 30 ovector slots are passed.
804 rc = pcre_exec(re, NULL, p, mystrnlen(p, 100), 0, 0, ovector, 30);
// Inspects a .pl file: a shebang line mentioning "perl" (case-insensitive,
// anchored at the start, within the first 100 bytes) is the first check,
// and a ":- " or ":-" + newline sequence anywhere in the contents is the
// second.
813 const char *disambiguate_pl(SourceFile *sourcefile) {
814 char *contents = ohcount_sourcefile_get_contents(sourcefile);
818 // Check for a perl shebang on first line of file
// NOTE(review): the pcre_compile() result is passed straight to
// pcre_exec() without a NULL check.
821 pcre *re = pcre_compile("#![^\\n]*perl", PCRE_CASELESS, &error, &erroffset, NULL);
822 if (pcre_exec(re, NULL, contents, mystrnlen(contents, 100), 0, PCRE_ANCHORED, NULL, 0) > -1)
825 // Check for prolog :- rules
826 if (strstr(contents, ":- ") || strstr(contents, ":-\n"))
833 #define QMAKE_SOURCES_SPACE "SOURCES +="
834 #define QMAKE_SOURCES "SOURCES+="
835 #define QMAKE_CONFIG_SPACE "CONFIG +="
836 #define QMAKE_CONFIG "CONFIG+="
838 const char *disambiguate_pro(SourceFile *sourcefile) {
839 char *p = ohcount_sourcefile_get_contents(sourcefile);
840 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
841 for (; p < eof; p++) {
842 if (strncmp(p, QMAKE_SOURCES_SPACE, strlen(QMAKE_SOURCES_SPACE)) == 0 ||
843 strncmp(p, QMAKE_SOURCES, strlen(QMAKE_SOURCES)) == 0 ||
844 strncmp(p, QMAKE_CONFIG_SPACE, strlen(QMAKE_CONFIG_SPACE)) == 0 ||
845 strncmp(p, QMAKE_CONFIG, strlen(QMAKE_CONFIG)) == 0)
846 return LANG_MAKE; // really QMAKE
848 return LANG_IDL_PVWAVE;
// Inspects a .r file by searching the contents for the word "rebol",
// ignoring case.
851 const char *disambiguate_r(SourceFile *sourcefile) {
852 char *contents = ohcount_sourcefile_get_contents(sourcefile);
856 char *eof = contents + ohcount_sourcefile_get_contents_size(sourcefile);
858 // Detect REBOL by looking for the occurrence of "rebol" in the contents
859 // (case-insensitive). Correct REBOL scripts have a "REBOL [...]" header
861 char *needle = "rebol";
862 int len = strlen(needle);
// NOTE(review): the bound `contents < eof - len` excludes the last
// position where the needle could still fit, so a match ending exactly at
// the end of the contents is not examined. `<=` may have been intended.
863 for (; contents < eof - len; ++contents)
// The first-character test is a cheap filter before the full comparison.
864 if (tolower(*contents) == *needle &&
865 !strncasecmp(contents, needle, len))
// Inspects a .st file line by line and returns LANG_SMALLTALK once an
// assignment, a block start and a block end ("]" followed by ".") have all
// been seen.
871 const char *disambiguate_st(SourceFile *sourcefile) {
875 // Attempt to detect based on file contents.
876 int found_assignment = 0, found_block_start = 0, found_block_end = 0;
879 p = ohcount_sourcefile_get_contents(sourcefile);
881 char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
883 // Get a line at a time.
884 while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
// NOTE(review): the clamp allows length == sizeof(line), in which case
// strncpy() fills the whole buffer and leaves no room for a terminator.
885 length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
886 strncpy(line, p, length);
888 char *eol = line + strlen(line);
891 for (p = line; p < eol; p++) {
894 while (p < eol && (*p == ' ' || *p == '\t')) p++;
896 found_assignment = 1;
898 found_block_start = 1;
899 } else if (*p == ']' && *(p + 1) == '.') found_block_end = 1;
// The three flags persist across lines; all three are required.
900 if (found_assignment && found_block_start && found_block_end)
901 return LANG_SMALLTALK;
// Step over the line terminator(s) to reach the next line.
906 while (*pe == '\r' || *pe == '\n') pe++;
913 int ohcount_is_binary_filename(const char *filename) {
914 char *p = (char *)filename + strlen(filename);
915 while (p > filename && *(p - 1) != '.') p--;
917 struct ExtensionMap *re;
918 int length = strlen(p);
919 re = ohcount_hash_language_from_ext(p, length);
920 if (re) return ISBINARY(re->value);
921 // Try the lower-case version of this extension.
922 char lowerext[length];
923 strncpy(lowerext, p, length);
924 lowerext[length] = '\0';
925 for (p = lowerext; p < lowerext + length; p++) *p = tolower(*p);
926 re = ohcount_hash_language_from_ext(lowerext, length);
927 if (re) return ISBINARY(re->value);