From 02f4c4c3e94fcca794f11e6c649d0d68cf822866 Mon Sep 17 00:00:00 2001
From: Scott Lawrence
Date: Fri, 23 Jul 2010 16:09:18 -0400
Subject: [PATCH] added golang detection and parsing, based on C

---
Reviewer note: src/parsers/golang.rl below was revised during review (Go
keywords, raw string literals, line-comment handling), so the abbreviated
blob id on its "index" line predates those revisions.

 src/hash/extensions.gperf |   1 +
 src/hash/languages.gperf  |   1 +
 src/hash/parsers.gperf    |   2 +
 src/languages.h           |   1 +
 src/parsers/golang.rl     | 196 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 201 insertions(+)
 create mode 100644 src/parsers/golang.rl

diff --git a/src/hash/extensions.gperf b/src/hash/extensions.gperf
index 5be05d8..13bbfc5 100644
--- a/src/hash/extensions.gperf
+++ b/src/hash/extensions.gperf
@@ -69,6 +69,7 @@ fs, LANG_FSHARP
 ftn, DISAMBIGUATE("fortran")
 gif, BINARY
 glsl, LANG_GLSL
+go, LANG_GOLANG
 groovy, LANG_GROOVY
 gz, BINARY
 h, DISAMBIGUATE("h")
diff --git a/src/hash/languages.gperf b/src/hash/languages.gperf
index 2acd17e..e729689 100644
--- a/src/hash/languages.gperf
+++ b/src/hash/languages.gperf
@@ -39,6 +39,7 @@ fortranfixed, LANG_FORTRANFIXED, "Fortan (Fixed-format)", 0
 fortranfree, LANG_FORTRANFREE, "Fortan (Free-format)", 0
 fsharp, LANG_FSHARP, "F#", 0
 glsl, LANG_GLSL, "OpenGL Shading Language", 0
+golang, LANG_GOLANG, "Golang", 0
 groovy, LANG_GROOVY, "Groovy", 0
 haml, LANG_HAML, "Haml", 1
 haskell, LANG_HASKELL, "Haskell", 0
diff --git a/src/hash/parsers.gperf b/src/hash/parsers.gperf
index 447cd0f..b8aeb96 100644
--- a/src/hash/parsers.gperf
+++ b/src/hash/parsers.gperf
@@ -30,6 +30,7 @@
 #include "../parsers/fortranfree.h"
 #include "../parsers/fsharp.h"
 #include "../parsers/glsl.h"
+#include "../parsers/golang.h"
 #include "../parsers/groovy.h"
 #include "../parsers/haml.h"
 #include "../parsers/haskell.h"
@@ -122,6 +123,7 @@ fortranfixed, parse_fortranfixed
 fortranfree, parse_fortranfree
 fsharp, parse_fsharp
 glsl, parse_glsl
+golang, parse_golang
 groovy, parse_groovy
 haskell, parse_haskell
 haml, parse_haml
diff --git a/src/languages.h b/src/languages.h
index 0390b78..a3e2e83 100644
--- a/src/languages.h
+++ b/src/languages.h
@@ -42,6 +42,7 @@
 #define LANG_FORTRANFREE "fortranfree"
 #define LANG_FSHARP "fsharp"
 #define LANG_GLSL "glsl"
+#define LANG_GOLANG "golang"
 #define LANG_GROOVY "groovy"
 #define LANG_HASKELL "haskell"
 #define LANG_HAML "haml"
diff --git a/src/parsers/golang.rl b/src/parsers/golang.rl
new file mode 100644
index 0000000..60ae814
--- /dev/null
+++ b/src/parsers/golang.rl
@@ -0,0 +1,196 @@
+// golang.rl, adapted from c.rl.
+// c.rl written by Mitchell Foral. mitchell<att>caladbolg<dott>net.
+
+/************************* Required for every parser *************************/
+#ifndef OHCOUNT_GOLANG_PARSER_H
+#define OHCOUNT_GOLANG_PARSER_H
+
+#include "../parser_macros.h"
+
+// the name of the language
+const char *GOLANG_LANG = LANG_GOLANG;
+
+// the languages entities
+const char *golang_entities[] = {
+  "space", "comment", "string", "number", "preproc",
+  "keyword", "identifier", "operator", "any"
+};
+
+// constants associated with the entities
+enum {
+  GOLANG_SPACE = 0, GOLANG_COMMENT, GOLANG_STRING, GOLANG_NUMBER, GOLANG_PREPROC,
+  GOLANG_KEYWORD, GOLANG_IDENTIFIER, GOLANG_OPERATOR, GOLANG_ANY
+};
+
+/*****************************************************************************/
+
+%%{
+  machine golang;
+  write data;
+  include common "common.rl";
+
+  # Line counting machine
+
+  # Dispatches on the current 'entity' value to the matching line-counting
+  # helper (ls, code, std_internal_newline or std_newline).
+  action golang_ccallback {
+    switch(entity) {
+    case GOLANG_SPACE:
+      ls
+      break;
+    case GOLANG_ANY:
+      code
+      break;
+    case INTERNAL_NL:
+      std_internal_newline(GOLANG_LANG)
+      break;
+    case NEWLINE:
+      std_newline(GOLANG_LANG)
+    }
+  }
+
+  # A Go line comment always ends at the end of its line: unlike C, a
+  # trailing backslash does not continue it onto the next line.
+  golang_line_comment =
+    '//' @comment (
+      ws
+      |
+      (nonnewline - ws) @comment
+    )*;
+  golang_block_comment =
+    '/*' @comment (
+      newline %{ entity = INTERNAL_NL; } %golang_ccallback
+      |
+      ws
+      |
+      (nonnewline - ws) @comment
+    )* :>> '*/';
+  golang_comment = golang_line_comment | golang_block_comment;
+
+  # Single-quoted (rune) literal.
+  golang_sq_str =
+    '\'' @code (
+      escaped_newline %{ entity = INTERNAL_NL; } %golang_ccallback
+      |
+      ws
+      |
+      [^\t '\\] @code
+      |
+      '\\' nonnewline @code
+    )* '\'';
+  # Double-quoted (interpreted) string literal.
+  golang_dq_str =
+    '"' @code (
+      escaped_newline %{ entity = INTERNAL_NL; } %golang_ccallback
+      |
+      ws
+      |
+      [^\t "\\] @code
+      |
+      '\\' nonnewline @code
+    )* '"';
+  # Back-quoted (raw) string literal: no escape sequences, and it may span
+  # several lines, so embedded newlines are reported as internal newlines.
+  golang_raw_str =
+    '`' @code (
+      newline %{ entity = INTERNAL_NL; } %golang_ccallback
+      |
+      ws
+      |
+      [^\r\n\f\t `] @code
+    )* '`';
+  golang_string = golang_sq_str | golang_dq_str | golang_raw_str;
+
+  golang_line := |*
+    spaces    ${ entity = GOLANG_SPACE; } => golang_ccallback;
+    golang_comment;
+    golang_string;
+    newline   ${ entity = NEWLINE;      } => golang_ccallback;
+    ^space    ${ entity = GOLANG_ANY;   } => golang_ccallback;
+  *|;
+
+  # Entity machine
+
+  action golang_ecallback {
+    callback(GOLANG_LANG, golang_entities[entity], cint(ts), cint(te), userdata);
+  }
+
+  golang_line_comment_entity = '//' nonnewline*;
+  golang_block_comment_entity = '/*' any* :>> '*/';
+  golang_comment_entity = golang_line_comment_entity | golang_block_comment_entity;
+
+  golang_raw_str_entity = '`' [^`]* '`';
+  golang_string_entity =
+    sq_str_with_escapes | dq_str_with_escapes | golang_raw_str_entity;
+
+  golang_number_entity = float | integer;
+
+  # Go has no preprocessor; these rules are carried over from the C parser.
+  golang_preproc_word =
+    'define' | 'elif' | 'else' | 'endif' | 'error' | 'if' | 'ifdef' |
+    'ifndef' | 'import' | 'include' | 'line' | 'pragma' | 'undef' |
+    'using' | 'warning';
+  # TODO: find some way of making preproc match the beginning of a line.
+  # Putting a 'when starts_line' conditional throws an assertion error.
+  golang_preproc_entity =
+    '#' space* (golang_block_comment_entity space*)?
+      golang_preproc_word (escaped_newline | nonnewline)*;
+
+  golang_identifier_entity = (alpha | '_') (alnum | '_')*;
+
+  # The 25 keywords of the Go language specification.
+  golang_keyword_entity =
+    'break' | 'case' | 'chan' | 'const' | 'continue' | 'default' | 'defer' |
+    'else' | 'fallthrough' | 'for' | 'func' | 'go' | 'goto' | 'if' |
+    'import' | 'interface' | 'map' | 'package' | 'range' | 'return' |
+    'select' | 'struct' | 'switch' | 'type' | 'var';
+
+  golang_operator_entity = [+\-/*%<>!=^&|?~:;.,()\[\]{}];
+
+  golang_entity := |*
+    space+                   ${ entity = GOLANG_SPACE;      } => golang_ecallback;
+    golang_comment_entity    ${ entity = GOLANG_COMMENT;    } => golang_ecallback;
+    golang_string_entity     ${ entity = GOLANG_STRING;     } => golang_ecallback;
+    golang_number_entity     ${ entity = GOLANG_NUMBER;     } => golang_ecallback;
+    golang_preproc_entity    ${ entity = GOLANG_PREPROC;    } => golang_ecallback;
+    golang_identifier_entity ${ entity = GOLANG_IDENTIFIER; } => golang_ecallback;
+    golang_keyword_entity    ${ entity = GOLANG_KEYWORD;    } => golang_ecallback;
+    golang_operator_entity   ${ entity = GOLANG_OPERATOR;   } => golang_ecallback;
+    ^(space | digit)         ${ entity = GOLANG_ANY;        } => golang_ecallback;
+  *|;
+}%%
+
+/************************* Required for every parser *************************/
+
+/* Parses a string buffer with Go code.
+ *
+ * @param *buffer The string to parse.
+ * @param length The length of the string to parse.
+ * @param count Integer flag specifying whether or not to count lines. If yes,
+ *   uses the Ragel machine optimized for counting. Otherwise uses the Ragel
+ *   machine optimized for returning entity positions.
+ * @param *callback Callback function. If count is set, callback is called for
+ *   every line of code, comment, or blank with 'lcode', 'lcomment', and
+ *   'lblank' respectively. Otherwise callback is called for each entity found.
+ * @param *userdata Opaque pointer handed to callback as its udata argument.
+ */
+void parse_golang(char *buffer, int length, int count,
+                  void (*callback) (const char *lang, const char *entity, int s,
+                                    int e, void *udata),
+                  void *userdata
+  ) {
+  init
+
+  %% write init;
+  cs = (count) ? golang_en_golang_line : golang_en_golang_entity;
+  %% write exec;
+
+  // if no newline at EOF; callback contents of last line
+  if (count) { process_last_line(GOLANG_LANG) }
+}
+
+const char *ORIG_GOLANG_LANG = LANG_GOLANG;
+
+#endif
+
+/*****************************************************************************/
-- 
2.32.0.93.g670b81a890