Initial Revision
[ohcount] / ext / ohcount_native / compiled_state.c
1 /*
2  *  compiled_state.c
3  *  Ohcount
4  *
5  *  Created by Jason Allen on 6/23/06.
6  *  Copyright 2006 Ohloh. All rights reserved.
7  *
8  */
9
10 #include "common.h"
11
12
13 Transition *compiled_state_get_transition(CompiledState *compiled_state, int nth_transition) {
14         return compiled_state->transition_map[nth_transition];
15 }
16
17 /*
18  * compiled_state_initialize
19  *
20  * a 'compiled_state' helps the parser walk the DFA faster by precomputing the regular expression
21  * for each state. Basically any transition __from__ a state is added to the regex term. We also
22  * ensure the "\n" transition (to force end-of-line processing).
23  *
24  */
25 void compiled_state_initialize(CompiledState *compiled_state, State *state, Transition *transitions[]) {
26         compiled_state->state = state;
27
28         Transition **pp_t_cur = transitions;
29         Transition **pp_t_map = compiled_state->transition_map;
30         char *regex_cur = compiled_state->regex;
31         bool seen_newline = false;
32
33         for (; (*pp_t_cur) != NULL; pp_t_cur++) {
34                 Transition *t_cur = *pp_t_cur;
35                 if (t_cur->from_state == state) {
36
37                         // hey! this is us! add this transition to our regex
38                         *regex_cur++ = '(';
39                         if (!seen_newline) {
40                                 seen_newline = *t_cur->regex == '\n';
41                         }
42                         char *regex_src = t_cur->regex;
43                         while ((*regex_cur++ = *regex_src++)) {}
44                         regex_cur--; // dont want the nil!
45                         *regex_cur++ = ')';
46                         *regex_cur++ = '|';
47
48                         // add this transition to our map
49                         (*pp_t_map) = t_cur;
50                         pp_t_map++;
51                 }
52         }
53
54         // add newline (only if it's not already there...)
55         if (!seen_newline) {
56                 *regex_cur++ = '(';
57                 *regex_cur++ = '\n';
58                 *regex_cur++ = ')';
59                 *regex_cur++ = '|'; // just to stay similar to loop pattern
60                 (*pp_t_map) = NULL; // make sure we are pointing to a NULL transition
61         }
62
63         // terminate the string
64         *--regex_cur = 0;
65
66         // some sanity checks
67 #ifndef NDEBUG
68         int transition_count = pp_t_map - compiled_state->transition_map;
69         if (transition_count >= MAX_TRANSITIONS) {
70                 log("[ohcount] - ASSERT FAILED: transition_count > MAX (%d)\n", transition_count);
71         }
72 #endif
73
74         const char *error;
75         int erroffset;
76         compiled_state->pcre = pcre_compile(compiled_state->regex,          /* the pattern */
77                                                                                                                                                         0,                              /* default options */
78                                                                                                                                                         &error,                         /* for error message */
79                                                                                                                                                         &erroffset,                     /* for error offset */
80                                                                                                                                                         NULL);                          /* use default character tables */
81
82         const char *errptr;
83         /* since we're likely going to reuse this often, go ahead and study it */
84         /* we dont care about errors - it will return NULL which is fine */
85         compiled_state->pcre_extra = pcre_study(compiled_state->pcre,  /* result of pcre_compile() */
86                                                                                                                                                                         0,                     /* no options exist */
87                                                                                                                                                                         &errptr);                 /* set to NULL or points to a message */
88 }
89