1 // sourcefile.c written by Mitchell Foral. mitchell<att>caladbolg.net.
2 // See COPYING for license information.
11 #include "languages.h"
// Allocates a SourceFile for the given path and initializes its fields.
// The path is copied into a freshly malloc'd buffer. filename is set to point
// into that copy just after the last '/' or '\\' (or at its start when the
// path has no separator), and dirpath receives the length of the directory
// part, clamped to 0. The remaining fields start out as NULL, with size set
// to -1 and language_detected set to 0.
// NOTE(review): this listing appears to omit lines: no terminator is written
// after strncpy, nothing uses the position found by the first scan, and no
// return statement is visible. Confirm against the complete file.
17 SourceFile *ohcount_sourcefile_new(const char *filepath) {
18 SourceFile *sourcefile = malloc(sizeof(SourceFile));
20 int length = strlen(filepath);
21 sourcefile->filepath = malloc(length + 1);
// With a count equal to strlen(filepath), strncpy copies the characters only;
// it does not write a terminating NUL.
22 strncpy(sourcefile->filepath, filepath, length);
23 char *p = sourcefile->filepath + length;
// Step back from the end of the path until just after a '.', or until a path
// separator ('/' or '\\') or the start of the string is reached.
26 while (p > sourcefile->filepath && *(p - 1) != '.' &&
27 *(p - 1) != '/' && *(p - 1) != '\\') p--;
// Keep stepping back to just after the last separator: the file name start.
30 while (p > sourcefile->filepath &&
31 *(p - 1) != '/' && *(p - 1) != '\\') p--;
32 sourcefile->filename = p;
// Offset of the separator in front of the file name, i.e. the length of the
// directory part. It would be -1 for a bare file name, hence the clamp.
34 sourcefile->dirpath = (p - 1) - sourcefile->filepath;
35 if (sourcefile->dirpath < 0) sourcefile->dirpath = 0;
// Everything below starts out unset; other functions in this file fill these
// fields in when they find them NULL (or size negative / flag zero).
37 sourcefile->diskpath = NULL;
39 sourcefile->contents = NULL;
40 sourcefile->size = -1;
42 sourcefile->language = NULL;
43 sourcefile->language_detected = 0;
45 sourcefile->parsed_language_list = NULL;
47 sourcefile->license_list = NULL;
49 sourcefile->loc_list = NULL;
51 sourcefile->filenames = NULL;
56 void ohcount_sourcefile_set_diskpath(SourceFile *sourcefile,
57 const char *diskpath) {
58 if (sourcefile->diskpath)
59 free(sourcefile->diskpath);
60 int size = strlen(diskpath);
61 sourcefile->diskpath = malloc(size + 1);
62 strncpy(sourcefile->diskpath, diskpath, size);
63 sourcefile->diskpath[size] = '\0';
66 void ohcount_sourcefile_set_contents(SourceFile *sourcefile,
67 const char *contents) {
68 if (sourcefile->contents)
69 free(sourcefile->contents);
70 int size = strlen(contents);
71 sourcefile->contents = malloc(size + 1);
72 strncpy(sourcefile->contents, contents, size);
73 sourcefile->contents[size] = '\0';
74 sourcefile->size = size;
// Returns the SourceFile's contents, reading them from disk when they are not
// in memory yet. The file is opened from diskpath when that is set, otherwise
// from filepath. The buffer read is NUL-terminated and its length is stored
// in the size field.
// NOTE(review): `size` is used below but its declaration is not part of this
// listing, and neither a check of fopen()'s result nor an fclose() is
// visible. The listing seems to omit lines; confirm against the full file.
77 char *ohcount_sourcefile_get_contents(SourceFile *sourcefile) {
78 if (sourcefile->contents == NULL) {
// Prefer diskpath as the location to read from when one has been set.
79 char *path = sourcefile->filepath;
80 if (sourcefile->diskpath)
81 path = sourcefile->diskpath;
82 FILE *f = fopen(path, "r");
// Move the file position to the end of the file.
84 fseek(f, 0, SEEK_END);
// One extra byte is allocated for the terminator written below.
// NOTE(review): the return values of malloc() and fread() are not checked.
87 sourcefile->contents = malloc(size + 1);
88 fread(sourcefile->contents, 1, size, f);
89 sourcefile->contents[size] = '\0';
90 sourcefile->size = size;
// NOTE(review): presumably the failure branch; its condition is not visible.
93 sourcefile->contents = NULL;
97 return sourcefile->contents;
100 int ohcount_sourcefile_get_contents_size(SourceFile *sourcefile) {
101 if (sourcefile->size < 0)
102 ohcount_sourcefile_get_contents(sourcefile);
103 return sourcefile->size;
106 void ohcount_sourcefile_set_language(SourceFile *sourcefile,
107 const char *language) {
108 struct LanguageMap *rl =
109 ohcount_hash_language_from_name(language, strlen(language));
111 sourcefile->language = rl->name;
112 sourcefile->language_detected = 1;
116 const char *ohcount_sourcefile_get_language(SourceFile *sourcefile) {
117 if (!sourcefile->language_detected) {
118 sourcefile->language = ohcount_detect_language(sourcefile);
119 sourcefile->language_detected = 1;
121 return sourcefile->language;
// Callback function for populating a SourceFile's parsed_language_list field.
// This callback is passed to ohcount_parse() for parsing lines of code.
// @param language The language associated with the incoming line.
// @param entity The type of line ("lcode", "lcomment", or "lblank").
// @param start The start position of the entity relative to the start of the
//   buffer.
// @param end The end position of the entity relative to the start of the
//   buffer.
// @param userdata Pointer to the SourceFile being parsed.
// @see ohcount_sourcefile_parse.
// Files the incoming entity under its language's ParsedLanguage entry in
// sf->parsed_language_list, creating an entry when none exists yet.
// NOTE(review): this listing does not show where `lang` is assigned or how
// the list is walked; several lines appear to be missing. Confirm against
// the complete file.
136 void parser_callback(const char *language, const char *entity, int start,
137 int end, void *userdata) {
138 SourceFile *sf = (SourceFile *)userdata;
139 char *buffer = sf->contents; // field is guaranteed to exist
140 int buffer_size = sf->size; // field is guaranteed to exist
// p and pe point at the entity's start and end offsets in the contents buffer.
141 char *p = buffer + start, *pe = buffer + end;
143 ParsedLanguageList *list = sf->parsed_language_list;
144 ParsedLanguage *lang;
// A list whose head is NULL is empty: the list node itself receives the first
// ParsedLanguage, created with the buffer size.
145 if (list->head == NULL) {
149 list->pl = ohcount_parsed_language_new(language, buffer_size);
153 // Has this language been detected before?
154 ParsedLanguageList *iter = list->head;
// Entries are matched by comparing language names.
156 if (strcmp(iter->pl->name, language) == 0)
161 // This language has not been detected before. Create a new entry and add
163 iter = ohcount_parsed_language_list_new();
164 iter->pl = ohcount_parsed_language_new(language, buffer_size);
// Link the new entry after the current tail.
165 list->tail->next = iter;
// Code and comment text is stored without its leading spaces and tabs; blank
// lines are only counted.
171 if (strcmp(entity, "lcode") == 0) {
172 while (*p == ' ' || *p == '\t') p++;
173 ohcount_parsed_language_add_code(lang, p, pe - p);
174 } else if (strcmp(entity, "lcomment") == 0) {
175 while (*p == ' ' || *p == '\t') p++;
176 ohcount_parsed_language_add_comment(lang, p, pe - p);
177 } else if (strcmp(entity, "lblank") == 0) {
178 lang->blanks_count++;
182 void ohcount_sourcefile_parse(SourceFile *sourcefile) {
183 if (sourcefile->parsed_language_list == NULL) {
184 sourcefile->parsed_language_list = ohcount_parsed_language_list_new();
185 ohcount_parse(sourcefile, 1, parser_callback, sourcefile);
187 // Since the SourceFile contents are not 'free'd until the SourceFile itself
188 // is, continually parsing SourceFiles in a SourceFileList will cause an
189 // undesirable build-up of memory until the SourceFileList is 'free'd.
190 // While it is expensive to re-read the contents from the disk, it is
191 // unlikely they will need to be accessed again after parsing.
192 free(sourcefile->contents); // field is guaranteed to exist
193 sourcefile->contents = NULL;
// Returns the SourceFile's parsed_language_list, calling
// ohcount_sourcefile_parse() first so that the list has been created.
// NOTE(review): the rest of the parameter list is not visible in this listing.
197 ParsedLanguageList *ohcount_sourcefile_get_parsed_language_list(SourceFile
199 ohcount_sourcefile_parse(sourcefile);
200 return sourcefile->parsed_language_list;
// Runs ohcount_parse() on the SourceFile with a caller-supplied callback and
// userdata. The second argument passed is 1, the same value
// ohcount_sourcefile_parse() uses.
// NOTE(review): part of the signature is missing from this listing.
203 void ohcount_sourcefile_parse_with_callback(SourceFile *sourcefile,
204 void (*callback)(const char *,
208 ohcount_parse(sourcefile, 1, callback, userdata);
// Runs ohcount_parse() on the SourceFile with a caller-supplied callback and
// userdata, passing 0 as the second argument where the other parse functions
// in this file pass 1.
// NOTE(review): presumably 0 selects entity-level rather than line-level
// callbacks, judging by the function name; confirm against ohcount_parse().
// The remainder of the signature is missing from this listing.
211 void ohcount_sourcefile_parse_entities_with_callback(SourceFile *sourcefile,
217 ohcount_parse(sourcefile, 0, callback, userdata);
220 LicenseList *ohcount_sourcefile_get_license_list(SourceFile *sourcefile) {
221 if (sourcefile->license_list == NULL)
222 sourcefile->license_list = ohcount_detect_license(sourcefile);
223 return sourcefile->license_list;
// Returns the SourceFile's LocList, building it and storing it in loc_list
// when that field is still NULL. A Loc is made from a ParsedLanguage's name
// and its code, comment and blank counts, and added to the new list.
// NOTE(review): the loop that advances iter over the parsed language list is
// not visible in this listing; confirm against the complete file.
226 LocList *ohcount_sourcefile_get_loc_list(SourceFile *sourcefile) {
227 if (sourcefile->loc_list == NULL) {
228 LocList *list = ohcount_loc_list_new();
229 ohcount_sourcefile_parse(sourcefile);
230 ParsedLanguageList *iter;
231 iter = ohcount_sourcefile_get_parsed_language_list(sourcefile)->head;
// The trailing 1 is presumably a file count; confirm against ohcount_loc_new().
233 Loc *loc = ohcount_loc_new(iter->pl->name, iter->pl->code_count,
234 iter->pl->comments_count,
235 iter->pl->blanks_count, 1);
// The temporary Loc is freed right after it has been added to the list.
236 ohcount_loc_list_add_loc(list, loc);
237 ohcount_loc_free(loc);
240 sourcefile->loc_list = list;
242 return sourcefile->loc_list;
// Builds a LocDeltaList describing the change between two SourceFiles.
// Deltas come from ohcount_sourcefile_calc_loc_delta(), starting from the
// parsed languages of `from`. The parsed languages of `to` are visited next,
// and a delta is added only when the list has none for that language name.
// NOTE(review): the loop headers, the remaining call arguments and the return
// statement are not visible in this listing.
245 LocDeltaList *ohcount_sourcefile_diff(SourceFile *from, SourceFile *to) {
246 LocDeltaList *list = ohcount_loc_delta_list_new();
248 ParsedLanguageList *iter;
249 iter = ohcount_sourcefile_get_parsed_language_list(from)->head;
251 LocDelta *delta = ohcount_sourcefile_calc_loc_delta(from,
// The temporary delta is freed right after it has been added to the list.
254 ohcount_loc_delta_list_add_loc_delta(list, delta);
255 ohcount_loc_delta_free(delta);
258 iter = ohcount_sourcefile_get_parsed_language_list(to)->head;
// Skip languages that already have a delta in the list.
260 if (!ohcount_loc_delta_list_get_loc_delta(list, iter->pl->name)) {
261 LocDelta *delta = ohcount_sourcefile_calc_loc_delta(from,
264 ohcount_loc_delta_list_add_loc_delta(list, delta);
265 ohcount_loc_delta_free(delta);
// Computes the LocDelta for one language between two SourceFiles.
// Code and comment text default to "" and blank counts to 0; they are
// replaced only by a ParsedLanguage whose name equals `language`.
// ohcount_calc_diff() fills the code and comment added/removed counters,
// while blank lines are compared by count only.
// NOTE(review): the end of the signature, the loops over both parsed language
// lists and the return statement are not visible in this listing.
273 LocDelta *ohcount_sourcefile_calc_loc_delta(SourceFile *from,
274 const char *language,
276 LocDelta *delta = ohcount_loc_delta_new(language, 0, 0, 0, 0, 0, 0);
278 char *from_code = "", *to_code = "";
279 char *from_comments = "", *to_comments = "";
280 int from_blanks_count = 0, to_blanks_count = 0;
282 ParsedLanguageList *iter;
// Pick up the `from` side's data for this language.
283 iter = ohcount_sourcefile_get_parsed_language_list(from)->head;
285 if (strcmp(language, iter->pl->name) == 0) {
286 from_code = iter->pl->code;
287 from_comments = iter->pl->comments;
288 from_blanks_count = iter->pl->blanks_count;
// Pick up the `to` side's data for this language.
293 iter = ohcount_sourcefile_get_parsed_language_list(to)->head;
295 if (strcmp(language, iter->pl->name) == 0) {
296 to_code = iter->pl->code;
297 to_comments = iter->pl->comments;
298 to_blanks_count = iter->pl->blanks_count;
304 ohcount_calc_diff(from_code, to_code, &delta->code_added,
305 &delta->code_removed);
306 ohcount_calc_diff(from_comments, to_comments, &delta->comments_added,
307 &delta->comments_removed);
// The difference in blank counts is recorded as removed or as added.
// NOTE(review): the `else` between these two assignments is not visible here.
308 if (from_blanks_count > to_blanks_count)
309 delta->blanks_removed = from_blanks_count - to_blanks_count;
311 delta->blanks_added = to_blanks_count - from_blanks_count;
// Replaces the SourceFile's filenames array with a copy of the given
// NULL-terminated array of strings, or with NULL when filenames is NULL.
// A previously stored array and the strings in it are freed first.
// NOTE(review): the end of the signature, the declarations of i and length,
// and the lines that terminate and store each copied name are not visible in
// this listing.
316 void ohcount_sourcefile_set_filenames(SourceFile *sourcefile,
318 if (sourcefile->filenames) {
// Free every stored string up to the NULL terminator, then the array itself.
320 while (sourcefile->filenames[i])
321 free(sourcefile->filenames[i++]);
322 free(sourcefile->filenames);
325 if (filenames != NULL) {
// Count the entries, then allocate one extra zeroed slot as the terminator.
327 while (filenames[length] != NULL) length++;
328 char **fnames = calloc(length + 1, sizeof(char *));
331 for (i = 0; i < length; i++) {
332 int len = strlen(filenames[i]);
333 char *fname = malloc(len + 1);
// With a count equal to the string length, strncpy writes no terminator.
334 strncpy(fname, filenames[i], len);
338 sourcefile->filenames = fnames;
339 } else sourcefile->filenames = NULL;
// Returns the SourceFile's filenames array. When none is stored yet, it is
// built from the entries of the directory part of filepath. The directory is
// opened twice: first to count the entries, then to copy each entry name.
// NOTE(review): dirpath is a FILENAME_MAX buffer filled with
// sourcefile->dirpath bytes and no bound check is visible. The declarations
// of file, length and i, any check of opendir()'s result, and closedir()
// calls are also not visible in this listing.
342 char **ohcount_sourcefile_get_filenames(SourceFile *sourcefile) {
343 if (sourcefile->filenames == NULL) {
344 char dirpath[FILENAME_MAX];
// Copy the first dirpath characters of filepath and terminate the copy.
345 strncpy(dirpath, sourcefile->filepath, sourcefile->dirpath);
346 dirpath[sourcefile->dirpath] = '\0';
348 DIR *d = opendir((const char *)dirpath);
// First pass: count the directory entries.
351 while ((file = readdir(d))) length++;
// One extra zeroed slot terminates the array.
354 char **filenames = calloc(length + 1, sizeof(char *));
// Second pass: duplicate each entry name into the array.
356 d = opendir((const char *)dirpath);
357 while ((file = readdir(d))) {
358 int len = strlen(file->d_name);
359 char *filename = malloc(len + 1);
360 strncpy(filename, file->d_name, len);
361 filename[len] = '\0';
362 filenames[i++] = filename;
365 sourcefile->filenames = filenames;
368 return sourcefile->filenames;
// Releases memory owned by a SourceFile: its path strings, its contents, its
// parsed language, license and loc lists, and the filenames array together
// with each string in it.
// NOTE(review): the declaration of i and whatever follows the filenames
// cleanup (such as releasing the SourceFile struct itself) are not visible in
// this listing.
371 void ohcount_sourcefile_free(SourceFile *sourcefile) {
372 free(sourcefile->filepath);
373 if (sourcefile->diskpath)
374 free(sourcefile->diskpath);
375 if (sourcefile->contents)
376 free(sourcefile->contents);
// Each list is released with its own free function.
377 if (sourcefile->parsed_language_list)
378 ohcount_parsed_language_list_free(sourcefile->parsed_language_list);
379 if (sourcefile->license_list)
380 ohcount_license_list_free(sourcefile->license_list);
381 if (sourcefile->loc_list)
382 ohcount_loc_list_free(sourcefile->loc_list);
383 if (sourcefile->filenames) {
// Free every string up to the NULL terminator, then the array itself.
385 while (sourcefile->filenames[i])
386 free(sourcefile->filenames[i++]);
387 free(sourcefile->filenames);
// Allocates a new SourceFileList node with malloc().
// NOTE(review): the field initialization and the return statement are not
// visible in this listing.
394 SourceFileList *ohcount_sourcefile_list_new() {
395 SourceFileList *list = malloc(sizeof(SourceFileList));
// Adds a SourceFile created from filepath to the list. For an empty list
// (head is NULL) the file is stored through list->head; otherwise a new node
// is created and linked after the current tail.
// NOTE(review): the assignments that set head for an empty list and that
// advance tail are not visible in this listing.
403 void ohcount_sourcefile_list_add_file(SourceFileList *list,
404 const char *filepath) {
405 if (list->head == NULL) { // empty list
408 list->head->sf = ohcount_sourcefile_new(filepath);
409 list->head->next = NULL;
411 SourceFileList *item = ohcount_sourcefile_list_new();
412 item->sf = ohcount_sourcefile_new(filepath);
413 list->tail->next = item;
// Adds the regular files found in directory to the list and recurses into
// subdirectories whose names do not start with '.'. Each entry name is
// written after "<directory>/" inside a FILENAME_MAX buffer to form the path
// that is passed on.
// NOTE(review): no length check against FILENAME_MAX is visible for either
// copy into filepath, and the result of opendir() is not checked in the lines
// shown. The declaration of file and any closedir() call are not visible.
// NOTE(review): the last call adds `directory` itself as a file; the
// condition guarding it is not visible in this listing.
418 void ohcount_sourcefile_list_add_directory(SourceFileList *list,
419 const char *directory) {
420 char filepath[FILENAME_MAX];
// Build the "<directory>/" prefix; f_p marks where entry names are written.
421 strncpy(filepath, directory, strlen(directory));
422 *(filepath + strlen(directory)) = '/';
423 char *f_p = filepath + strlen(directory) + 1;
426 DIR *d = opendir(directory);
428 while ((file = readdir(d))) {
// Overwrite the tail of filepath with this entry's name and terminate it.
429 int length = strlen(file->d_name);
430 strncpy(f_p, (const char *)file->d_name, length);
431 *(f_p + length) = '\0';
433 if (file->d_type == DT_DIR && *file->d_name != '.') // no hidden dirs
434 ohcount_sourcefile_list_add_directory(list, filepath);
435 else if (file->d_type == DT_REG)
436 ohcount_sourcefile_list_add_file(list, filepath);
439 ohcount_sourcefile_list_add_file(list, directory);
// Builds a new LocList for a SourceFileList by merging in the LocList that
// ohcount_sourcefile_get_loc_list() returns for a SourceFile in the list.
// NOTE(review): the loop that walks the list from head and the return
// statement are not visible in this listing.
443 LocList *ohcount_sourcefile_list_analyze_languages(SourceFileList *list) {
444 LocList *loc_list = ohcount_loc_list_new();
445 SourceFileList *iter = list->head;
447 ohcount_loc_list_add_loc_list(loc_list,
448 ohcount_sourcefile_get_loc_list(iter->sf));
454 void ohcount_sourcefile_list_free(SourceFileList *list) {
456 SourceFileList *iter = list->head;
458 SourceFileList *next = iter->next;
459 ohcount_sourcefile_free(iter->sf);