From fb1cc0962643aa7dbb2f87c5ed2308ce9e2cfad5 Mon Sep 17 00:00:00 2001 From: Robin Luckey Date: Mon, 9 Apr 2012 13:50:12 -0700 Subject: [PATCH] Fixes recursion bug in disambiguate_in(). The basic strategy of disambiguate_in() is to strip the trailing *.in extension from the filepath, and then to disambiguate the file as if it originally had that name. Thus, given file "foo.in", disambiguate_in() will disambiguate "foo". disambiguate_in() achieves this while re-using the exact same file on disk. This is possible because a SourceFile struct has both a `filepath` (the name we use for disambiguation purposes) and the `diskpath` (the actual name on disk). So disambiguate_in() instantiates a new SourceFile with a stripped filepath, yet the same diskpath and same file contents. The bug is that the code did this incorrectly: when assigning the diskpath of the new SourceFile, it would mistakenly assign it the previous SourceFile's *filepath* instead of the previous SourceFile's diskpath. If disambiguate_in() runs just once (when the file has just a single *.in extension, the usual case), this mistake does not matter because the filepath and diskpath are the same. But if disambiguate_in() recurses on itself (when the file has multiple *.in.in extensions), then during the second pass the filepath and diskpath will not be equal -- they will differ by one missing *.in extension. Thus the diskpath of the new SourceFile will refer to a (probably) non-existent file. The bug is hard to explain but was simple to correct. In addition to correcting the diskpath assignment, I've fixed a memory leak: it was possible to allocate a new SourceFile, and then immediately return NULL, which fails to free the SourceFile. I've moved the allocation *after* the NULL return check to avoid this. --- src/detector.c | 14 +++++++++----- test/detect_files/foo.in.in | 2 ++ test/unit/detector_test.h | 1 + 3 files changed, 12 insertions(+), 5 deletions(-) create mode 100644 test/detect_files/foo.in.in diff --git a/src/detector.c b/src/detector.c index 005f6df..685b29d 100644 --- a/src/detector.c +++ b/src/detector.c @@ -584,15 +584,19 @@ const char *disambiguate_in(SourceFile *sourcefile) { char buf[length]; strncpy(buf, p, length); buf[length] = '\0'; - SourceFile *undecorated = ohcount_sourcefile_new(buf); p = ohcount_sourcefile_get_contents(sourcefile); if (!p) { return NULL; } - // The filepath without the '.in' extension does not exist on disk. The - // sourcefile->diskpath field must be set incase the detector needs to run - // 'file -b' on the file. - ohcount_sourcefile_set_diskpath(undecorated, sourcefile->filepath); + + // A SourceFile's filepath and diskpath need not be the same. + // Here, we'll take advantage of this to set up a new SourceFile + // whose filepath does not have the *.in extension, but whose + // diskpath still points back to the original file on disk (if any). + SourceFile *undecorated = ohcount_sourcefile_new(buf); + if (sourcefile->diskpath) { + ohcount_sourcefile_set_diskpath(undecorated, sourcefile->diskpath); + } ohcount_sourcefile_set_contents(undecorated, p); undecorated->filenames = sourcefile->filenames; language = ohcount_sourcefile_get_language(undecorated); diff --git a/test/detect_files/foo.in.in b/test/detect_files/foo.in.in new file mode 100644 index 0000000..32f8e97 --- /dev/null +++ b/test/detect_files/foo.in.in @@ -0,0 +1,2 @@ +[Foo] +Foo=Bar diff --git a/test/unit/detector_test.h b/test/unit/detector_test.h index 7eda100..3809cf6 100755 --- a/test/unit/detector_test.h +++ b/test/unit/detector_test.h @@ -80,6 +80,7 @@ void test_detector_disambiguate_m() { void test_detector_disambiguate_in() { ASSERT_NODETECT("empty.in"); + ASSERT_NODETECT("foo.in.in"); } void test_detector_disambiguate_pl() { -- 2.32.0.93.g670b81a890