OTWO-1213 Works around lost encoding in Ruby/C binding layer
[ohcount] / src / licenses.c
1 // licenses.c written by Mitchell Foral. mitchell<att>caladbolg.net.
2 // See COPYING for license information.
3
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "licenses.h"
#include "parser.h"
10
11 // Holds licenses and their associated details and patterns.
12 License license_map[] = {
13   {
14     LIC_ACADEMIC,
15     "http://www.opensource.org/licenses/afl-3.0.php",
16     "Academic Free License",
17     "\\bAcademic\\s*Free\\s*License\\b",
18     PCRE_CASELESS,
19     NULL,
20     0,
21     NULL, NULL
22   },
23   {
24     LIC_ADAPTIVE,
25     "http://www.opensource.org/licenses/apl1.0.php",
26     "Adaptive Public License",
27     "\\bAdaptive\\s*Public\\s*License\\b",
28     PCRE_CASELESS,
29     NULL,
30     0,
31     NULL, NULL
32   },
33   {
34     LIC_AFFERO,
35     "http://www.affero.org/oagpl.html",
36     "GNU Affero General Public License",
37     "\\bGNU\\s+Affero\\s+General\\s+Public\\s+License\\b",
38     PCRE_CASELESS,
39     NULL,
40     0,
41     NULL, NULL
42   },
43   {
44     LIC_APACHE,
45     "http://www.opensource.org/licenses/apachepl.php",
46     "Apache Software License",
47     "(\\bApache\\s*Software\\s*License(?![\\s,]*2))|(\\bapache\\s*license(?![\\s,]*2))",
48     PCRE_CASELESS,
49     NULL,
50     0,
51     NULL, NULL
52   },
53   {
54     LIC_APACHE2,
55     "http://www.opensource.org/licenses/apache2.0.php",
56     "Apache License, 2.0",
57     "\\bapache\\s+(software\\s+)?license,?\\s+(version\\s+)?2",
58     PCRE_CASELESS,
59     NULL,
60     0,
61     NULL, NULL
62   },
63   {
64     LIC_APPLE_OPEN_SOURCE,
65     "http://www.opensource.org/licenses/apsl-2.0.php",
66     "Apple Public Source License",
67     "\\bApple\\s*Public\\s*Source\\s*License\\b",
68     PCRE_CASELESS,
69     NULL,
70     0,
71     NULL, NULL
72   },
73   {
74     LIC_ARTISTIC,
75     "http://www.opensource.org/licenses/artistic-license.php",
76     "Artistic license",
77     "\\bartistic\\s*license\\b",
78     PCRE_CASELESS,
79     NULL,
80     0,
81     NULL, NULL
82   },
83   {
84     LIC_ATTRIBUTION_ASSURANCE,
85     "http://www.opensource.org/licenses/attribution.php",
86     "Attribution Assurance Licenses",
87     "\\battribution\\s*assurance\\s*license(s)?\\b",
88     PCRE_CASELESS,
89     NULL,
90     0,
91     NULL, NULL
92   },
93   {
94     LIC_BOOST,
95     "http://www.boost.org/LICENSE_1_0.txt",
96     "Boost Software License - Version 1.0 - August 17th, 2003",
97     "\\bboost\\s*software\\s*license\\b",
98     PCRE_CASELESS,
99     NULL,
100     0,
101     NULL, NULL
102   },
103   {
104     LIC_BSD,
105     "http://www.opensource.org/licenses/bsd-license.php",
106     "New BSD license",
107     "(\\bbsd\\s*license\\b)|(The Regents of the University of California)",
108     PCRE_CASELESS,
109     NULL,
110     0,
111     NULL, NULL
112   },
113   {
114     LIC_CECILL,
115     "http://www.cecill.info/licences/Licence_CeCILL_V2-en.html",
116     "CeCILL license",
117     "\\bcecill\\b",
118     PCRE_CASELESS,
119     NULL,
120     0,
121     NULL, NULL
122   },
123   {
124     LIC_CECILL_B,
125     "http://www.cecill.info/licences/Licence_CeCILL-B_V1-en.html",
126     "CeCILL-B license",
127     "\\bcecill-b\\b",
128     PCRE_CASELESS,
129     NULL,
130     0,
131     NULL, NULL
132   },
133   {
134     LIC_CECILL_C,
135     "http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.html",
136     "CeCILL-C license",
137     "\\bcecill-c\\b",
138     PCRE_CASELESS,
139     NULL,
140     0,
141     NULL, NULL
142   },
143   {
144     LIC_COMPUTER_ASSOCIATES_TRUSTED,
145     "http://www.opensource.org/licenses/ca-tosl1.1.php",
146     "Computer Associates Trusted Open Source License 1.1",
147     "\\bcomputer\\s*associates\\s*trusted\\s*open\\s*source\\s*license\\b",
148     PCRE_CASELESS,
149     NULL,
150     0,
151     NULL, NULL
152   },
153   {
154     LIC_COMMON_DEVELOPMENT_AND_DISTRIBUTION,
155     "http://www.opensource.org/licenses/cddl1.php",
156     "Common Development and Distribution License",
157     "\\bcommon\\s*development\\s*and\\s*distribution\\s*license\\b",
158     PCRE_CASELESS,
159     NULL,
160     0,
161     NULL, NULL
162   },
163   {
164     LIC_COMMON_PUBLIC,
165     "http://www.opensource.org/licenses/cpl1.0.php",
166     "Common Public License 1.0",
167     "\\bcommon\\s*public\\s*license\\b",
168     PCRE_CASELESS,
169     NULL,
170     0,
171     NULL, NULL
172   },
173   {
174     LIC_CUA_OFFICE,
175     "http://www.opensource.org/licenses/cuaoffice.php",
176     "CUA Office Public License Version 1.0",
177     "\\bCUA\\s*office\\s*public\\s*license\\b",
178     PCRE_CASELESS,
179     NULL,
180     0,
181     NULL, NULL
182   },
183   {
184     LIC_EU_DATAGRID,
185     "http://www.opensource.org/licenses/eudatagrid.php",
186     "EU DataGrid Software License",
187     "\\beu\\s*datagrid\\s*software\\s*license\\b",
188     PCRE_CASELESS,
189     NULL,
190     0,
191     NULL, NULL
192   },
193   {
194     LIC_ECLIPSE,
195     "http://www.opensource.org/licenses/eclipse-1.0.php",
196     "Eclipse Public License",
197     "\\beclipse\\s*public\\s*license\\b",
198     PCRE_CASELESS,
199     NULL,
200     0,
201     NULL, NULL
202   },
203   {
204     LIC_EDUCATIONAL,
205     "http://www.opensource.org/licenses/ecl1.php",
206     "Educational Community License",
207     "\\beducational\\s*community\\s*license\\b",
208     PCRE_CASELESS,
209     NULL,
210     0,
211     NULL, NULL
212   },
213   {
214     LIC_EIFFEL,
215     "http://www.opensource.org/licenses/eiffel.php",
216     "Eiffel Forum License",
217     "\\beiffel\\s*forum\\s*license(?![,V\\s]*2)\\b",
218     PCRE_CASELESS,
219     NULL,
220     0,
221     NULL, NULL
222   },
223   {
224     LIC_EIFFEL2,
225     "http://www.opensource.org/licenses/ver2_eiffel.php",
226     "Eiffel Forum License V2.0",
227     "\\beiffel\\s*forum\\s*license [,V\\s]*2",
228     PCRE_CASELESS,
229     NULL,
230     0,
231     NULL, NULL
232   },
233   {
234     LIC_ENTESSA,
235     "http://www.opensource.org/licenses/entessa.php",
236     "Entessa Public License",
237     "\\bentessa\\s*public\\s*license\\b",
238     PCRE_CASELESS,
239     NULL,
240     0,
241     NULL, NULL
242   },
243   {
244     LIC_FAIR,
245     "http://www.opensource.org/licenses/fair.php",
246     "Fair License",
247     "\\bfair\\s*license\\b",
248     PCRE_CASELESS,
249     NULL,
250     0,
251     NULL, NULL
252   },
253   {
254     LIC_FRAMEWORX,
255     "http://www.opensource.org/licenses/frameworx.php",
256     "Frameworx License",
257     "\\bframeworx\\s*license\\b",
258     PCRE_CASELESS,
259     NULL,
260     0,
261     NULL, NULL
262   },
263   {
264     LIC_GPL3_OR_LATER,
265     "http://www.gnu.org/licenses/gpl-3.0.html",
266     "GNU General Public License 3.0",
267     "\\b(GNU GENERAL PUBLIC LICENSE|GPL).{0,100}(Version)? 3.{0,50}later",
268     PCRE_CASELESS | PCRE_MULTILINE,
269     NULL,
270     0,
271     NULL, NULL
272   },
273   {
274     LIC_GPL3,
275     "http://www.gnu.org/licenses/gpl-3.0.html",
276     "GNU General Public License 3.0",
277     "GNU (GENERAL PUBLIC LICENSE|GPL).{0,100}(Version |v)3",
278     PCRE_CASELESS | PCRE_MULTILINE,
279     "((at your option) any later version)|(GENERAL PUBLIC LICENSE.*GENERAL PUBLIC LICENSE)",
280     PCRE_CASELESS,
281     NULL, NULL
282   },
283   {
284     LIC_LGPL3,
285     "http://www.gnu.org/licenses/lgpl-3.0.html",
286     "GNU Lesser General Public License 3.0",
287     "((\\blgpl\\b)|(\\bgnu\\s*(library|lesser)\\s*(general\\s*)?(public\\s*)?license\\b)|(\\b(lesser|library)\\s*gpl\\b)).{0,10}(\\bas published by the free software foundation\\b)?.{0,10}(\\bversion\\b)?.{0,10}\\b3(\\.0)?\\b",
288     PCRE_CASELESS,
289     NULL,
290     0,
291     NULL, NULL
292   },
293   {
294     LIC_GPL,
295     "http://www.opensource.org/licenses/gpl-license.php",
296     "GNU General Public License (GPL)",
297     "(\\bgpl\\b)|(\\bgplv2\\b)|(\\bgnu\\s*general\\s*public\\s*license\\b)|(\\bwww\\.gnu\\.org\\/licenses\\/gpl\\.txt\\b)",
298     PCRE_CASELESS,
299     NULL,
300     0,
301     NULL, NULL
302   },
303   {
304     LIC_LGPL,
305     "http://www.opensource.org/licenses/lgpl-license.php",
306     "GNU Library or \"Lesser\" GPL (LGPL)",
307     "(\\blgpl\\b)|(\\bgnu\\s*(library|lesser)\\s*(general\\s*)?(public\\s*)?license\\b)|(\\b(lesser|library)\\s*gpl\\b)",
308     PCRE_CASELESS,
309     NULL,
310     0,
311     NULL, NULL
312   },
313   {
314     LIC_HISTORICAL,
315     "http://www.opensource.org/licenses/historical.php",
316     "Historical Permission Notice and Disclaimer",
317     "\\bhistorical\\s*permission\\s*notice\\s*and\\s*disclaimer\\b",
318     PCRE_CASELESS,
319     NULL,
320     0,
321     NULL, NULL
322   },
323   {
324     LIC_I9,
325     "http://i9os.googlecode.com/svn/trunk/Documentation/Licenses/i9_License",
326     "i9 License",
327     "\\bi9\\s*\\s*license\\b",
328     PCRE_CASELESS,
329     NULL,
330     0,
331     NULL, NULL
332   },
333   {
334     LIC_IBM_PUBLIC,
335     "http://www.opensource.org/licenses/ibmpl.php",
336     "IBM Public License",
337     "\\bibm\\s*public\\s*license\\b",
338     PCRE_CASELESS,
339     NULL,
340     0,
341     NULL, NULL
342   },
343   {
344     LIC_INTEL_OPEN_SOURCE,
345     "http://www.opensource.org/licenses/intel-open-source-license.php",
346     "Intel Open Source License",
347     "\\bintel\\s*open\\s*source\\s*license\\b",
348     PCRE_CASELESS,
349     NULL,
350     0,
351     NULL, NULL
352   },
353   {
354     LIC_JABBER_OPEN_SOURCE,
355     "http://www.opensource.org/licenses/jabberpl.php",
356     "Jabber Open Source License",
357     "\\bjabber\\s*open\\s*source\\s*license\\b",
358     PCRE_CASELESS,
359     NULL,
360     0,
361     NULL, NULL
362   },
363   {
364     LIC_LUCENT_PLAN9,
365     "http://www.opensource.org/licenses/plan9.php",
366     "Lucent Public License (Plan9)",
367     "\\blucent\\s*public\\s*license[\\s(]*plan9",
368     PCRE_CASELESS,
369     NULL,
370     0,
371     NULL, NULL
372   },
373   {
374     LIC_LUCENT_PUBLIC,
375     "http://www.opensource.org/licenses/lucent1.02.php",
376     "Lucent Public License Version 1.02",
377     "\\blucent\\s*public\\s*license\\s*(version)?\\s+1",
378     PCRE_CASELESS,
379     NULL,
380     0,
381     NULL, NULL
382   },
383   {
384     LIC_MIT,
385     "http://www.opensource.org/licenses/mit-license.php",
386     "MIT license",
387     "(\\bmit\\s*license\\b)|(\\bMIT\\/X11\\s*licensed?\\b)",
388     PCRE_CASELESS,
389     NULL,
390     0,
391     NULL, NULL
392   },
393   {
394     LIC_MITRE,
395     "http://www.opensource.org/licenses/mitrepl.php",
396     "MITRE Collaborative Virtual Workspace License (CVW License)",
397     "\\bmitre\\s*collaborative\\s*virtual\\s*workspace\\s*license\\b",
398     PCRE_CASELESS,
399     NULL,
400     0,
401     NULL, NULL
402   },
403   {
404     LIC_MOTOSOTO,
405     "http://www.opensource.org/licenses/motosoto.php",
406     "Motosoto License",
407     "\\bmotosoto\\s*license\\b",
408     PCRE_CASELESS,
409     NULL,
410     0,
411     NULL, NULL
412   },
413   {
414     LIC_MOZILLA_PUBLIC1,
415     "http://www.opensource.org/licenses/mozilla1.0.php",
416     "Mozilla Public License 1.0 (MPL)",
417     "\\bmozilla\\s*public\\s*license\\b",
418     PCRE_CASELESS,
419     NULL,
420     0,
421     NULL, NULL
422   },
423   {
424     LIC_MOZILLA_PUBLIC11,
425     "http://www.opensource.org/licenses/mozilla1.1.php",
426     "Mozilla Public License 1.1 (MPL)",
427     "\\bmozilla\\s*public\\s*license 1\\.1\\b",
428     PCRE_CASELESS,
429     NULL,
430     0,
431     NULL, NULL
432   },
433   {
434     LIC_NASA_OPEN,
435     "http://www.opensource.org/licenses/nasa1.3.php",
436     "NASA Open Source Agreement 1.3",
437     "\\bnasa\\s*open\\s*source\\s*agreement\\b",
438     PCRE_CASELESS,
439     NULL,
440     0,
441     NULL, NULL
442   },
443   {
444     LIC_NAUMEN,
445     "http://www.opensource.org/licenses/naumen.php",
446     "Naumen Public License",
447     "\\bnaumen\\s*public\\s*license\\b",
448     PCRE_CASELESS,
449     NULL,
450     0,
451     NULL, NULL
452   },
453   {
454     LIC_NETHACK,
455     "http://www.opensource.org/licenses/nethack.php",
456     "Nethack General Public License",
457     "\\bnethack\\s*general\\s*public\\s*license\\b",
458     PCRE_CASELESS,
459     NULL,
460     0,
461     NULL, NULL
462   },
463   {
464     LIC_NOKIA_OPEN_SOURCE,
465     "http://www.opensource.org/licenses/nokia.php",
466     "Nokia Open Source License",
467     "\\bnokia\\s*open\\s*source\\s*license\\b",
468     PCRE_CASELESS,
469     NULL,
470     0,
471     NULL, NULL
472   },
473   {
474     LIC_OCLC_RESEARCH,
475     "http://www.opensource.org/licenses/oclc2.php",
476     "OCLC Research Public License 2.0",
477     "\\boclc\\s*research\\s*public\\s*license\\b",
478     PCRE_CASELESS,
479     NULL,
480     0,
481     NULL, NULL
482   },
483   {
484     LIC_OPEN_GROUP_TEST,
485     "http://www.opensource.org/licenses/opengroup.php",
486     "Open Group Test Suite License",
487     "\\bopen\\s*group\\s*test\\s*suite\\s*license\\b",
488     PCRE_CASELESS,
489     NULL,
490     0,
491     NULL, NULL
492   },
493   {
494     LIC_OPEN_SOFTWARE,
495     "http://www.opensource.org/licenses/osl-3.0.php",
496     "Open Software License",
497     "\\bopen\\s*software\\s*license\\b",
498     PCRE_CASELESS,
499     NULL,
500     0,
501     NULL, NULL
502   },
503   {
504     LIC_PHP_LICENSE,
505     "http://www.opensource.org/licenses/php.php",
506     "PHP License",
507     "\\bphp\\s*license\\b",
508     PCRE_CASELESS,
509     NULL,
510     0,
511     NULL, NULL
512   },
513   {
514     LIC_PYTHON_LICENSE,
515     "http://www.opensource.org/licenses/pythonpl.php",
516     "Python license",
517     "\\bpython\\s*license\\b",
518     PCRE_CASELESS,
519     NULL,
520     0,
521     NULL, NULL
522   },
523   {
524     LIC_PYTHON_SOFTWARE_FOUNDATION,
525     "http://www.opensource.org/licenses/PythonSoftFoundation.php",
526     "Python Software Foundation License",
527     "\\bpython\\s*software\\s*foundation\\s*license\\b",
528     PCRE_CASELESS,
529     NULL,
530     0,
531     NULL, NULL
532   },
533   {
534     LIC_QT_PUBLIC,
535     "http://www.opensource.org/licenses/qtpl.php",
536     "Qt Public License (QPL)",
537     "\\bqt\\s*public\\s*license\\b",
538     PCRE_CASELESS,
539     NULL,
540     0,
541     NULL, NULL
542   },
543   {
544     LIC_REALNETWORKS_PUBLIC_SOURCE,
545     "http://www.opensource.org/licenses/real.php",
546     "RealNetworks Public Source License V1.0",
547     "\\brealnetworks\\s*public\\s*source\\s*license\\b",
548     PCRE_CASELESS,
549     NULL,
550     0,
551     NULL, NULL
552   },
553   {
554     LIC_RECIPROCAL_PUBLIC,
555     "http://www.opensource.org/licenses/rpl.php",
556     "Reciprocal Public License",
557     "\\breciprocal\\s*public\\s*license\\b",
558     PCRE_CASELESS,
559     NULL,
560     0,
561     NULL, NULL
562   },
563   {
564     LIC_RICOH_SOURCE,
565     "http://www.opensource.org/licenses/ricohpl.php",
566     "Ricoh Source Code Public License",
567     "\\bricoh\\s*source\\s*code\\s*public\\s*license\\b",
568     PCRE_CASELESS,
569     NULL,
570     0,
571     NULL, NULL
572   },
573   {
574     LIC_SLEEPYCAT,
575     "http://www.opensource.org/licenses/sleepycat.php",
576     "Sleepycat License",
577     "\\bsleepycat\\s*license\\b",
578     PCRE_CASELESS,
579     NULL,
580     0,
581     NULL, NULL
582   },
583   {
584     LIC_SUGARCRM113,
585     "http://www.sugarcrm.com/SPL",
586     "SugarCRM Public License 1.1.3",
587     "\\bsugar\\s*public\\s*license\\s*version\\s*1\\.1\\.3\\b",
588     PCRE_CASELESS,
589     NULL,
590     0,
591     NULL, NULL
592   },
593   {
594     LIC_SUN_INDUSTRY_STANDARDS,
595     "http://www.opensource.org/licenses/sisslpl.php",
596     "Sun Industry Standards Source License (SISSL)",
597     "\\bsun\\s*industry\\s*standards\\s*source\\s*license\\b",
598     PCRE_CASELESS,
599     NULL,
600     0,
601     NULL, NULL
602   },
603   {
604     LIC_SUN_PUBLIC,
605     "http://www.opensource.org/licenses/sunpublic.php",
606     "Sun Public License",
607     "\\bsun\\s*public\\s*license\\b",
608     PCRE_CASELESS,
609     NULL,
610     0,
611     NULL, NULL
612   },
613   {
614     LIC_SYBASE_OPEN_WATCOM,
615     "http://www.opensource.org/licenses/sybase.php",
616     "Sybase Open Watcom Public License 1.0",
617     "\\bsybase\\s*open\\s*watcom\\s*public\\s*license\\b",
618     PCRE_CASELESS,
619     NULL,
620     0,
621     NULL, NULL
622   },
623   {
624     LIC_U_OF_I_NCSA,
625     "http://www.opensource.org/licenses/UoI-NCSA.php",
626     "University of Illinois/NCSA Open Source License",
627     "\\buniversity\\s*of\\s*illinois\\/ncsa\\s*open\\s*source\\s*license\\b",
628     PCRE_CASELESS,
629     NULL,
630     0,
631     NULL, NULL
632   },
633   {
634     LIC_VOVIDA_SOFTWARE,
635     "http://www.opensource.org/licenses/vovidapl.php",
636     "Vovida Software License v. 1.0",
637     "\\bvovida\\s*software\\s*license\\b",
638     PCRE_CASELESS,
639     NULL,
640     0,
641     NULL, NULL
642   },
643   {
644     LIC_W3C,
645     "http://www.opensource.org/licenses/W3C.php",
646     "W3C License",
647     "\\bw3c\\s*license\\b",
648     PCRE_CASELESS,
649     NULL,
650     0,
651     NULL, NULL
652   },
653   {
654     LIC_WXWINDOWS,
655     "http://www.opensource.org/licenses/wxwindows.php",
656     "wxWindows Library License",
657     "\\bwxwindows\\s*library\\s*license\\b",
658     PCRE_CASELESS,
659     NULL,
660     0,
661     NULL, NULL
662   },
663   {
664     LIC_XNET,
665     "http://www.opensource.org/licenses/xnet.php",
666     "X.Net License",
667     "\\bx\\.net\\s*license\\b",
668     PCRE_CASELESS,
669     NULL,
670     0,
671     NULL, NULL
672   },
673   {
674     LIC_ZOPE,
675     "http://www.opensource.org/licenses/zpl.php",
676     "Zope Public License",
677     "\\bzope\\s*public\\s*license\\b",
678     PCRE_CASELESS,
679     NULL,
680     0,
681     NULL, NULL
682   },
683   {
684     LIC_ZLIB_LIBPNG,
685     "http://www.opensource.org/licenses/zlib-license.php",
686     "zlib/libpng license",
687     "\\bzlib\\/libpng\\s*license\\b",
688     PCRE_CASELESS,
689     NULL,
690     0,
691     NULL, NULL
692   },
693   {
694     LIC_APACHE_ISH,
695     "",
696     "Apache-ish License",
697     "(\\bapache-style.*license\\b)|(\\bapache-like.*license\\b)",
698     PCRE_CASELESS,
699     NULL,
700     0,
701     NULL, NULL
702   },
703   {
704     LIC_BSD_ISH,
705     "",
706     "BSD-ish License",
707     "Copyright\\s.{1,40}All rights reserved.{0,40}Redistribution and use in source and binary forms, with or without.{0,20}modification, are permitted provided that the following conditions.{0,20}\\sare met.{1,40}Redistributions of source code must retain the above copyright\\s.*notice, this list of conditions and the following disclaimer\\.\\s+.*Redistributions in binary form must reproduce the above.*copyright\\s+.{0,10}notice, this list of conditions and the following.*disclaimer in the\\s+.*documentation.*(The (name|names) of the (author|contributors) may not|Neither the name of the).*be used to endorse or promote\\s+.*products\\s+.*derived\\s+.*from this software without specific prior written\\s+.*permission.*HOWEVER\\s+.*CAUSED AND ON ANY.*THEORY OF LIABILITY, WHETHER IN CONTRACT",
708     PCRE_MULTILINE,
709     "The Regents of the University of California",
710     0,
711     NULL, NULL
712   },
713   {
714     LIC_BSD_2CLAUSE_ISH,
715     "",
716     "BSD-ish (2 clause) License",
717     "Copyright\\s.{1,60}All rights reserved.{1,40}Redistribution and use in source and binary forms, with or without.{0,20}modification, are permitted provided that the following conditions.{0,20}\\sare met.{0,20}\\s{1,20}.{0,20}Redistributions of source code must retain the above copyright\\s+.*notice, this list of conditions and the following disclaimer.\\s+.*Redistributions in binary form must reproduce the above copyright\\s+.*notice, this list of conditions and the following disclaimer in the\\s+.*documentation and\\/or other materials provided with the distribution\\.\\s+.*HOWEVER CAUSED AND ON ANY.*THEORY OF LIABILITY, WHETHER IN CONTRACT",
718     PCRE_MULTILINE,
719     "(The Regents of the University of California)|(used to endorse or promote\\s+.*products\\s+.*prior\\s+.*written\\s+.*permission\\.)",
720     PCRE_MULTILINE,
721     NULL, NULL
722   },
723   {
724     LIC_WTFPL2,
725     "",
726     "WTF Public License",
727     "(\\bwtfpl\\b)|(\\bwtf\\s*public\\s*license\\b)|(\\b(do\\s*)?what\\s*the\\s*\\fuck\\s*public\\s*license\\b)",
728     PCRE_CASELESS,
729     NULL,
730     0,
731     NULL, NULL
732   },
733   { NULL, NULL, NULL, NULL, 0, NULL, 0, NULL, NULL },
734 };
735 int license_map_length = 0; // will be set dynamically
736
737 /** Compiles the regular expressions defined in license_map. */
738 void compile_regexps() {
739   if (license_map_length == 0)
740     return;
741   const char *err;
742   int erroffset;
743   int i;
744   for (i = 0; i < license_map_length; i++) {
745     License *l = &license_map[i];
746     int flags;
747     if (l->re) {
748       flags = l->re_flags;
749       if (flags & PCRE_MULTILINE)
750         flags |= PCRE_DOTALL;
751       l->regexp = pcre_compile(l->re, flags, &err, &erroffset, NULL);
752     }
753     if (l->exclude_re) {
754       flags = l->exclude_re_flags;
755       if (flags & PCRE_MULTILINE)
756         flags |= PCRE_DOTALL;
757       l->exclude_regexp = pcre_compile(l->exclude_re, flags, &err, &erroffset,
758                                        NULL);
759     }
760   }
761 }
762
/**
 * Overrides a less-specific license l with a more-specific one if the latter
 * was detected.
 *
 * Must be expanded in a scope that provides `i` (index of the license being
 * considered), `j` (scratch loop index, clobbered), `overridden` (set to 1 on
 * override, otherwise left untouched), `potential_licenses_s`, `license_map`
 * and `license_map_length`.
 *
 * Both arguments are license-name strings compared with strcmp(). The body is
 * wrapped in do { } while (0) so the macro behaves as a single statement, even
 * as the unbraced branch of an if/else.
 */
#define OVERRIDE_LICENSE(l, with) do { \
  if (strcmp(license_map[i].name, (l)) == 0) { \
    for (j = 0; j < license_map_length; j++) { \
      if (potential_licenses_s[j] > -1 && \
          strcmp(license_map[j].name, (with)) == 0) { \
        overridden = 1; \
        break; \
      } \
    } \
  } \
} while (0)
777
778 LicenseList *ohcount_detect_license(SourceFile *sourcefile) {
779   LicenseList *list = ohcount_license_list_new();
780
781   // Get the size of this map and compile the REs. Only runs once.
782   if (license_map_length == 0) {
783     while (license_map[license_map_length].name) license_map_length++;
784     compile_regexps();
785   }
786
787   ohcount_sourcefile_parse(sourcefile);
788
789   char *p, *q;
790   int i, j, k;
791   int ovector[30]; // recommended by PCRE
792   ParsedLanguageList *iter_language;
793   iter_language = ohcount_sourcefile_get_parsed_language_list(sourcefile)->head;
794   if (iter_language) {
795     int potential_licenses_s[license_map_length];
796     int potential_licenses_e[license_map_length];
797
798     while (iter_language) {
799       // Before looking for licenses, strip whitespace and newlines
800       p = iter_language->pl->comments;
801       int buffer_len = p ? strlen(p) : 0;
802       char *p_max = p + buffer_len;
803
804       char *buffer = malloc(buffer_len+1);
805       if (buffer == NULL) {
806         fprintf(stderr, "out of memory in ohcount_detect_license");
807         exit(-1);
808       }
809       q = buffer;
810       char *q_max = buffer + buffer_len + 1;
811
812       while (p < p_max && q < q_max) {
813         // Strip leading whitespace and punctuation.
814         while (*p == ' ' || *p == '\t' || ispunct(*p)) p++;
815         // Copy line contents.
816         while (p < p_max && *p != '\r' && *p != '\n' && q < q_max)
817           *q++ = *p++;
818         // Strip newline characters.
819         while (*p == '\r' || *p == '\n') p++;
820         // Add a trailing space.
821         if (q < q_max) *q++ = ' ';
822       }
823       if (q < q_max) *q = '\0';
824
825       for (j = 0; j < license_map_length; j++) {
826         potential_licenses_s[j] = -1;
827         potential_licenses_e[j] = -1;
828         if (pcre_exec(license_map[j].regexp, NULL, buffer, q - buffer, 0, 0,
829                       ovector, 30) >= 0) {
830           int m0 = ovector[0], m1 = ovector[1];
831           // Exclude terms that may not exist in the license.
832           if (license_map[j].exclude_re &&
833               pcre_exec(license_map[j].exclude_regexp, NULL, buffer + m0, m1 - m0,
834                         0, 0, ovector, 30) >= 0)
835             continue;
836           potential_licenses_s[j] = m0;
837           potential_licenses_e[j] = m1;
838           for (k = 0; k < j; k++) {
839             // If this matched license is completely contained inside another one,
840             // do not include it.
841             if ((potential_licenses_s[k] < m0 && potential_licenses_e[k] >= m1) ||
842                 (potential_licenses_s[k] <= m0 && potential_licenses_e[k] > m1)) {
843               potential_licenses_s[j] = -1;
844               potential_licenses_e[j] = -1;
845             }
846             // If this matched license completely contains another one, do not
847             // include the latter.
848             if ((m0 < potential_licenses_s[k] && m1 >= potential_licenses_e[k]) ||
849                 (m0 <= potential_licenses_s[k] && m1 > potential_licenses_e[k])) {
850               potential_licenses_s[k] = -1;
851               potential_licenses_e[k] = -1;
852             }
853           }
854         }
855       }
856       iter_language = iter_language->next;
857     }
858
859     // Create the list of licenses from potential licenses.
860     for (i = 0; i < license_map_length; i++) {
861       if (potential_licenses_s[i] > -1) {
862         int overridden = 0;
863         OVERRIDE_LICENSE(LIC_GPL, LIC_GPL3);
864         OVERRIDE_LICENSE(LIC_GPL, LIC_GPL3_OR_LATER);
865         OVERRIDE_LICENSE(LIC_GPL3, LIC_GPL3_OR_LATER);
866         OVERRIDE_LICENSE(LIC_BSD_2CLAUSE_ISH, LIC_BSD_ISH);
867         if (!overridden) {
868           if (list->head == NULL) { // empty list
869             list->head = list;
870             list->tail = list;
871             list->head->lic = &license_map[i];
872             list->next = NULL;
873           } else {
874             LicenseList *item = ohcount_license_list_new();
875             item->lic = &license_map[i];
876             list->tail->next = item;
877             list->tail = item;
878           }
879         }
880       }
881     }
882   }
883
884   return list;
885 }
886
887 LicenseList *ohcount_license_list_new() {
888   LicenseList *list = malloc(sizeof(LicenseList));
889   list->lic = NULL;
890   list->next = NULL;
891   list->head = NULL;
892   list->tail = NULL;
893   return list;
894 }
895
896 void ohcount_license_list_free(LicenseList *list) {
897   if (list->head) {
898     LicenseList *iter = list->head;
899     while (iter) {
900       LicenseList *next = iter->next;
901       free(iter);
902       iter = next;
903     }
904   } else free(list);
905 }