1 /* db.c: an external database to avoid filesystem lookups.
3 Copyright 1994, 1995, 1996, 1997, 2008 Karl Berry.
4 Copyright 1997-2005 Olaf Weber.
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with this library; if not, see <http://www.gnu.org/licenses/>. */
19 #include <kpathsea/config.h>
20 #include <kpathsea/absolute.h>
21 #include <kpathsea/c-stat.h>
22 #include <kpathsea/c-fopen.h>
23 #include <kpathsea/c-pathch.h>
24 #include <kpathsea/db.h>
25 #include <kpathsea/hash.h>
26 #include <kpathsea/line.h>
27 #include <kpathsea/pathsearch.h>
28 #include <kpathsea/readable.h>
29 #include <kpathsea/str-list.h>
30 #include <kpathsea/tex-file.h>
31 #include <kpathsea/variable.h>
/* Table filled by db_build; kpse_init_db creates it with
   hash_create (DB_HASH_SIZE) after the path search has finished. */
33 static hash_table_type db; /* The hash table for all the ls-R's. */
35 /* Based on the size of 2008 texmf-dist/ls-R, about 62000 entries. But
36 we don't want to make it too big, since texmf/ls-R only has about
37 1300 entries. We should dynamically adapt the size. */
38 #define DB_HASH_SIZE 32003
/* File name of the database.  db_build subtracts sizeof (DB_NAME) from the
   length of the path it is given in order to find the directory prefix. */
41 #define DB_NAME "ls-R"
/* Lowercase spelling of DB_NAME; kpse_init_db asserts that both spellings
   have the same size. */
44 #define DB_NAME_LC "ls-r"
/* Candidate database file names handed to kpse_all_path_search_list.
   NOTE(review): the initializer elements are not visible in this listing;
   presumably DB_NAME and DB_NAME_LC -- confirm. */
47 static const_string db_names[] = {
/* Table filled by alias_build: the key is the alias, the value the real
   name (see the hash_insert_normalized call there). */
53 static hash_table_type alias_db;
/* File name that kpse_init_db searches for along the database path. */
55 #define ALIAS_NAME "aliases"
/* Size passed to hash_create for alias_db; the #ifndef allows a build to
   supply its own value. */
57 #ifndef ALIAS_HASH_SIZE
58 #define ALIAS_HASH_SIZE 1009
/* Directory prefixes of database files: db_build appends to this list and
   the search functions test path elements against it with elt_in_db. */
61 static str_list_type db_dir_list;
63 /* If DIRNAME contains any element beginning with a `.' (that is more
64 than just `./'), return true. This is to allow ``hidden''
65 directories -- ones that don't get searched. */
/* The scan starts at DIRNAME + 1, so a dot in the very first position is
   never examined and dot_pos[-1] below always stays inside the string.
   This presumably relies on DIRNAME being non-empty -- confirm.
   NOTE(review): the return statements are not visible in this listing. */
68 ignore_dir_p P1C(const_string, dirname)
70 const_string dot_pos = dirname;
/* Visit each dot in turn; strchr yields NULL when none is left, which
   ends the loop. */
72 while ((dot_pos = strchr (dot_pos + 1, '.'))) {
73 /* If / before and no / after, skip it. */
/* That is: the dot directly follows a separator, is not the last
   character, and is not itself followed by a separator. */
74 if (IS_DIR_SEP (dot_pos[-1]) && dot_pos[1] && !IS_DIR_SEP (dot_pos[1]))
81 /* If no DB_FILENAME, return false (maybe they aren't using this feature).
82 Otherwise, add entries from DB_FILENAME to TABLE, and return true. */
/* NOTE(review): several lines of this function (braces and short
   statements) are not visible in this listing; the comments added below
   describe only what is shown.  In particular no guard against a NULL
   db_file is visible before read_line and xfclose -- confirm one exists. */
85 db_build P2C(hash_table_type *, table, const_string, db_filename)
88 unsigned dir_count = 0, file_count = 0, ignore_dir_count = 0;
/* sizeof (DB_NAME) counts the terminating NUL, so len is the length of
   DB_FILENAME minus the length of the database name, i.e. the directory
   prefix including its separator.  The arithmetic is unsigned, so this
   presumably assumes DB_FILENAME is at least as long as DB_NAME --
   confirm. */
89 unsigned len = strlen (db_filename) - sizeof (DB_NAME) + 1; /* Keep the /. */
90 string top_dir = (string)xmalloc (len + 1);
91 string cur_dir = NULL; /* First thing in ls-R might be a filename. */
92 FILE *db_file = fopen (db_filename, FOPEN_R_MODE);
/* strncpy copies exactly len bytes here and does not add a terminator.
   NOTE(review): the store of the terminating NUL into top_dir is not
   visible in this listing -- confirm it exists. */
94 strncpy (top_dir, db_filename, len);
/* Process the database one line at a time until read_line returns NULL. */
98 while ((line = read_line (db_file)) != NULL) {
101 /* A line like `/foo:' = new dir foo. Allow both absolute (/...)
102 and explicitly relative (./...) names here. It's a kludge to
103 pass in the directory name with the trailing : still attached,
104 but it doesn't actually hurt. */
/* NOTE(review): len in the test below presumably is the length of the
   current line (a shadowing declaration is not visible here) -- confirm. */
105 if (len > 0 && line[len - 1] == ':' && kpse_absolute_p (line, true)) {
106 /* New directory line. */
107 if (!ignore_dir_p (line)) {
108 /* If they gave a relative name, prepend full directory name now. */
/* Overwrite the trailing colon with a directory separator. */
109 line[len - 1] = DIR_SEP;
110 /* Skip over leading `./', it confuses `match' and is just a
111 waste of space, anyway. This will lose on `../', but `match'
112 won't work there, either, so it doesn't matter. */
/* A leading dot means a relative name: drop its first two characters and
   prepend top_dir; otherwise keep a copy of the name unchanged. */
113 cur_dir = *line == '.' ? concat (top_dir, line + 2) : xstrdup (line);
120 /* Ignore blank, `.' and `..' lines. */
121 } else if (*line != 0 && cur_dir /* a file line? */
123 && (line[1] == 0 || (line[1] == '.' && line[2] == 0))))
125 /* Make a new hash table entry with a key of `line' and a data
126 of `cur_dir'. An already-existing identical key is ok, since
127 a file named `foo' can be in more than one directory. Share
128 `cur_dir' among all its files (and hence never free it).
130 Note that we assume that all names in the ls-R file have already
131 been case-smashed to lowercase where appropriate.
133 hash_insert_normalized (table, xstrdup (line), cur_dir);
136 } /* else ignore blank lines or top-level files
137 or files in ignored directories*/
142 xfclose (db_file, db_filename);
/* file_count presumably counts the file entries inserted above (its
   increment is not visible in this listing). */
144 if (file_count == 0) {
145 WARNING1 ("kpathsea: No usable entries in %s", db_filename);
146 WARNING ("kpathsea: See the manual for how to generate ls-R");
/* Record a copy of the directory prefix so that the search functions can
   test path elements against it.  No free of top_dir itself is visible. */
149 str_list_add (&db_dir_list, xstrdup (top_dir));
153 if (KPSE_DEBUG_P (KPSE_DEBUG_HASH)) {
154 /* Don't make this a debugging bit, since the output is so
155 voluminous, and being able to specify -1 is too useful.
156 Instead, let people who want it run the program under
157 a debugger and change the variable that way. */
158 boolean hash_summary_only = true;
/* NOTE(review): dir_count and ignore_dir_count are declared unsigned but
   are printed with %d below. */
160 DEBUGF4 ("%s: %u entries in %d directories (%d hidden).\n",
161 db_filename, file_count, dir_count, ignore_dir_count);
162 DEBUGF ("ls-R hash table:");
163 hash_print (*table, hash_summary_only);
166 #endif /* KPSE_DEBUG */
/* True unless db_file is NULL at this point. */
171 return db_file != NULL;
175 /* Insert FNAME into the hash table. This is for files that get built
176 during a run. We wouldn't want to reread all of ls-R, even if it got
180 kpse_db_insert P1C(const_string, passed_fname)
182 /* We might not have found ls-R, or even had occasion to look for it
183 yet, so do nothing if we have no hash table. */
/* NOTE(review): the test for an existing hash table described just above
   is not visible in this listing. */
185 const_string dir_part;
/* Work on a private copy so that the string passed in is left untouched. */
186 string fname = xstrdup (passed_fname);
/* baseptr is written through below to truncate fname, so xbasename
   presumably returns a pointer into its argument -- confirm. */
187 string baseptr = (string)xbasename (fname);
/* Copy the file name before the truncation below destroys it. */
188 const_string file_part = xstrdup (baseptr);
190 *baseptr = '\0'; /* Chop off the filename. */
191 dir_part = fname; /* That leaves the dir, with the trailing /. */
193 /* Note that we do not assume that these names have been normalized. */
/* Neither copy is freed in the visible code; presumably the table keeps
   both pointers -- confirm. */
194 hash_insert (&db, file_part, dir_part);
198 /* Return true if FILENAME could be in PATH_ELT, i.e., if the directory
199 part of FILENAME matches PATH_ELT. Have to consider // wildcards, but
200 $ and ~ expansion have already been done. */
/* NOTE(review): some short statements of this function (empty bodies,
   break and return statements, braces) are not visible in this listing;
   the comments added below describe only what is shown. */
203 match P2C(const_string, filename, const_string, path_elt)
/* Remember the start of FILENAME: the wildcard test below requires that at
   least one character has been consumed, which also keeps path_elt[-1]
   and filename[-1] inside their strings because both pointers advance
   together. */
205 const_string original_filename = filename;
206 boolean matched = false;
/* Walk both strings in lockstep until either one ends. */
208 for (; *filename && *path_elt; filename++, path_elt++) {
209 if (FILECHARCASEEQ (*filename, *path_elt)) /* normal character match */
212 else if (IS_DIR_SEP (*path_elt) /* at // */
213 && original_filename < filename && IS_DIR_SEP (path_elt[-1])) {
214 while (IS_DIR_SEP (*path_elt))
215 path_elt++; /* get past second and any subsequent /'s */
216 if (*path_elt == 0) {
217 /* Trailing //, matches anything. We could make this part of the
218 other case, but it seems pointless to do the extra work. */
222 /* Intermediate //, have to match rest of PATH_ELT. */
/* Scan forward in FILENAME and, at every character that follows a
   separator and equals the next PATH_ELT character, recurse on the two
   remainders; the scan stops at the first successful recursion. */
223 for (; !matched && *filename; filename++) {
224 /* Try matching at each possible character. */
225 if (IS_DIR_SEP (filename[-1])
226 && FILECHARCASEEQ (*filename, *path_elt))
227 matched = match (filename, path_elt);
229 /* Prevent filename++ when *filename='\0'. */
234 else /* normal character nonmatch, quit */
238 /* If we've reached the end of PATH_ELT, check that we're at the last
239 component of FILENAME, we've matched. */
240 if (!matched && *path_elt == 0) {
241 /* Probably PATH_ELT ended with `vf' or some such, and FILENAME ends
242 with `vf/ptmr.vf'. In that case, we'll be at a directory
243 separator. On the other hand, if PATH_ELT ended with a / (as in
244 `vf/'), FILENAME being the same `vf/ptmr.vf', we'll be at the
245 `p'. Upshot: if we're at a dir separator in FILENAME, skip it.
246 But if not, that's ok, as long as there are no more dir separators. */
247 if (IS_DIR_SEP (*filename))
/* Run to the end of the current component (the loop body is not visible
   in this listing; presumably it advances filename). */
250 while (*filename && !IS_DIR_SEP (*filename))
/* A match only if nothing is left, i.e. no further separator follows. */
252 matched = *filename == 0;
259 /* If DB_DIR is a prefix of PATH_ELT, return true; otherwise false.
260 That is, the question is whether to try the db for a file looked up
261 in PATH_ELT. If PATH_ELT == ".", for example, the answer is no. If
262 PATH_ELT == "/usr/local/lib/texmf/fonts//tfm", the answer is yes.
264 In practice, ls-R is only needed for lengthy subdirectory
265 comparisons, but there's no gain to checking PATH_ELT to see if it is
266 a subdir match, since the only way to do that is to do a string
267 search in it, which is all we do anyway. */
270 elt_in_db P2C(const_string, db_dir, const_string, path_elt)
272 boolean found = false;
/* The comparison itself advances both pointers, so inside the loop they
   already point at the characters after the pair just compared.
   NOTE(review): the arguments carry side effects, which is only correct
   if FILECHARCASEEQ evaluates each argument exactly once -- confirm. */
274 while (!found && FILECHARCASEEQ (*db_dir++, *path_elt++)) {
275 /* If we've matched the entire db directory, it's good. */
/* NOTE(review): the test and assignment that the comment above describes
   are not visible in this listing. */
279 /* If we've reached the end of PATH_ELT, but not the end of the db
280 directory, it's no good. */
281 else if (*path_elt == 0)
288 /* If ALIAS_FILENAME exists, read it into TABLE. */
/* The visible return value is whether fopen succeeded.
   NOTE(review): several short statements (the declaration of count, the
   initial assignments of real and alias, loop bodies, braces) are not
   visible in this listing; comments below describe only what is shown. */
291 alias_build P2C(hash_table_type *, table, const_string, alias_filename)
293 string line, real, alias;
295 FILE *alias_file = fopen (alias_filename, FOPEN_R_MODE);
/* Process the file one line at a time until read_line returns NULL. */
298 while ((line = read_line (alias_file)) != NULL) {
299 /* comments or empty */
300 if (*line == 0 || *line == '%' || *line == '#') {
303 /* Each line should have two fields: realname aliasname. */
/* Skip white space in front of the first field. */
305 while (*real && ISSPACE (*real))
/* Run over the first field up to the next white space ... */
308 while (*alias && !ISSPACE (*alias))
/* ... and over the white space that separates the two fields. */
311 while (*alias && ISSPACE (*alias))
313 /* Is the check for errors strong enough? Should we warn the user
314 for potential errors? */
/* Only lines on which both fields are non-empty produce an entry. */
315 if (strlen (real) != 0 && strlen (alias) != 0) {
316 /* Stuff in the alias file should be normalized. */
/* The alias is the key and the real name the value; both are copied. */
317 hash_insert_normalized (table, xstrdup (alias), xstrdup (real));
325 if (KPSE_DEBUG_P (KPSE_DEBUG_HASH)) {
326 /* As with ls-R above ... */
327 boolean hash_summary_only = true;
328 DEBUGF2 ("%s: %u aliases.\n", alias_filename, count);
329 DEBUGF ("alias hash table:");
330 hash_print (*table, hash_summary_only);
333 #endif /* KPSE_DEBUG */
335 xfclose (alias_file, alias_filename);
/* True unless alias_file is NULL at this point. */
338 return alias_file != NULL;
341 /* Initialize the path for ls-R files, and read them all into the hash
342 table `db'. If no usable ls-R's are found, set db.buckets to NULL. */
/* NOTE(review): several short statements (some declarations, the loop
   bodies, the handling of a failed build) are not visible in this
   listing; comments below describe only what is shown. */
345 kpse_init_db P1H(void)
348 const_string db_path;
350 string *orig_db_files;
/* db_build strips sizeof (DB_NAME) characters from whatever path it is
   given, so both spellings of the name must have the same size. */
352 assert (sizeof(DB_NAME) == sizeof(DB_NAME_LC));
354 db_path = kpse_init_format (kpse_db_format);
355 db_files = kpse_all_path_search_list (db_path, db_names);
/* Keep the start of the array for the free call further down. */
356 orig_db_files = db_files;
358 /* Must do this after the path searching (which ends up calling
359 kpse_db_search recursively), so db.buckets stays NULL. */
360 db = hash_create (DB_HASH_SIZE);
/* Feed every database file found into the single table db. */
362 while (db_files && *db_files) {
363 if (db_build (&db, *db_files))
370 /* If db can't be built, leave `size' nonzero (so we don't
371 rebuild it), but clear `buckets' (so we don't look in it). */
/* Releases only the pointer array; no free of the individual file names
   is visible in this listing. */
376 free (orig_db_files);
378 /* Add the content of any alias databases. There may exist more than
379 one alias file along DB_NAME files. This duplicates the above code
380 -- should be a function. */
382 db_files = kpse_all_path_search (db_path, ALIAS_NAME);
383 orig_db_files = db_files;
385 alias_db = hash_create (ALIAS_HASH_SIZE);
/* Feed every alias file found into the single table alias_db. */
387 while (db_files && *db_files) {
388 if (alias_build (&alias_db, *db_files))
/* Drop the bucket array and mark the alias table as unusable; the
   condition guarding these two statements is not visible here. */
395 free (alias_db.buckets);
396 alias_db.buckets = NULL;
399 free (orig_db_files);
402 /* Avoid doing anything if this PATH_ELT is irrelevant to the databases. */
/* Looks NAME (and its aliases) up in the ls-R table and adds to the result
   list those entries whose directory matches the path element and which
   are readable on disk.
   NOTE(review): the end of the parameter list, several declarations and
   many short statements are not visible in this listing; the comments
   added below describe only what is shown. */
405 kpse_db_search P3C(const_string, name, const_string, orig_path_elt,
408 string *db_dirs, *orig_dirs, *r;
409 const_string last_slash;
414 string *aliases = NULL;
415 boolean relevant = false;
417 /* If we failed to build the database (or if this is the recursive
418 call to build the db path), quit. */
419 if (db.buckets == NULL)
422 /* When tex-glyph.c calls us looking for, e.g., dpi600/cmr10.pk, we
423 won't find it unless we change NAME to just `cmr10.pk' and append
424 `/dpi600' to PATH_ELT. We are justified in using a literal `/'
425 here, since that's what tex-glyph.c unconditionally uses in
426 DPI_BITMAP_SPEC. But don't do anything if the / begins NAME; that
427 should never happen. */
428 last_slash = strrchr (name, '/');
429 if (last_slash && last_slash != name) {
/* len is the number of characters before the last slash plus one for the
   terminator stored two lines below. */
430 unsigned len = last_slash - name + 1;
/* NOTE(review): no free of dir_part is visible in this listing; if
   concat3 returns a fresh string this buffer may leak -- confirm. */
431 string dir_part = (string)xmalloc (len);
432 strncpy (dir_part, name, len - 1);
433 dir_part[len - 1] = 0;
/* Fold the directory part of NAME into the path element and let name
   point just past the last slash. */
434 path_elt = concat3 (orig_path_elt, "/", dir_part);
435 name = last_slash + 1;
/* Presumably the else branch: use the path element as given (the cast
   drops const). */
437 path_elt = (string) orig_path_elt;
439 /* Don't bother doing any lookups if this `path_elt' isn't covered by
440 any of database directories. We do this not so much because the
441 extra couple of hash lookups matter -- they don't -- but rather
442 because we want to return NULL in this case, so path_search can
443 know to do a disk search. */
/* The loop stops at the first database directory accepted by elt_in_db. */
444 for (e = 0; !relevant && e < STR_LIST_LENGTH (db_dir_list); e++) {
445 relevant = elt_in_db (STR_LIST_ELT (db_dir_list, e), path_elt);
450 /* If we have aliases for this name, use them. */
451 if (alias_db.buckets)
452 aliases = hash_lookup (alias_db, name);
/* Presumably reached only when no alias list was obtained (the condition
   is not visible): start from a freshly allocated one-element array. */
455 aliases = XTALLOC1 (string);
458 { /* Push aliases up by one and insert the original name at the front. */
460 unsigned len = 1; /* Have NULL element already allocated. */
/* Walk to the NULL terminator; the loop body is not visible, presumably
   it counts the entries in len. */
461 for (r = aliases; *r; r++)
/* Make room for one more pointer, then shift every element, terminator
   included, one slot to the right. */
463 XRETALLOC (aliases, len + 1, string);
464 for (i = len; i > 0; i--) {
465 aliases[i] = aliases[i - 1];
/* The cast drops const from name. */
467 aliases[0] = (string) name;
/* Try the name itself first, then each alias, until done is set. */
471 for (r = aliases; !done && *r; r++) {
474 /* We have an ls-R db. Look up `try'. */
/* ctry is presumably the current entry *r; its declaration is not
   visible in this listing. */
475 orig_dirs = db_dirs = hash_lookup (db, ctry);
/* NOTE(review): ret is allocated after the loop header above; if these
   statements are inside the loop body, a list built for an earlier entry
   is replaced -- confirm this is intended. */
477 ret = XTALLOC1 (str_list_type);
478 *ret = str_list_init ();
480 /* For each filename found, see if it matches the path element. For
481 example, if we have .../cx/cmr10.300pk and .../ricoh/cmr10.300pk,
482 and the path looks like .../cx, we don't want the ricoh file. */
483 while (!done && db_dirs && *db_dirs) {
/* Candidate file: the directory from the table followed by the name. */
484 string db_file = concat (*db_dirs, ctry);
485 boolean matched = match (db_file, path_elt);
488 if (KPSE_DEBUG_P (KPSE_DEBUG_SEARCH))
489 DEBUGF3 ("db:match(%s,%s) = %d\n", db_file, path_elt, matched);
492 /* We got a hit in the database. Now see if the file actually
493 exists, possibly under an alias. */
496 if (kpse_readable_file (db_file)) {
502 free (db_file); /* `db_file' wasn't on disk. */
504 /* The hit in the DB doesn't exist on disk. Now try all its
505 aliases. For example, suppose we have a hierarchy on CD,
506 thus `mf.bas', but ls-R contains `mf.base'. Find it anyway.
507 Could probably work around this with aliases, but
508 this is pretty easy and shouldn't hurt. The upshot is that
509 if one of the aliases actually exists, we use that. */
/* Start at aliases + 1 because slot 0 holds the name itself. */
510 for (a = aliases + 1; *a && !found; a++) {
511 string atry = concat (*db_dirs, *a);
512 if (kpse_readable_file (atry))
519 /* If we have a real file, add it to the list, maybe done. */
521 str_list_add (ret, found);
525 } else { /* no match in the db */
530 /* On to the next directory, if any. */
534 /* This is just the space for the pointers, not the strings. */
535 if (orig_dirs && *orig_dirs)
541 /* If we had to break up NAME, free the temporary PATH_ELT. */
542 if (path_elt != orig_path_elt)
/* Variant of the database search that takes an array of names: the loop
   below visits names[n] until it reaches a NULL entry or done is set.
   NOTE(review): the end of the parameter list, several declarations and
   many short statements are not visible in this listing, and the function
   may continue past the last line shown; the comments added below
   describe only what is shown. */
549 kpse_db_search_list P3C(const_string*, names, const_string, path_elt,
552 string *db_dirs, *orig_dirs, *r;
553 const_string last_slash, name, path;
558 boolean relevant = false;
561 /* If we failed to build the database (or if this is the recursive
562 call to build the db path), quit. */
563 if (db.buckets == NULL)
566 /* Don't bother doing any lookups if this `path_elt' isn't covered by
567 any of database directories. We do this not so much because the
568 extra couple of hash lookups matter -- they don't -- but rather
569 because we want to return NULL in this case, so path_search can
570 know to do a disk search. */
/* The loop stops at the first database directory accepted by elt_in_db. */
571 for (e = 0; !relevant && e < STR_LIST_LENGTH (db_dir_list); e++) {
572 relevant = elt_in_db (STR_LIST_ELT (db_dir_list, e), path_elt);
578 /* Handle each name. */
579 for (n = 0; !done && names[n]; n++) {
582 /* Absolute names should have been caught in our caller. */
/* name is presumably names[n]; the assignment is not visible here. */
583 if (kpse_absolute_p(name, true))
586 /* When tex-glyph.c calls us looking for, e.g., dpi600/cmr10.pk, we
587 won't find it unless we change NAME to just `cmr10.pk' and append
588 `/dpi600' to PATH_ELT. We are justified in using a literal `/'
589 here, since that's what tex-glyph.c unconditionally uses in
590 DPI_BITMAP_SPEC. But don't do anything if the / begins NAME; that
591 should never happen. */
592 last_slash = strrchr (name, '/');
593 if (last_slash && last_slash != name) {
/* len is the number of characters before the last slash plus one for the
   terminator stored two lines below. */
594 unsigned len = last_slash - name + 1;
/* NOTE(review): no free of dir_part is visible in this listing; if
   concat3 returns a fresh string this buffer may leak -- confirm. */
595 string dir_part = (string)xmalloc (len);
596 strncpy (dir_part, name, len - 1);
597 dir_part[len - 1] = 0;
/* Build a per-name path that includes the directory part of the name and
   let name point just past the last slash. */
598 path = concat3 (path_elt, "/", dir_part);
599 name = last_slash + 1;
605 /* If we have aliases for this name, use them. */
606 if (alias_db.buckets)
607 aliases = hash_lookup (alias_db, name);
/* Presumably reached only when no alias list was obtained (the condition
   is not visible): start from a freshly allocated one-element array. */
612 aliases = XTALLOC1 (string);
615 { /* Push aliases up by one and insert the original name at the front. */
617 unsigned len = 1; /* Have NULL element already allocated. */
/* Walk to the NULL terminator; the loop body is not visible, presumably
   it counts the entries in len. */
618 for (r = aliases; *r; r++)
/* Make room for one more pointer, then shift every element, terminator
   included, one slot to the right. */
620 XRETALLOC (aliases, len + 1, string);
621 for (i = len; i > 0; i--) {
622 aliases[i] = aliases[i - 1];
/* The cast drops const from name. */
624 aliases[0] = (string) name;
/* Try the name itself first, then each alias, until done is set. */
627 for (r = aliases; !done && *r; r++) {
630 /* We have an ls-R db. Look up `try'. */
/* ctry is presumably the current entry *r; its declaration is not
   visible in this listing. */
631 orig_dirs = db_dirs = hash_lookup (db, ctry);
/* NOTE(review): ret is allocated after the loop headers above; if these
   statements are inside the loop bodies, a list built for an earlier name
   or alias is replaced -- confirm this is intended. */
633 ret = XTALLOC1 (str_list_type);
634 *ret = str_list_init ();
636 /* For each filename found, see if it matches the path element. For
637 example, if we have .../cx/cmr10.300pk and .../ricoh/cmr10.300pk,
638 and the path looks like .../cx, we don't want the ricoh file. */
639 while (!done && db_dirs && *db_dirs) {
/* Candidate file: the directory from the table followed by the name. */
640 string db_file = concat (*db_dirs, ctry);
641 boolean matched = match (db_file, path);
644 if (KPSE_DEBUG_P (KPSE_DEBUG_SEARCH))
645 DEBUGF3 ("db:match(%s,%s) = %d\n", db_file, path, matched);
648 /* We got a hit in the database. Now see if the file actually
649 exists, possibly under an alias. */
652 if (kpse_readable_file (db_file)) {
658 free (db_file); /* `db_file' wasn't on disk. */
660 /* The hit in the DB doesn't exist on disk. Now try all its
661 aliases. For example, suppose we have a hierarchy on CD,
662 thus `mf.bas', but ls-R contains `mf.base'. Find it anyway.
663 Could probably work around this with aliases, but
664 this is pretty easy and shouldn't hurt. The upshot is that
665 if one of the aliases actually exists, we use that. */
/* Start at aliases + 1 because slot 0 holds the name itself. */
666 for (a = aliases + 1; *a && !found; a++) {
667 string atry = concat (*db_dirs, *a);
668 if (kpse_readable_file (atry))
675 /* If we have a real file, add it to the list, maybe done. */
677 str_list_add (ret, found);
681 } else { /* no match in the db */
685 /* On to the next directory, if any. */
689 /* This is just the space for the pointers, not the strings. */
690 if (orig_dirs && *orig_dirs)
/* True when a per-name path was built with concat3 above; the statement
   this condition guards is not visible in this listing. */
695 if (path != path_elt)