1 /***************************************************************************
2 * chm_lib.c - CHM archive manipulation routines *
3 * ------------------- *
5 * author: Jed Wing <jedwin@ugcs.caltech.edu> *
7 * notes: These routines are meant for the manipulation of microsoft *
8 * .chm (compiled html help) files, but may likely be used *
9 * for the manipulation of any ITSS archive, if ever ITSS *
10 * archives are used for any other purpose. *
12 * Note also that the section names are statically handled. *
13 * To be entirely correct, the section names should be read *
14 * from the section names meta-file, and then the various *
15 * content sections and the "transforms" to apply to the data *
16 * they contain should be inferred from the section name and *
17 * the meta-files referenced using that name; however, all of *
18 * the files I've been able to get my hands on appear to have *
19 * only two sections: Uncompressed and MSCompressed. *
20 * Additionally, the ITSS.DLL file included with Windows does *
21 * not appear to handle any different transforms than the *
22 * simple LZX-transform. Furthermore, the list of transforms *
23 * to apply is broken, in that only half the required space *
24 * is allocated for the list. (It appears as though the *
25 * space is allocated for ASCII strings, but the strings are *
26 * written as unicode. As a result, only the first half of *
27 * the string appears.) So this is probably not too big of *
28 * a deal, at least until CHM v4 (MS .lit files), which also *
29 * incorporate encryption, of some description. *
31 ***************************************************************************/
33 /***************************************************************************
35 * This library is free software; you can redistribute it and/or modify *
36 * it under the terms of the GNU Lesser General Public License as *
37 * published by the Free Software Foundation; either version 2.1 of the *
38 * License, or (at your option) any later version. *
40 ***************************************************************************/
42 /***************************************************************************
44 * Adapted for Wine by Mike McCormack *
46 ***************************************************************************/
49 #include "wine/port.h"
58 #include "wine/unicode.h"
/* Win32 glue used by the rest of this file: the two lock macros enter and
 * leave a critical section, CHM_NULL_FD is the invalid-handle value used to
 * mean that no file is open, and CHM_CLOSE_FILE closes a handle through
 * CloseHandle(). */
63 #define CHM_ACQUIRE_LOCK(a) do { \
64 EnterCriticalSection(&(a)); \
66 #define CHM_RELEASE_LOCK(a) do { \
67 LeaveCriticalSection(&(a)); \
70 #define CHM_NULL_FD (INVALID_HANDLE_VALUE)
71 #define CHM_CLOSE_FILE(fd) CloseHandle((fd))
/* Default number of decompressed blocks kept in the block cache.  The #ifndef
 * guard lets a build define CHM_MAX_BLOCKS_CACHED itself to override the
 * default of 5. */
74 * defines related to tuning
76 #ifndef CHM_MAX_BLOCKS_CACHED
77 #define CHM_MAX_BLOCKS_CACHED 5
/* Sized integer aliases used throughout this file, mapped onto Win32 types:
 * UInt16 is USHORT, Int64 is LONGLONG and UInt64 is ULONGLONG. */
81 * architecture specific defines
83 * Note: as soon as C99 is more widespread, the below defines should
84 * probably just use the C99 sized-int types.
86 * The following settings will probably work for many platforms. The sizes
87 * don't have to be exactly correct, but the types must accommodate at least as
88 * many bits as they specify.
91 /* i386, 32-bit, Windows */
94 typedef USHORT UInt16;
97 typedef LONGLONG Int64;
98 typedef ULONGLONG UInt64;
100 /* utilities for unmarshalling data */
/* Copy count bytes from the read cursor *pData into dest.
 * The guard rejects a count that is not positive or that exceeds the
 * *pLenRemain bytes still available; on the copy path *pLenRemain is reduced
 * by count.
 * NOTE(review): the cursor *pData is presumably advanced by count as well,
 * and the guard presumably returns 0 - confirm against the full function. */
101 static int _unmarshal_char_array(unsigned char **pData,
102 unsigned long *pLenRemain,
106 if (count <= 0 || (unsigned int)count > *pLenRemain)
108 memcpy(dest, (*pData), count);
110 *pLenRemain -= count;
/* Same statements as _unmarshal_char_array: copy count bytes from the read
 * cursor *pData into dest, guarded by a check that count is positive and does
 * not exceed *pLenRemain, then reduce *pLenRemain by count.
 * NOTE(review): presumably differs only in the destination being an unsigned
 * byte array (the ITSP unmarshaller passes a UChar array) - confirm. */
114 static int _unmarshal_uchar_array(unsigned char **pData,
115 unsigned long *pLenRemain,
119 if (count <= 0 || (unsigned int)count > *pLenRemain)
121 memcpy(dest, (*pData), count);
123 *pLenRemain -= count;
/* Decode a 32-bit value stored least-significant byte first: the four bytes
 * at *pData are combined with shifts of 0, 8, 16 and 24 bits into *dest.
 * NOTE(review): each byte is promoted to int before shifting, so a top byte
 * of 0x80 or more is shifted into the sign bit of a signed int; assembling
 * the value in an unsigned 32-bit type would avoid relying on that. */
127 static int _unmarshal_int32(unsigned char **pData,
128 unsigned long *pLenRemain,
133 *dest = (*pData)[0] | (*pData)[1]<<8 | (*pData)[2]<<16 | (*pData)[3]<<24;
/* Decode a 32-bit value stored least-significant byte first: the four bytes
 * at *pData are combined with shifts of 0, 8, 16 and 24 bits into *dest.
 * NOTE(review): the shifts are carried out on promoted (signed) int operands
 * even though the result is meant to be unsigned; a top byte of 0x80 or more
 * is shifted into the sign bit.  Casting each byte to an unsigned 32-bit type
 * first would avoid relying on that. */
139 static int _unmarshal_uint32(unsigned char **pData,
140 unsigned long *pLenRemain,
145 *dest = (*pData)[0] | (*pData)[1]<<8 | (*pData)[2]<<16 | (*pData)[3]<<24;
/* 64-bit unmarshalling helper: the value is assembled in temp by OR-ing in
 * input byte (*pData)[i-1] for a loop index i.
 * NOTE(review): presumably eight bytes are consumed, walking from the last
 * byte down to the first so that the stored value is read least-significant
 * byte first - confirm against the full loop. */
151 static int _unmarshal_int64(unsigned char **pData,
152 unsigned long *pLenRemain,
163 temp |= (*pData)[i-1];
/* 64-bit unmarshalling helper: the value is assembled in temp by OR-ing in
 * input byte (*pData)[i-1] for a loop index i.
 * NOTE(review): presumably the unsigned twin of _unmarshal_int64, consuming
 * eight bytes stored least-significant byte first - confirm. */
171 static int _unmarshal_uint64(unsigned char **pData,
172 unsigned long *pLenRemain,
183 temp |= (*pData)[i-1];
/* Read a UUID: a thin wrapper that copies 16 raw bytes into dest through
 * _unmarshal_uchar_array and returns that helper's result unchanged. */
191 static int _unmarshal_uuid(unsigned char **pData,
192 unsigned long *pDataLen,
195 return _unmarshal_uchar_array(pData, pDataLen, dest, 16);
198 /* names of sections essential to decompression */
/* Wide-character object path, spelled one character at a time and ended with
 * a 0 terminator:
 * ::DataSpace/Storage/MSCompressed/Transform/{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable */
199 static const WCHAR _CHMU_RESET_TABLE[] = {
200 ':',':','D','a','t','a','S','p','a','c','e','/',
201 'S','t','o','r','a','g','e','/',
202 'M','S','C','o','m','p','r','e','s','s','e','d','/',
203 'T','r','a','n','s','f','o','r','m','/',
204 '{','7','F','C','2','8','9','4','0','-','9','D','3','1',
205 '-','1','1','D','0','-','9','B','2','7','-',
206 '0','0','A','0','C','9','1','E','9','C','7','C','}','/',
207 'I','n','s','t','a','n','c','e','D','a','t','a','/',
208 'R','e','s','e','t','T','a','b','l','e',0
/* Wide-character object path, ended with a 0 terminator:
 * ::DataSpace/Storage/MSCompressed/ControlData
 * chm_openW resolves this path to locate the LZXC control data. */
210 static const WCHAR _CHMU_LZXC_CONTROLDATA[] = {
211 ':',':','D','a','t','a','S','p','a','c','e','/',
212 'S','t','o','r','a','g','e','/',
213 'M','S','C','o','m','p','r','e','s','s','e','d','/',
214 'C','o','n','t','r','o','l','D','a','t','a',0
/* Wide-character object path, ended with a 0 terminator:
 * ::DataSpace/Storage/MSCompressed/Content */
216 static const WCHAR _CHMU_CONTENT[] = {
217 ':',':','D','a','t','a','S','p','a','c','e','/',
218 'S','t','o','r','a','g','e','/',
219 'M','S','C','o','m','p','r','e','s','s','e','d','/',
220 'C','o','n','t','e','n','t',0
/* Wide-character object path:
 * ::DataSpace/Storage/MSCompressed/SpanInfo
 * NOTE(review): presumably ended with a 0 terminator like the sibling
 * path constants - confirm. */
222 static const WCHAR _CHMU_SPANINFO[] = {
223 ':',':','D','a','t','a','S','p','a','c','e','/',
224 'S','t','o','r','a','g','e','/',
225 'M','S','C','o','m','p','r','e','s','s','e','d','/',
226 'S','p','a','n','I','n','f','o',
230 * structures local to this module
/* In-memory form of the ITSF file header.  The trailing comment on each
 * field gives its byte offset in the stored header.  Two stored sizes are
 * recognised: 0x58 bytes (version 2) and 0x60 bytes (version 3), the extra
 * eight bytes in version 3 being data_offset. */
233 /* structure of ITSF headers */
234 #define _CHM_ITSF_V2_LEN (0x58)
235 #define _CHM_ITSF_V3_LEN (0x60)
238 char signature[4]; /* 0 (ITSF) */
239 Int32 version; /* 4 */
240 Int32 header_len; /* 8 */
241 Int32 unknown_000c; /* c */
242 UInt32 last_modified; /* 10 */
243 UInt32 lang_id; /* 14 */
244 UChar dir_uuid[16]; /* 18 */
245 UChar stream_uuid[16]; /* 28 */
246 UInt64 unknown_offset; /* 38 */
247 UInt64 unknown_len; /* 40 */
248 UInt64 dir_offset; /* 48 */
249 UInt64 dir_len; /* 50 */
250 UInt64 data_offset; /* 58 (Not present before V3) */
251 }; /* __attribute__ ((aligned (1))); */
/* Decode an ITSF file header from the cursor *pData into *dest.
 *
 * Only inputs of exactly 0x58 or 0x60 bytes are considered.  The common
 * fields are read in their stored order, then validated: the signature must
 * be the four bytes ITSF, and header_len is compared against the minimum
 * size for version 2 or version 3.  For a version 3 header data_offset is
 * read from the input; otherwise it is computed as dir_offset + dir_len.
 *
 * The per-field helper results are not inspected here; the up-front length
 * test is what guarantees enough input for the common fields.
 * chm_openW treats a zero return from this function as failure. */
253 static int _unmarshal_itsf_header(unsigned char **pData,
254 unsigned long *pDataLen,
255 struct chmItsfHeader *dest)
257 /* we only know how to deal with the 0x58 and 0x60 byte structures */
258 if (*pDataLen != _CHM_ITSF_V2_LEN && *pDataLen != _CHM_ITSF_V3_LEN)
261 /* unmarshal common fields */
262 _unmarshal_char_array(pData, pDataLen, dest->signature, 4);
263 _unmarshal_int32 (pData, pDataLen, &dest->version);
264 _unmarshal_int32 (pData, pDataLen, &dest->header_len);
265 _unmarshal_int32 (pData, pDataLen, &dest->unknown_000c);
266 _unmarshal_uint32 (pData, pDataLen, &dest->last_modified);
267 _unmarshal_uint32 (pData, pDataLen, &dest->lang_id);
268 _unmarshal_uuid (pData, pDataLen, dest->dir_uuid);
269 _unmarshal_uuid (pData, pDataLen, dest->stream_uuid);
270 _unmarshal_uint64 (pData, pDataLen, &dest->unknown_offset);
271 _unmarshal_uint64 (pData, pDataLen, &dest->unknown_len);
272 _unmarshal_uint64 (pData, pDataLen, &dest->dir_offset);
273 _unmarshal_uint64 (pData, pDataLen, &dest->dir_len);
275 /* error check the data */
276 /* XXX: should also check UUIDs, probably, though with a version 3 file,
277 * current MS tools do not seem to use them.
279 if (memcmp(dest->signature, "ITSF", 4) != 0)
281 if (dest->version == 2)
283 if (dest->header_len < _CHM_ITSF_V2_LEN)
286 else if (dest->version == 3)
288 if (dest->header_len < _CHM_ITSF_V3_LEN)
294 /* now, if we have a V3 structure, unmarshal the rest.
295 * otherwise, compute it
297 if (dest->version == 3)
300 _unmarshal_uint64(pData, pDataLen, &dest->data_offset);
305 dest->data_offset = dest->dir_offset + dest->dir_len;
/* In-memory form of the ITSP header, which chm_openW reads from the start of
 * the directory (at the ITSF dir_offset).  The stored version 1 header is
 * 0x54 bytes; the trailing comment on each field gives its byte offset. */
310 /* structure of ITSP headers */
311 #define _CHM_ITSP_V1_LEN (0x54)
314 char signature[4]; /* 0 (ITSP) */
315 Int32 version; /* 4 */
316 Int32 header_len; /* 8 */
317 Int32 unknown_000c; /* c */
318 UInt32 block_len; /* 10 */
319 Int32 blockidx_intvl; /* 14 */
320 Int32 index_depth; /* 18 */
321 Int32 index_root; /* 1c */
322 Int32 index_head; /* 20 */
323 Int32 unknown_0024; /* 24 */
324 UInt32 num_blocks; /* 28 */
325 Int32 unknown_002c; /* 2c */
326 UInt32 lang_id; /* 30 */
327 UChar system_uuid[16]; /* 34 */
328 UChar unknown_0044[16]; /* 44 */
329 }; /* __attribute__ ((aligned (1))); */
/* Decode an ITSP directory header from the cursor *pData into *dest.
 *
 * The input must be exactly 0x54 bytes.  All fields are read in their stored
 * order, then validated: the signature must be the four bytes ITSP, the
 * version must be 1 and header_len must equal 0x54.
 * chm_openW treats a zero return from this function as failure. */
331 static int _unmarshal_itsp_header(unsigned char **pData,
332 unsigned long *pDataLen,
333 struct chmItspHeader *dest)
335 /* we only know how to deal with 0x54-byte structures */
336 if (*pDataLen != _CHM_ITSP_V1_LEN)
339 /* unmarshal fields */
340 _unmarshal_char_array(pData, pDataLen, dest->signature, 4);
341 _unmarshal_int32 (pData, pDataLen, &dest->version);
342 _unmarshal_int32 (pData, pDataLen, &dest->header_len);
343 _unmarshal_int32 (pData, pDataLen, &dest->unknown_000c);
344 _unmarshal_uint32 (pData, pDataLen, &dest->block_len);
345 _unmarshal_int32 (pData, pDataLen, &dest->blockidx_intvl);
346 _unmarshal_int32 (pData, pDataLen, &dest->index_depth);
347 _unmarshal_int32 (pData, pDataLen, &dest->index_root);
348 _unmarshal_int32 (pData, pDataLen, &dest->index_head);
349 _unmarshal_int32 (pData, pDataLen, &dest->unknown_0024);
350 _unmarshal_uint32 (pData, pDataLen, &dest->num_blocks);
351 _unmarshal_int32 (pData, pDataLen, &dest->unknown_002c);
352 _unmarshal_uint32 (pData, pDataLen, &dest->lang_id);
353 _unmarshal_uuid (pData, pDataLen, dest->system_uuid);
354 _unmarshal_uchar_array(pData, pDataLen, dest->unknown_0044, 16);
356 /* error check the data */
357 if (memcmp(dest->signature, "ITSP", 4) != 0)
359 if (dest->version != 1)
361 if (dest->header_len != _CHM_ITSP_V1_LEN)
/* Header found at the start of a PMGL directory page (0x14 bytes stored).
 * chm_resolve_object treats pages carrying this signature as leaf nodes.
 * The marker array has exactly four elements, so it holds the signature
 * bytes only, with no terminating NUL; it is always compared with a length
 * of 4. */
367 /* structure of PMGL headers */
368 static const char _chm_pmgl_marker[4] = "PMGL";
369 #define _CHM_PMGL_LEN (0x14)
372 char signature[4]; /* 0 (PMGL) */
373 UInt32 free_space; /* 4 */
374 UInt32 unknown_0008; /* 8 */
375 Int32 block_prev; /* c */
376 Int32 block_next; /* 10 */
377 }; /* __attribute__ ((aligned (1))); */
/* Decode a PMGL page header from the cursor *pData into *dest.
 * The caller must present exactly 0x14 bytes.  After the five fields are
 * read, the signature is compared with the four-byte PMGL marker.
 * Callers in this file treat a zero return as failure. */
379 static int _unmarshal_pmgl_header(unsigned char **pData,
380 unsigned long *pDataLen,
381 struct chmPmglHeader *dest)
383 /* we only know how to deal with 0x14-byte structures */
384 if (*pDataLen != _CHM_PMGL_LEN)
387 /* unmarshal fields */
388 _unmarshal_char_array(pData, pDataLen, dest->signature, 4);
389 _unmarshal_uint32 (pData, pDataLen, &dest->free_space);
390 _unmarshal_uint32 (pData, pDataLen, &dest->unknown_0008);
391 _unmarshal_int32 (pData, pDataLen, &dest->block_prev);
392 _unmarshal_int32 (pData, pDataLen, &dest->block_next);
394 /* check structure */
395 if (memcmp(dest->signature, _chm_pmgl_marker, 4) != 0)
/* Header found at the start of a PMGI directory page (8 bytes stored).
 * chm_resolve_object treats pages carrying this signature as branch nodes.
 * The marker array has exactly four elements, so it holds the signature
 * bytes only, with no terminating NUL. */
401 /* structure of PMGI headers */
402 static const char _chm_pmgi_marker[4] = "PMGI";
403 #define _CHM_PMGI_LEN (0x08)
406 char signature[4]; /* 0 (PMGI) */
407 UInt32 free_space; /* 4 */
408 }; /* __attribute__ ((aligned (1))); */
/* Decode a PMGI page header from the cursor *pData into *dest.
 * The caller must present exactly 8 bytes.  After the two fields are read,
 * the signature is compared with the four-byte PMGI marker.
 * Callers in this file treat a zero return as failure. */
410 static int _unmarshal_pmgi_header(unsigned char **pData,
411 unsigned long *pDataLen,
412 struct chmPmgiHeader *dest)
414 /* we only know how to deal with 0x8-byte structures */
415 if (*pDataLen != _CHM_PMGI_LEN)
418 /* unmarshal fields */
419 _unmarshal_char_array(pData, pDataLen, dest->signature, 4);
420 _unmarshal_uint32 (pData, pDataLen, &dest->free_space);
422 /* check structure */
423 if (memcmp(dest->signature, _chm_pmgi_marker, 4) != 0)
/* In-memory form of the LZXC reset table header (0x28 bytes stored).
 * Besides the two lengths shown here, the unmarshaller also fills version,
 * block_count, unknown, table_offset and block_len. */
429 /* structure of LZXC reset table */
430 #define _CHM_LZXC_RESETTABLE_V1_LEN (0x28)
431 struct chmLzxcResetTable
437 UInt64 uncompressed_len;
438 UInt64 compressed_len;
440 }; /* __attribute__ ((aligned (1))); */
/* Decode the LZXC reset table header from the cursor *pData into *dest.
 * The caller must present exactly 0x28 bytes: four 32-bit fields followed by
 * three 64-bit fields.  The only validation is that version equals 2.
 * chm_openW disables compression when this function returns zero. */
442 static int _unmarshal_lzxc_reset_table(unsigned char **pData,
443 unsigned long *pDataLen,
444 struct chmLzxcResetTable *dest)
446 /* we only know how to deal with 0x28-byte structures */
447 if (*pDataLen != _CHM_LZXC_RESETTABLE_V1_LEN)
450 /* unmarshal fields */
451 _unmarshal_uint32 (pData, pDataLen, &dest->version);
452 _unmarshal_uint32 (pData, pDataLen, &dest->block_count);
453 _unmarshal_uint32 (pData, pDataLen, &dest->unknown);
454 _unmarshal_uint32 (pData, pDataLen, &dest->table_offset);
455 _unmarshal_uint64 (pData, pDataLen, &dest->uncompressed_len);
456 _unmarshal_uint64 (pData, pDataLen, &dest->compressed_len);
457 _unmarshal_uint64 (pData, pDataLen, &dest->block_len);
459 /* check structure */
460 if (dest->version != 2)
/* In-memory form of the LZXC control data block.  At least 0x18 bytes must
 * be present; a 0x1c-byte form additionally carries unknown_18.  The
 * trailing comment on each field gives its byte offset.  The unmarshaller
 * also fills a leading size field that precedes the signature. */
466 /* structure of LZXC control data block */
467 #define _CHM_LZXC_MIN_LEN (0x18)
468 #define _CHM_LZXC_V2_LEN (0x1c)
469 struct chmLzxcControlData
472 char signature[4]; /* 4 (LZXC) */
473 UInt32 version; /* 8 */
474 UInt32 resetInterval; /* c */
475 UInt32 windowSize; /* 10 */
476 UInt32 windowsPerReset; /* 14 */
477 UInt32 unknown_18; /* 18 */
/* Decode the LZXC control data from the cursor *pData into *dest.
 *
 * At least 0x18 bytes are required.  After the fixed fields are read,
 * unknown_18 is read only when enough input is reported, and is otherwise
 * set to 0.  For version 2 data, resetInterval and windowSize are stored in
 * units of 0x8000 bytes and are scaled up here.  The data is rejected when
 * either value is zero, when windowSize is 1 (windowSize/2 would be zero and
 * is used as a divisor next), when resetInterval is not a multiple of
 * windowSize/2, or when the signature is not the four bytes LZXC.
 *
 * NOTE(review): the test against _CHM_LZXC_V2_LEN happens after the fixed
 * fields were consumed, so if the helpers reduce *pDataLen as they go it
 * compares the remaining byte count (not the total) with 0x1c - confirm that
 * this is the intended condition. */
480 static int _unmarshal_lzxc_control_data(unsigned char **pData,
481 unsigned long *pDataLen,
482 struct chmLzxcControlData *dest)
484 /* we want at least 0x18 bytes */
485 if (*pDataLen < _CHM_LZXC_MIN_LEN)
488 /* unmarshal fields */
489 _unmarshal_uint32 (pData, pDataLen, &dest->size);
490 _unmarshal_char_array(pData, pDataLen, dest->signature, 4);
491 _unmarshal_uint32 (pData, pDataLen, &dest->version);
492 _unmarshal_uint32 (pData, pDataLen, &dest->resetInterval);
493 _unmarshal_uint32 (pData, pDataLen, &dest->windowSize);
494 _unmarshal_uint32 (pData, pDataLen, &dest->windowsPerReset);
496 if (*pDataLen >= _CHM_LZXC_V2_LEN)
497 _unmarshal_uint32 (pData, pDataLen, &dest->unknown_18);
499 dest->unknown_18 = 0;
501 if (dest->version == 2)
503 dest->resetInterval *= 0x8000;
504 dest->windowSize *= 0x8000;
506 if (dest->windowSize == 0 || dest->resetInterval == 0)
509 /* for now, only support resetInterval a multiple of windowSize/2 */
510 if (dest->windowSize == 1)
512 if ((dest->resetInterval % (dest->windowSize/2)) != 0)
515 /* check structure */
516 if (memcmp(dest->signature, "LZXC", 4) != 0)
/* State behind an open archive handle.  Three separate critical sections are
 * kept: mutex guards file positioning and reads in _chm_fetch_bytes,
 * lzx_mutex is held across decompression, and cache_mutex guards the block
 * cache arrays.  rt_unit and cn_unit are the unit infos that chm_openW
 * resolves up front, and reset_table is the decoded reset table header.
 * cache_blocks and cache_block_indices are parallel arrays of
 * cache_num_blocks entries, indexed by block number modulo that count. */
522 /* the structure used for chm file handles */
527 CRITICAL_SECTION mutex;
528 CRITICAL_SECTION lzx_mutex;
529 CRITICAL_SECTION cache_mutex;
539 struct chmUnitInfo rt_unit;
540 struct chmUnitInfo cn_unit;
541 struct chmLzxcResetTable reset_table;
543 /* LZX control data */
544 int compression_enabled;
546 UInt32 reset_interval;
547 UInt32 reset_blkcount;
549 /* decompressor state */
550 struct LZXstate *lzx_state;
553 /* cache for decompressed blocks */
554 UChar **cache_blocks;
555 Int64 *cache_block_indices;
556 Int32 cache_num_blocks;
560 * utility functions local to this module
/* Positioned read from the archive file.
 *
 * Nothing is attempted when the handle has no open file.  Otherwise the
 * whole operation runs under h->mutex: the current file position is saved,
 * the file pointer is moved to the 64-bit offset os (split into low and high
 * 32-bit halves for SetFilePointer), and afterwards the saved position is
 * restored, so the call leaves the file position as it found it.
 * Callers compare the returned count with the length they asked for. */
563 /* utility function to handle differences between {pread,read}(64)? */
564 static Int64 _chm_fetch_bytes(struct chmFile *h,
570 if (h->fd == CHM_NULL_FD)
573 CHM_ACQUIRE_LOCK(h->mutex);
574 /* NOTE: this might be better done with CreateFileMapping, et cetera... */
576 DWORD origOffsetLo=0, origOffsetHi=0;
577 DWORD offsetLo, offsetHi;
580 /* awkward Win32 Seek/Tell */
581 offsetLo = (unsigned long)(os & 0xffffffffL);
582 offsetHi = (unsigned long)((os >> 32) & 0xffffffffL);
583 origOffsetLo = SetFilePointer(h->fd, 0, &origOffsetHi, FILE_CURRENT);
584 offsetLo = SetFilePointer(h->fd, offsetLo, &offsetHi, FILE_BEGIN);
596 /* restore original position */
597 SetFilePointer(h->fd, origOffsetLo, &origOffsetHi, FILE_BEGIN);
599 CHM_RELEASE_LOCK(h->mutex);
/* Open the archive named by filename and build a handle for it.
 *
 * Steps, in order:
 *  1. allocate the handle and give fd, lzx_state and the cache fields their
 *     empty values;
 *  2. open the file with CreateFileW and initialise the three critical
 *     sections;
 *  3. read 0x60 bytes at offset 0 and decode the ITSF header, keeping
 *     dir_offset, dir_len and data_offset;
 *  4. read 0x54 bytes at dir_offset and decode the ITSP header; dir_offset
 *     and dir_len are then adjusted by the ITSP header_len so they describe
 *     the pages that follow it, and index_root, index_head and block_len are
 *     copied.  An index_root of -1 means there are no PMGI pages, so
 *     index_head is used as the root;
 *  5. resolve and read the span info object, which must not live in the
 *     compressed section, into span;
 *  6. resolve the reset table, content and control data objects; if any is
 *     missing or itself compressed, compression is switched off rather than
 *     failing the open;
 *  7. with compression still enabled, decode the reset table and the control
 *     data, and derive window_size, reset_interval and reset_blkcount;
 *  8. size the block cache through chm_set_param.
 *
 * Header failures in steps 3 to 5 release the handle through chm_close.
 *
 * NOTE(review): the malloc result is written through on the very next
 * statement with no NULL check.
 * NOTE(review): the control data read uses uiLzxc.length, a value taken from
 * the file, as the byte count for the 256-byte sbuffer; no bound check is
 * visible before that read - confirm the length cannot exceed the buffer. */
603 /* open an ITS archive */
604 struct chmFile *chm_openW(const WCHAR *filename)
606 unsigned char sbuffer[256];
607 unsigned long sremain;
608 unsigned char *sbufpos;
609 struct chmFile *newHandle=NULL;
610 struct chmItsfHeader itsfHeader;
611 struct chmItspHeader itspHeader;
613 struct chmUnitInfo uiSpan;
615 struct chmUnitInfo uiLzxc;
616 struct chmLzxcControlData ctlData;
618 /* allocate handle */
619 newHandle = (struct chmFile *)malloc(sizeof(struct chmFile));
620 newHandle->fd = CHM_NULL_FD;
621 newHandle->lzx_state = NULL;
622 newHandle->cache_blocks = NULL;
623 newHandle->cache_block_indices = NULL;
624 newHandle->cache_num_blocks = 0;
627 if ((newHandle->fd=CreateFileW(filename,
632 FILE_ATTRIBUTE_NORMAL,
633 NULL)) == CHM_NULL_FD)
639 /* initialize mutexes, if needed */
640 InitializeCriticalSection(&newHandle->mutex);
641 InitializeCriticalSection(&newHandle->lzx_mutex);
642 InitializeCriticalSection(&newHandle->cache_mutex);
644 /* read and verify header */
645 sremain = _CHM_ITSF_V3_LEN;
647 if (_chm_fetch_bytes(newHandle, sbuffer, (UInt64)0, sremain) != sremain ||
648 !_unmarshal_itsf_header(&sbufpos, &sremain, &itsfHeader))
650 chm_close(newHandle);
654 /* stash important values from header */
655 newHandle->dir_offset = itsfHeader.dir_offset;
656 newHandle->dir_len = itsfHeader.dir_len;
657 newHandle->data_offset = itsfHeader.data_offset;
659 /* now, read and verify the directory header chunk */
660 sremain = _CHM_ITSP_V1_LEN;
662 if (_chm_fetch_bytes(newHandle, sbuffer,
663 (UInt64)itsfHeader.dir_offset, sremain) != sremain ||
664 !_unmarshal_itsp_header(&sbufpos, &sremain, &itspHeader))
666 chm_close(newHandle);
670 /* grab essential information from ITSP header */
671 newHandle->dir_offset += itspHeader.header_len;
672 newHandle->dir_len -= itspHeader.header_len;
673 newHandle->index_root = itspHeader.index_root;
674 newHandle->index_head = itspHeader.index_head;
675 newHandle->block_len = itspHeader.block_len;
677 /* if the index root is -1, this means we don't have any PMGI blocks.
678 * as a result, we must use the sole PMGL block as the index root
680 if (newHandle->index_root == -1)
681 newHandle->index_root = newHandle->index_head;
683 /* By default, compression is enabled. */
684 newHandle->compression_enabled = 1;
686 /* Jed, Sun Jun 27: 'span' doesn't seem to be used anywhere?! */
689 if (CHM_RESOLVE_SUCCESS != chm_resolve_object(newHandle,
692 uiSpan.space == CHM_COMPRESSED)
694 chm_close(newHandle);
698 /* N.B.: we've already checked that uiSpan is in the uncompressed section,
699 * so this should not require attempting to decompress, which may
700 * rely on having a valid "span"
704 if (chm_retrieve_object(newHandle, &uiSpan, sbuffer,
705 0, sremain) != sremain ||
706 !_unmarshal_uint64(&sbufpos, &sremain, &newHandle->span))
708 chm_close(newHandle);
713 /* prefetch most commonly needed unit infos */
714 if (CHM_RESOLVE_SUCCESS != chm_resolve_object(newHandle,
716 &newHandle->rt_unit) ||
717 newHandle->rt_unit.space == CHM_COMPRESSED ||
718 CHM_RESOLVE_SUCCESS != chm_resolve_object(newHandle,
720 &newHandle->cn_unit) ||
721 newHandle->cn_unit.space == CHM_COMPRESSED ||
722 CHM_RESOLVE_SUCCESS != chm_resolve_object(newHandle,
723 _CHMU_LZXC_CONTROLDATA,
725 uiLzxc.space == CHM_COMPRESSED)
727 newHandle->compression_enabled = 0;
730 /* read reset table info */
731 if (newHandle->compression_enabled)
733 sremain = _CHM_LZXC_RESETTABLE_V1_LEN;
735 if (chm_retrieve_object(newHandle, &newHandle->rt_unit, sbuffer,
736 0, sremain) != sremain ||
737 !_unmarshal_lzxc_reset_table(&sbufpos, &sremain,
738 &newHandle->reset_table))
740 newHandle->compression_enabled = 0;
744 /* read control data */
745 if (newHandle->compression_enabled)
747 sremain = (unsigned long)uiLzxc.length;
749 if (chm_retrieve_object(newHandle, &uiLzxc, sbuffer,
750 0, sremain) != sremain ||
751 !_unmarshal_lzxc_control_data(&sbufpos, &sremain,
754 newHandle->compression_enabled = 0;
757 newHandle->window_size = ctlData.windowSize;
758 newHandle->reset_interval = ctlData.resetInterval;
760 /* Jed, Mon Jun 28: Experimentally, it appears that the reset block count */
761 /* must be multiplied by this formerly unknown ctrl data field in */
762 /* order to decompress some files. */
764 newHandle->reset_blkcount = newHandle->reset_interval /
765 (newHandle->window_size / 2);
767 newHandle->reset_blkcount = newHandle->reset_interval /
768 (newHandle->window_size / 2) *
769 ctlData.windowsPerReset;
773 /* initialize cache */
774 chm_set_param(newHandle, CHM_PARAM_MAX_BLOCKS_CACHED,
775 CHM_MAX_BLOCKS_CACHED);
/* Release the resources held by an archive handle: the file handle is closed
 * when one is open, the three critical sections are deleted, the LZX
 * decompressor state is torn down, every cached block is freed along with
 * the cache_blocks array, and the cache_block_indices array is freed.  The
 * cache pointers are reset to NULL after being freed.
 * chm_openW also calls this on its header failure paths. */
780 /* close an ITS archive */
781 void chm_close(struct chmFile *h)
785 if (h->fd != CHM_NULL_FD)
786 CHM_CLOSE_FILE(h->fd);
789 DeleteCriticalSection(&h->mutex);
790 DeleteCriticalSection(&h->lzx_mutex);
791 DeleteCriticalSection(&h->cache_mutex);
794 LZXteardown(h->lzx_state);
800 for (i=0; i<h->cache_num_blocks; i++)
802 if (h->cache_blocks[i])
803 free(h->cache_blocks[i]);
805 free(h->cache_blocks);
806 h->cache_blocks = NULL;
809 if (h->cache_block_indices)
810 free(h->cache_block_indices);
811 h->cache_block_indices = NULL;
/* Change a tunable on the handle.  The one parameter handled in the code
 * shown is CHM_PARAM_MAX_BLOCKS_CACHED, which resizes the block cache under
 * cache_mutex.
 *
 * When the requested size differs from the current one, new block and index
 * arrays of paramVal entries are allocated and the previously cached blocks
 * are moved to slot (block index modulo paramVal).  If two old blocks land in
 * the same new slot, the one encountered later is freed.  The old arrays are
 * then freed and the handle is pointed at the new ones.
 *
 * NOTE(review): neither malloc result is checked before the arrays are
 * written.
 * NOTE(review): the redistribution computes a modulo by paramVal, so a
 * paramVal of 0 with blocks already cached would divide by zero - confirm
 * callers never pass 0 or that a guard exists.
 * NOTE(review): newIndices is cast to UInt64 pointer while the handle field
 * cache_block_indices is declared as Int64 pointer. */
818 * set a parameter on the file handle.
819 * valid parameter types:
820 * CHM_PARAM_MAX_BLOCKS_CACHED:
821 * how many decompressed blocks should be cached? A simple
822 * caching scheme is used, wherein the index of the block is
823 * used as a hash value, and hash collision results in the
824 * invalidation of the previously cached block.
826 void chm_set_param(struct chmFile *h,
832 case CHM_PARAM_MAX_BLOCKS_CACHED:
833 CHM_ACQUIRE_LOCK(h->cache_mutex);
834 if (paramVal != h->cache_num_blocks)
840 /* allocate new cached blocks */
841 newBlocks = (UChar **)malloc(paramVal * sizeof (UChar *));
842 newIndices = (UInt64 *)malloc(paramVal * sizeof (UInt64));
843 for (i=0; i<paramVal; i++)
849 /* re-distribute old cached blocks */
852 for (i=0; i<h->cache_num_blocks; i++)
854 int newSlot = (int)(h->cache_block_indices[i] % paramVal);
856 if (h->cache_blocks[i])
858 /* in case of collision, destroy newcomer */
859 if (newBlocks[newSlot])
861 free(h->cache_blocks[i]);
862 h->cache_blocks[i] = NULL;
866 newBlocks[newSlot] = h->cache_blocks[i];
867 newIndices[newSlot] =
868 h->cache_block_indices[i];
873 free(h->cache_blocks);
874 free(h->cache_block_indices);
877 /* now, set new values */
878 h->cache_blocks = newBlocks;
879 h->cache_block_indices = newIndices;
880 h->cache_num_blocks = paramVal;
882 CHM_RELEASE_LOCK(h->cache_mutex);
891 * helper methods for chm_resolve_object
/* skip a compressed dword: advance *pEntry past every byte whose high bit is
 * set, and past the first byte whose high bit is clear, which ends the value.
 * No end-of-buffer limit is applied here. */
895 static void _chm_skip_cword(UChar **pEntry)
897 while (*(*pEntry)++ >= 0x80)
/* skip the data from a PMGL entry: three consecutive compressed dwords.
 * _chm_parse_PMGL_entry reads the same three values as space, start and
 * length. */
902 static void _chm_skip_PMGL_entry_data(UChar **pEntry)
904 _chm_skip_cword(pEntry);
905 _chm_skip_cword(pEntry);
906 _chm_skip_cword(pEntry);
/* parse a compressed dword: a variable-length integer carrying seven value
 * bits per byte.  Bytes with the high bit set contribute their low seven
 * bits to accum and signal that more bytes follow; the first byte with the
 * high bit clear ends the value and supplies the lowest seven bits.  *pEntry
 * is left pointing just past the value.
 * NOTE(review): presumably accum is shifted left by seven before each
 * continuation byte is added - confirm against the full loop body. */
910 static UInt64 _chm_parse_cword(UChar **pEntry)
914 while ((temp=*(*pEntry)++) >= 0x80)
917 accum += temp & 0x7f;
920 return (accum << 7) + temp;
/* copy an entry name into a WCHAR buffer: each input byte is widened into one
 * WCHAR of path, advancing *pEntry as it goes.  The copy statement shown does
 * no multi-byte UTF-8 decoding.
 * NOTE(review): presumably count input bytes are consumed and path is then
 * terminated - confirm.  Callers must guarantee path has room for them. */
924 static int _chm_parse_UTF8(UChar **pEntry, UInt64 count, WCHAR *path)
926 /* MJM - Modified to return real Unicode strings */
929 *path++ = (*(*pEntry)++);
/* parse a PMGL entry into a chmUnitInfo struct; return 1 on success.
 * The entry starts with a compressed-dword name length, which is rejected
 * when it exceeds CHM_MAX_PATHLEN, followed by the name itself (stored into
 * ui->path) and three compressed dwords stored as ui->space, ui->start and
 * ui->length.  *pEntry is advanced past everything that was read. */
938 static int _chm_parse_PMGL_entry(UChar **pEntry, struct chmUnitInfo *ui)
943 strLen = _chm_parse_cword(pEntry);
944 if (strLen > CHM_MAX_PATHLEN)
948 if (! _chm_parse_UTF8(pEntry, strLen, ui->path))
952 ui->space = (int)_chm_parse_cword(pEntry);
953 ui->start = _chm_parse_cword(pEntry);
954 ui->length = _chm_parse_cword(pEntry);
/* find an exact entry in PMGL; return NULL if we fail.
 *
 * page_buf holds one directory page of block_len bytes.  The PMGL header is
 * decoded from its start, and the entry area ends free_space bytes before
 * the end of the page.  Each entry name is parsed into a local buffer and
 * compared with objPath using strcmpiW, a case-insensitive comparison; the
 * data of non-matching entries is skipped.
 *
 * NOTE(review): the name length read from the page is passed straight to
 * _chm_parse_UTF8 with a buffer of CHM_MAX_PATHLEN+1 characters; unlike
 * _chm_parse_PMGL_entry, no comparison against CHM_MAX_PATHLEN is visible
 * here - confirm an over-long name cannot overrun the buffer.
 * NOTE(review): free_space comes from the page and is not compared with
 * block_len before end is computed - confirm. */
959 static UChar *_chm_find_in_PMGL(UChar *page_buf,
961 const WCHAR *objPath)
963 /* XXX: modify this to do a binary search using the nice index structure
964 * that is provided for us.
966 struct chmPmglHeader header;
972 WCHAR buffer[CHM_MAX_PATHLEN+1];
974 /* figure out where to start and end */
976 hremain = _CHM_PMGL_LEN;
977 if (! _unmarshal_pmgl_header(&cur, &hremain, &header))
979 end = page_buf + block_len - (header.free_space);
981 /* now, scan progressively */
986 strLen = _chm_parse_cword(&cur);
987 if (! _chm_parse_UTF8(&cur, strLen, buffer))
990 /* check if it is the right name */
991 if (! strcmpiW(buffer, objPath))
994 _chm_skip_PMGL_entry_data(&cur);
/* find which block should be searched next for the entry; -1 if no block.
 *
 * page_buf holds one index page of block_len bytes.  The PMGI header is
 * decoded from its start, and the entry area ends free_space bytes before
 * the end of the page.  Each entry is a name followed by a compressed-dword
 * page number.  Scanning stops at the first name that compares greater than
 * objPath under strcmpiW; page holds the number loaded from the entries
 * passed before that point.
 *
 * NOTE(review): the name length read from the page is passed straight to
 * _chm_parse_UTF8 with a buffer of CHM_MAX_PATHLEN+1 characters and no
 * comparison against CHM_MAX_PATHLEN is visible here - confirm an over-long
 * name cannot overrun the buffer.
 * NOTE(review): free_space comes from the page and is not compared with
 * block_len before end is computed - confirm. */
1001 static Int32 _chm_find_in_PMGI(UChar *page_buf,
1003 const WCHAR *objPath)
1005 /* XXX: modify this to do a binary search using the nice index structure
1006 * that is provided for us
1008 struct chmPmgiHeader header;
1014 WCHAR buffer[CHM_MAX_PATHLEN+1];
1016 /* figure out where to start and end */
1018 hremain = _CHM_PMGI_LEN;
1019 if (! _unmarshal_pmgi_header(&cur, &hremain, &header))
1021 end = page_buf + block_len - (header.free_space);
1023 /* now, scan progressively */
1027 strLen = _chm_parse_cword(&cur);
1028 if (! _chm_parse_UTF8(&cur, strLen, buffer))
1031 /* check if it is the right name */
1032 if (strcmpiW(buffer, objPath) > 0)
1035 /* load next value for path */
1036 page = (int)_chm_parse_cword(&cur);
/* Look up objPath in the archive directory and fill *ui with its entry.
 *
 * A scratch buffer of block_len bytes is allocated from the process heap and
 * the search starts at index_root.  Each page is fetched from dir_offset +
 * page number * block_len and dispatched on its four-byte signature:
 *  - PMGL (leaf): the entry is searched for in the page; when found it is
 *    parsed into *ui and CHM_RESOLVE_SUCCESS is returned, otherwise the
 *    lookup fails;
 *  - PMGI (branch): the page yields the next page number to visit;
 *  - anything else: the lookup fails.
 * A page number of -1 ends the walk with CHM_RESOLVE_FAILURE.  The scratch
 * buffer is freed on every exit path shown. */
1042 /* resolve a particular object from the archive */
1043 int chm_resolve_object(struct chmFile *h,
1044 const WCHAR *objPath,
1045 struct chmUnitInfo *ui)
1048 * XXX: implement caching scheme for dir pages
1053 /* buffer to hold whatever page we're looking at */
1054 UChar *page_buf = HeapAlloc(GetProcessHeap(), 0, h->block_len);
1057 curPage = h->index_root;
1059 /* until we have either returned or given up */
1060 while (curPage != -1)
1063 /* try to fetch the index page */
1064 if (_chm_fetch_bytes(h, page_buf,
1065 (UInt64)h->dir_offset + (UInt64)curPage*h->block_len,
1066 h->block_len) != h->block_len)
1068 HeapFree(GetProcessHeap(), 0, page_buf);
1069 return CHM_RESOLVE_FAILURE;
1072 /* now, if it is a leaf node: */
1073 if (memcmp(page_buf, _chm_pmgl_marker, 4) == 0)
1076 UChar *pEntry = _chm_find_in_PMGL(page_buf,
1081 HeapFree(GetProcessHeap(), 0, page_buf);
1082 return CHM_RESOLVE_FAILURE;
1085 /* parse entry and return */
1086 _chm_parse_PMGL_entry(&pEntry, ui);
1087 HeapFree(GetProcessHeap(), 0, page_buf);
1088 return CHM_RESOLVE_SUCCESS;
1091 /* else, if it is a branch node: */
1092 else if (memcmp(page_buf, _chm_pmgi_marker, 4) == 0)
1093 curPage = _chm_find_in_PMGI(page_buf, h->block_len, objPath);
1095 /* else, we are confused. give up. */
1098 HeapFree(GetProcessHeap(), 0, page_buf);
1099 return CHM_RESOLVE_FAILURE;
1103 /* didn't find anything. fail. */
1104 HeapFree(GetProcessHeap(), 0, page_buf);
1105 return CHM_RESOLVE_FAILURE;
1109 * utility methods for dealing with compressed data
/* get the bounds of a compressed block. return 0 on failure.
 *
 * The reset table entries are 8-byte values stored in the file at
 * data_offset + rt_unit.start + reset_table.table_offset.  For every block
 * except the last, the entry for the block gives its start and the entry
 * that follows it (at block*8 + 8) is read into *len.  For the last block
 * there is no following entry, so reset_table.compressed_len is used
 * instead.  Finally *start is made absolute by adding data_offset and
 * cn_unit.start.
 *
 * NOTE(review): the value read into *len for non-final blocks is the next
 * entry, so it is presumably converted to a length by subtracting the start
 * before *start is made absolute - confirm.
 * NOTE(review): block_count-1 is computed on a value read from the file; a
 * block_count of 0 would wrap if the field is unsigned - confirm. */
1113 static int _chm_get_cmpblock_bounds(struct chmFile *h,
1118 UChar buffer[8], *dummy;
1121 /* for all but the last block, use the reset table */
1122 if (block < h->reset_table.block_count-1)
1124 /* unpack the start address */
1127 if (_chm_fetch_bytes(h, buffer,
1128 (UInt64)h->data_offset
1129 + (UInt64)h->rt_unit.start
1130 + (UInt64)h->reset_table.table_offset
1132 remain) != remain ||
1133 !_unmarshal_uint64(&dummy, &remain, start))
1136 /* unpack the end address */
1139 if (_chm_fetch_bytes(h, buffer,
1140 (UInt64)h->data_offset
1141 + (UInt64)h->rt_unit.start
1142 + (UInt64)h->reset_table.table_offset
1143 + (UInt64)block*8 + 8,
1144 remain) != remain ||
1145 !_unmarshal_int64(&dummy, &remain, len))
1149 /* for the last block, use the span in addition to the reset table */
1152 /* unpack the start address */
1155 if (_chm_fetch_bytes(h, buffer,
1156 (UInt64)h->data_offset
1157 + (UInt64)h->rt_unit.start
1158 + (UInt64)h->reset_table.table_offset
1160 remain) != remain ||
1161 !_unmarshal_uint64(&dummy, &remain, start))
1164 *len = h->reset_table.compressed_len;
1167 /* compute the length and absolute start address */
1169 *start += h->data_offset + h->cn_unit.start;
/* decompress the block. must have lzx_mutex.
 *
 * A scratch buffer for compressed input (block_len + 6144 bytes) is taken
 * from the process heap and released on every exit path shown.
 *
 * blockAlign is how far the requested block lies past the last reset point
 * (block modulo reset_blkcount); those earlier blocks must be run through
 * the decompressor first.  If the most recently decompressed block
 * (lzx_last_block) already lies inside that run, only the blocks after it
 * are replayed.  Each replayed block, and finally the requested block, is
 * decompressed into the cache slot (block number modulo cache_num_blocks),
 * allocating the slot buffer on first use.  The decompressor is reset
 * whenever a block number is a multiple of reset_blkcount.  On success
 * lzx_last_block is updated and reset_table.block_len is returned.
 *
 * NOTE(review): reset_blkcount is used as a divisor; chm_openW derives it
 * from file data (including a multiply by windowsPerReset), so a zero value
 * would divide by zero - confirm it is validated somewhere.
 * NOTE(review): the malloc results for the cache slot buffers are passed to
 * the decompressor without a NULL check.
 * NOTE(review): the compressed length read from the reset table is used as
 * the fetch size into the scratch buffer; no comparison against the
 * allocated size is visible - confirm. */
1175 static Int64 _chm_decompress_block(struct chmFile *h,
1179 UChar *cbuffer = HeapAlloc( GetProcessHeap(), 0,
1180 ((unsigned int)h->reset_table.block_len + 6144));
1181 UInt64 cmpStart; /* compressed start */
1182 Int64 cmpLen; /* compressed len */
1183 int indexSlot; /* cache index slot */
1184 UChar *lbuffer; /* local buffer ptr */
1185 UInt32 blockAlign = (UInt32)(block % h->reset_blkcount); /* reset intvl. aln. */
1186 UInt32 i; /* local loop index */
1188 /* let the caching system pull its weight! */
1189 if (block - blockAlign <= h->lzx_last_block &&
1190 block >= h->lzx_last_block)
1191 blockAlign = (block - h->lzx_last_block);
1193 /* check if we need previous blocks */
1194 if (blockAlign != 0)
1196 /* fetch all required previous blocks since last reset */
1197 for (i = blockAlign; i > 0; i--)
1199 UInt32 curBlockIdx = block - i;
1201 /* check if we most recently decompressed the previous block */
1202 if (h->lzx_last_block != curBlockIdx)
1204 if ((curBlockIdx % h->reset_blkcount) == 0)
1207 fprintf(stderr, "***RESET (1)***\n");
1209 LZXreset(h->lzx_state);
1212 indexSlot = (int)((curBlockIdx) % h->cache_num_blocks);
1213 h->cache_block_indices[indexSlot] = curBlockIdx;
1214 if (! h->cache_blocks[indexSlot])
1215 h->cache_blocks[indexSlot] = (UChar *)malloc(
1216 (unsigned int)(h->reset_table.block_len));
1217 lbuffer = h->cache_blocks[indexSlot];
1219 /* decompress the previous block */
1221 fprintf(stderr, "Decompressing block #%4d (EXTRA)\n", curBlockIdx);
1223 if (!_chm_get_cmpblock_bounds(h, curBlockIdx, &cmpStart, &cmpLen) ||
1224 _chm_fetch_bytes(h, cbuffer, cmpStart, cmpLen) != cmpLen ||
1225 LZXdecompress(h->lzx_state, cbuffer, lbuffer, (int)cmpLen,
1226 (int)h->reset_table.block_len) != DECR_OK)
1229 fprintf(stderr, " (DECOMPRESS FAILED!)\n");
1231 HeapFree(GetProcessHeap(), 0, cbuffer);
1235 h->lzx_last_block = (int)curBlockIdx;
1241 if ((block % h->reset_blkcount) == 0)
1244 fprintf(stderr, "***RESET (2)***\n");
1246 LZXreset(h->lzx_state);
1250 /* allocate slot in cache */
1251 indexSlot = (int)(block % h->cache_num_blocks);
1252 h->cache_block_indices[indexSlot] = block;
1253 if (! h->cache_blocks[indexSlot])
1254 h->cache_blocks[indexSlot] = (UChar *)malloc(
1255 ((unsigned int)h->reset_table.block_len));
1256 lbuffer = h->cache_blocks[indexSlot];
1259 /* decompress the block we actually want */
1261 fprintf(stderr, "Decompressing block #%4d (REAL )\n", block);
1263 if (! _chm_get_cmpblock_bounds(h, block, &cmpStart, &cmpLen) ||
1264 _chm_fetch_bytes(h, cbuffer, cmpStart, cmpLen) != cmpLen ||
1265 LZXdecompress(h->lzx_state, cbuffer, lbuffer, (int)cmpLen,
1266 (int)h->reset_table.block_len) != DECR_OK)
1269 fprintf(stderr, " (DECOMPRESS FAILED!)\n");
1271 HeapFree(GetProcessHeap(), 0, cbuffer);
1274 h->lzx_last_block = (int)block;
1276 /* XXX: modify LZX routines to return the length of the data they
1277 * decompressed and return that instead, for an extra sanity check.
1279 HeapFree(GetProcessHeap(), 0, cbuffer);
1280 return h->reset_table.block_len;
/* grab a region from a compressed block.
 *
 * start is an offset into the uncompressed stream; it is split into a block
 * number and an offset within that block using reset_table.block_len.  The
 * request is clipped so that it never crosses the end of that block, which
 * means one call serves at most the remainder of a single block.
 *
 * Locking: lzx_mutex is taken first, then cache_mutex.  On a cache hit the
 * bytes are copied from the cached block and both locks are released.  On a
 * miss cache_mutex is released while lzx_mutex stays held for the
 * decompression and the final copy.
 *
 * The decompressor is created on demand; LZXinit is given ffs(window_size)-1,
 * the zero-based position of the lowest set bit of window_size. */
1284 static Int64 _chm_decompress_region(struct chmFile *h,
1289 UInt64 nBlock, nOffset;
1297 /* figure out what we need to read */
1298 nBlock = start / h->reset_table.block_len;
1299 nOffset = start % h->reset_table.block_len;
1301 if (nLen > (h->reset_table.block_len - nOffset))
1302 nLen = h->reset_table.block_len - nOffset;
1304 /* if block is cached, return data from it. */
1305 CHM_ACQUIRE_LOCK(h->lzx_mutex);
1306 CHM_ACQUIRE_LOCK(h->cache_mutex);
1307 if (h->cache_block_indices[nBlock % h->cache_num_blocks] == nBlock &&
1308 h->cache_blocks[nBlock % h->cache_num_blocks] != NULL)
1311 h->cache_blocks[nBlock % h->cache_num_blocks] + nOffset,
1312 (unsigned int)nLen);
1313 CHM_RELEASE_LOCK(h->cache_mutex);
1314 CHM_RELEASE_LOCK(h->lzx_mutex);
1317 CHM_RELEASE_LOCK(h->cache_mutex);
1319 /* data request not satisfied, so... start up the decompressor machine */
1322 int window_size = ffs(h->window_size) - 1;
1323 h->lzx_last_block = -1;
1324 h->lzx_state = LZXinit(window_size);
1327 /* decompress some data */
1328 gotLen = _chm_decompress_block(h, nBlock, &ubuffer);
1331 memcpy(buf, ubuffer+nOffset, (unsigned int)nLen);
1332 CHM_RELEASE_LOCK(h->lzx_mutex);
/* Read (part of) the object described by ui into a caller buffer.
 *
 * addr is the starting offset inside the object and must lie in the range
 * 0 to ui->length-1.  The requested length is clipped so the read never runs
 * past the end of the object.
 *
 * An object in the uncompressed section is read directly from the file at
 * data_offset + ui->start + addr.  Any other section is treated as
 * compressed: this requires compression to be enabled on the handle, and the
 * data is pulled through _chm_decompress_region, whose per-call result is
 * tracked in swath and accumulated in total. */
1336 /* retrieve (part of) an object */
1337 LONGINT64 chm_retrieve_object(struct chmFile *h,
1338 struct chmUnitInfo *ui,
1343 /* must be valid file handle */
1347 /* starting address must be in correct range */
1348 if (addr < 0 || addr >= ui->length)
1352 if (addr + len > ui->length)
1353 len = ui->length - addr;
1355 /* if the file is uncompressed, it's simple */
1356 if (ui->space == CHM_UNCOMPRESSED)
1359 return _chm_fetch_bytes(h,
1361 (UInt64)h->data_offset + (UInt64)ui->start + (UInt64)addr,
1365 /* else if the file is compressed, it's a little trickier */
1366 else /* ui->space == CHM_COMPRESSED */
1368 Int64 swath=0, total=0;
1370 /* if compression is not enabled for this file... */
1371 if (! h->compression_enabled)
1376 /* swill another mouthful */
1377 swath = _chm_decompress_region(h, buf, ui->start + addr, len);
1379 /* if we didn't get any... */
1395 /* enumerate the objects in the .chm archive */
/*
 * chm_enumerate: walk the chain of directory listing pages starting at
 * h->index_head, parse every entry on each page, and invoke the callback
 * `e` as (*e)(h, &ui, context) for each entry whose kind is selected by the
 * `what` bit mask (CHM_ENUMERATE_DIRS / FILES / NORMAL / SPECIAL / META).
 *
 * A page-sized scratch buffer is heap-allocated on entry and freed on every
 * visible exit path.
 *
 * NOTE(review): the rest of the parameter list, several local declarations,
 * the inner loop header and all return statements are on lines not visible
 * in this excerpt -- confirm the return values against the full file.
 */
1396 int chm_enumerate(struct chmFile *h,
1403 /* buffer to hold whatever page we're looking at */
/* NOTE(review): no NULL check on this allocation is visible in this excerpt -- confirm allocation failure is handled */
1404 UChar *page_buf = HeapAlloc(GetProcessHeap(), 0, (unsigned int)h->block_len);
1405 struct chmPmglHeader header;
1408 unsigned long lenRemain;
1411 /* the current ui */
1412 struct chmUnitInfo ui;
/* start from the first listing page recorded in the file handle */
1416 curPage = h->index_head;
1418 /* until we have either returned or given up */
/* a page number of -1 terminates the chain */
1419 while (curPage != -1)
1422 /* try to fetch the index page */
/* page position = start of the directory + page number * page size; a short read is an error */
1423 if (_chm_fetch_bytes(h,
1425 (UInt64)h->dir_offset + (UInt64)curPage*h->block_len,
1426 h->block_len) != h->block_len)
1428 HeapFree(GetProcessHeap(), 0, page_buf);
1432 /* figure out start and end for this page */
/* the header decoder consumes from `cur` and is told how many bytes it may use */
1434 lenRemain = _CHM_PMGL_LEN;
1435 if (! _unmarshal_pmgl_header(&cur, &lenRemain, &header))
1437 HeapFree(GetProcessHeap(), 0, page_buf);
/*
 * `end` is the page end minus the free space reported by the header.
 * NOTE(review): no visible check that header.free_space <= h->block_len;
 * a corrupt page could put `end` before page_buf -- confirm.
 */
1440 end = page_buf + h->block_len - (header.free_space);
1442 /* loop over this page */
1445 if (! _chm_parse_PMGL_entry(&cur, &ui))
1447 HeapFree(GetProcessHeap(), 0, page_buf);
1451 /* get the length of the path */
/*
 * Despite the comment above, this is the INDEX of the last character.
 * NOTE(review): for an empty path this is -1 and ui.path[-1] is read below
 * -- confirm the entry parser never yields an empty path.
 */
1452 ui_path_len = strlenW(ui.path)-1;
1454 /* check for DIRS */
/* a trailing '/' marks a directory entry */
1455 if (ui.path[ui_path_len] == '/' && !(what & CHM_ENUMERATE_DIRS))
1458 /* check for FILES */
1459 if (ui.path[ui_path_len] != '/' && !(what & CHM_ENUMERATE_FILES))
1462 /* check for NORMAL vs. META */
/* paths beginning with '/' are NORMAL or SPECIAL; anything else is META */
1463 if (ui.path[0] == '/')
1466 /* check for NORMAL vs. SPECIAL */
/* a '#' or '$' right after the leading '/' marks a SPECIAL entry */
1467 if (ui.path[1] == '#' || ui.path[1] == '$')
1468 flag = CHM_ENUMERATE_SPECIAL;
1470 flag = CHM_ENUMERATE_NORMAL;
1473 flag = CHM_ENUMERATE_META;
/* the entry is skipped (presumably via continue, not visible here) unless its class is requested */
1474 if (! (what & flag))
1477 /* call the enumerator */
1479 int status = (*e)(h, &ui, context);
/* FAILURE and SUCCESS both release the page buffer; no release is visible for CONTINUE, which presumably keeps iterating */
1482 case CHM_ENUMERATOR_FAILURE:
1483 HeapFree(GetProcessHeap(), 0, page_buf);
1485 case CHM_ENUMERATOR_CONTINUE:
1487 case CHM_ENUMERATOR_SUCCESS:
1488 HeapFree(GetProcessHeap(), 0, page_buf);
1496 /* advance to next page */
1497 curPage = header.block_next;
/* chain exhausted: release the page buffer before leaving */
1500 HeapFree(GetProcessHeap(), 0, page_buf);
/*
 * chm_enumerate_dir: like chm_enumerate, but restricted to entries whose
 * path starts with `prefix` (compared case-insensitively).  The prefix is
 * copied locally and given a trailing '/' if it lacks one.  Entries are
 * also compared against the previously accepted path (lastPath) so that
 * entries beneath it are filtered -- presumably to report only immediate
 * children; the skip statement itself is not visible here.
 *
 * A page-sized scratch buffer is heap-allocated on entry and freed on every
 * visible exit path.
 *
 * NOTE(review): the rest of the parameter list (what, e, context are used
 * below), several local declarations, the inner loop header, the "started"
 * flag handling and all return statements are on lines not visible in this
 * excerpt -- confirm details against the full file.
 */
1504 int chm_enumerate_dir(struct chmFile *h,
1505 const WCHAR *prefix,
1511 * XXX: do this efficiently (i.e. using the tree index)
1516 /* buffer to hold whatever page we're looking at */
1517 UChar *page_buf = HeapAlloc(GetProcessHeap(), 0, (unsigned int)h->block_len);
/* NOTE(review): no NULL check on the allocation above is visible in this excerpt -- confirm allocation failure is handled */
1518 struct chmPmglHeader header;
1521 unsigned long lenRemain;
1523 /* set to 1 once we've started */
1526 /* the current ui */
1527 struct chmUnitInfo ui;
1531 /* the length of the prefix */
/* one extra element beyond CHM_MAX_PATHLEN leaves room for the '/' appended below */
1532 WCHAR prefixRectified[CHM_MAX_PATHLEN+1];
1534 WCHAR lastPath[CHM_MAX_PATHLEN];
/* start from the first listing page recorded in the file handle */
1538 curPage = h->index_head;
1540 /* initialize pathname state */
/* bounded copy: at most CHM_MAX_PATHLEN elements including the terminator */
1541 lstrcpynW(prefixRectified, prefix, CHM_MAX_PATHLEN);
1542 prefixLen = strlenW(prefixRectified);
/*
 * Make sure the prefix ends in '/'.
 * NOTE(review): with an empty prefix, prefixLen-1 indexes before the array;
 * a guard may exist on a line not visible here -- confirm.
 */
1545 if (prefixRectified[prefixLen-1] != '/')
1547 prefixRectified[prefixLen] = '/';
1548 prefixRectified[prefixLen+1] = '\0';
1555 /* until we have either returned or given up */
/* a page number of -1 terminates the chain */
1556 while (curPage != -1)
1559 /* try to fetch the index page */
/* page position = start of the directory + page number * page size; a short read is an error */
1560 if (_chm_fetch_bytes(h,
1562 (UInt64)h->dir_offset + (UInt64)curPage*h->block_len,
1563 h->block_len) != h->block_len)
1565 HeapFree(GetProcessHeap(), 0, page_buf);
1569 /* figure out start and end for this page */
1571 lenRemain = _CHM_PMGL_LEN;
1572 if (! _unmarshal_pmgl_header(&cur, &lenRemain, &header))
1574 HeapFree(GetProcessHeap(), 0, page_buf);
/* NOTE(review): header.free_space is not visibly validated against h->block_len -- confirm */
1577 end = page_buf + h->block_len - (header.free_space);
1579 /* loop over this page */
1582 if (! _chm_parse_PMGL_entry(&cur, &ui))
1584 HeapFree(GetProcessHeap(), 0, page_buf);
1588 /* check if we should start */
/* enumeration begins at a zero-length entry whose path matches the prefix */
1591 if (ui.length == 0 && strncmpiW(ui.path, prefixRectified, prefixLen) == 0)
/* a path that ends exactly at the prefix is the directory entry itself */
1596 if (ui.path[prefixLen] == '\0')
1600 /* check if we should stop */
/* the first entry that no longer matches the prefix ends the walk; the buffer is released here */
1603 if (strncmpiW(ui.path, prefixRectified, prefixLen) != 0)
1605 HeapFree(GetProcessHeap(), 0, page_buf);
1610 /* check if we should include this path */
/* -1 means no path has been remembered yet */
1611 if (lastPathLen != -1)
1613 if (strncmpiW(ui.path, lastPath, lastPathLen) == 0)
/*
 * Remember this path for the comparison above.
 * NOTE(review): unbounded copy into lastPath[CHM_MAX_PATHLEN], one element
 * smaller than prefixRectified; safe only if ui.path, terminator included,
 * never exceeds CHM_MAX_PATHLEN elements -- confirm against the entry parser.
 */
1616 strcpyW(lastPath, ui.path);
1617 lastPathLen = strlenW(lastPath);
1619 /* get the length of the path */
/* this is the INDEX of the last character; NOTE(review): -1 for an empty path -- confirm that cannot occur */
1620 ui_path_len = strlenW(ui.path)-1;
1622 /* check for DIRS */
/* a trailing '/' marks a directory entry */
1623 if (ui.path[ui_path_len] == '/' && !(what & CHM_ENUMERATE_DIRS))
1626 /* check for FILES */
1627 if (ui.path[ui_path_len] != '/' && !(what & CHM_ENUMERATE_FILES))
1630 /* check for NORMAL vs. META */
/* paths beginning with '/' are NORMAL or SPECIAL; anything else is META */
1631 if (ui.path[0] == '/')
1634 /* check for NORMAL vs. SPECIAL */
/* a '#' or '$' right after the leading '/' marks a SPECIAL entry */
1635 if (ui.path[1] == '#' || ui.path[1] == '$')
1636 flag = CHM_ENUMERATE_SPECIAL;
1638 flag = CHM_ENUMERATE_NORMAL;
1641 flag = CHM_ENUMERATE_META;
1642 if (! (what & flag))
1645 /* call the enumerator */
1647 int status = (*e)(h, &ui, context);
/* FAILURE and SUCCESS both release the page buffer; no release is visible for CONTINUE, which presumably keeps iterating */
1650 case CHM_ENUMERATOR_FAILURE:
1651 HeapFree(GetProcessHeap(), 0, page_buf);
1653 case CHM_ENUMERATOR_CONTINUE:
1655 case CHM_ENUMERATOR_SUCCESS:
1656 HeapFree(GetProcessHeap(), 0, page_buf);
1664 /* advance to next page */
1665 curPage = header.block_next;
/* chain exhausted: release the page buffer before leaving */
1668 HeapFree(GetProcessHeap(), 0, page_buf);