_UNDOCUMENTED CORNER_
edited by Andrew Schulman
written by Pete Davis

[LISTING ONE]

/* WHSTRUCT.H--Windows Help File Internal Records--Pete Davis and Ron Burk, 
   June 1993. See "Undocumented Corner," DDJ, September 1993  */

/* Integer aliases used by the record layouts below.
   NOTE(review): WORD is "unsigned int", so it is 2 bytes only with a
   compiler whose int is 2 bytes; likewise DWORD/long are assumed to be
   4 bytes -- confirm before building with a different compiler. */
typedef unsigned long   DWORD;
typedef unsigned int    WORD;
typedef unsigned char   BYTE;

/* Value HELPDIR.C requires in HELPHEADER.MagicNumber */
#define HELP_MAGIC      0x00035F3FL

/* Help file Header record: every .HLP file starts with this structure
   (see Figure 1(a)). */
typedef struct HELPHEADER {
    DWORD   MagicNumber;      /* Must equal HELP_MAGIC (0x00035F3F) */
    long    WHIFS;            /* File offset of WHIFS header   */
    long    Negative1;        /* NOTE(review): presumably always -1, judging
                                 by the name -- confirm */
    long    FileSize;         /* Size of entire .HLP File  */
}   HELPHEADER;
/* File Header for WHIFS files: each internal file begins with this
   structure, and the file's data follows it immediately (see Figure 1(c)). */
typedef struct FILEHEADER {
    long    FilePlusHeader;  /* File size including this header */
    long    FileSize;        /* File size not including header  */
    char    TermNull;        /* Zero byte that ends the header  */
}   FILEHEADER;
/* Help Directory BTREE header, located at file offset HELPHEADER.WHIFS.
   HELPDIR.C treats the bytes immediately after this header as page 0 of
   the directory B-tree. */
typedef struct WHIFSBTREEHEADER {
    char    Magic[18];      /* Not exactly magic for some .MVB files   */
    char    Garbage[13];    /* Not interpreted by the listings here    */
    int     MustBeZero;     /* Probably shows up when Help > ~40 megs  */
    int     NSplits;        /* Number of page splits the B-tree has suffered */
    int     RootPage;       /* Page # of root page                     */
    int     MustBeNegOne;   /* Probably shows up when B-Tree is HUGE!! */
    int     TotalPages;     /* Total # of pages in the B-tree.  The original
                               note said "2Kb pages", but HELPDIR.C steps
                               through the tree in 1K pages -- TODO confirm */
    int     NLevels;        /* Number of levels in this Btree          */
    DWORD   TotalWHIFSEntries;  /* NOTE(review): presumably the number of
                                   internal files in the directory -- confirm */
}   WHIFSBTREEHEADER;
/* Modified B-Tree Node header to handle a pointer to the page.
   HELPDIR.C and WHMACROS.C read sizeof(BTREENODEHEADER)-2 bytes from the
   file into this structure, so BTData is left out of the read only when a
   data pointer is 2 bytes wide.  The entries of the page follow the header
   in the file. */
typedef struct BTREENODEHEADER {
    WORD    Signature;      /* Signature word            */
    int     NEntries;       /* Number of entries         */
    int     PreviousPage;   /* Index of Previous Page    */
    int     NextPage;       /* Index of Next Page; the listings stop at -1 */
    char    *BTData;        /* Pointer to B-Tree's data  */    
}   BTREENODEHEADER;
/* Modified B-Tree Index header to handle a pointer to the page.
   NOTE(review): IdxData is presumably an in-memory pointer to the page's
   entries rather than part of the on-disk header, like BTData in
   BTREENODEHEADER -- confirm. */
typedef struct BTREEINDEXHEADER {
    WORD    Signature;      /* Signature word            */
    int     NEntries;       /* Number of entries in node */
    char    *IdxData;       /* Pointer to the index page's data */
}   BTREEINDEXHEADER;
/* Phrase header for uncompressed |Phrases file.  |Phrases holds the list
   of phrases used for compression of the |TOPIC file (see Table 1). */
typedef struct PHRASEHDR    {
    int     NumPhrases;   /* Number of phrases in table                    */
    WORD    OneHundred;   /* 0x0100                                        */
} PHRASEHDR;
/* Phrase header for compressed |Phrases file: the same leading fields as
   PHRASEHDR, followed by the size of the phrases once uncompressed. */
typedef struct ALTPHRASEHDR    {
    int     NumPhrases;   /* Number of phrases in table                    */
    WORD    OneHundred;   /* 0x0100                                        */
    long    PhrasesSize;  /* Amount of space uncompressed phrases requires */
} ALTPHRASEHDR;
/* Flags for the |SYSTEM header Flags field (SYSTEMHEADER.Flags) below.
   Unfortunately, none of these flags are particularly solid.  The 0x0004
   flag works MOST of the time.  Another flag, 0x0008, appears both in Win32
   .HLP files and in files with Phrase compression but without LZ77
   compression; it has no name defined here.
   NOTE(review): the _310/_300 suffixes presumably refer to help compiler
   versions 3.10 and 3.00 -- confirm. */
#define NO_COMPRESSION_310      0x0000
#define COMPRESSION_310         0x0004
#define SYSFLAG_300             0x000A
/* Header for |SYSTEM file.  WHMACROS.C reads it right after the file's
   FILEHEADER and then reads SYSTEMREC records from the bytes that follow. */
typedef struct SYSTEMHEADER {
    BYTE    Magic;     /* 0x6C                  */
    BYTE    Version;   /* Version #             */
    BYTE    Revision;  /* Revision code         */
    BYTE    Always0;   /* Unknown               */
    WORD    Always1;   /* Always 0x0001         */
    DWORD   GenDate;   /* Date/Time that the help file was generated    */
    WORD    Flags;     /* Values seen: 0x0000, 0x0004, 0x0008, 0x000A
                          (see the flag defines above)                  */
    } SYSTEMHEADER;
/* Types for SYSTEMREC RecordType below.  Note that other record types,
   such as 0x0A, 0x0B, 0x0C, and 0x0D, show up in the large .MVB files used
   by the MSDN CD-ROM and Cinemania products.  No name is defined here for
   type 0x0007. */
#define HPJ_TITLE       0x0001      /* Title from .HPJ file            */
#define HPJ_COPYRIGHT   0x0002      /* Copyright notice from .HPJ file */
#define HPJ_CONTENTS    0x0003      /* Contents=  from .HPJ            */
#define MACRO_DATA      0x0004      /* RData = 4 nulls if no macros    */
#define ICON_DATA       0x0005      /* Data for Icon                   */
#define HPJ_SECWINDOWS  0x0006      /* Secondary window info in .HPJ   */
#define HPJ_CITATION    0x0008      /* Citation= under [OPTIONS]       */
/* Secondary Window Record following type 0x0006 (HPJ_SECWINDOWS) System
   Record.  Flags holds WSYSFLAG_* bits; each bit marks the corresponding
   field below as valid.
   NOTE(review): Type, Name and Caption are presumably NUL-terminated
   strings padded to the array size -- confirm. */
typedef struct SECWINDOW {
    WORD    Flags;          /* Flags (See Below)        */
    BYTE    Type[10];       /* Type of window           */
    BYTE    Name[9];        /* Window name              */
    BYTE    Caption[51];    /* Caption for window       */
    WORD    X;              /* X coordinate to start at */
    WORD    Y;              /* Y coordinate to start at */
    WORD    Width;          /* Width to create for      */
    WORD    Height;         /* Height to create for     */
    WORD    Maximize;       /* Maximize flag            */
    BYTE    Rgb[3];         /* RGB for background       */
    BYTE    Unknown1;       /* No known use             */
    BYTE    RgbNsr[3];      /* RGB for non scrollable region */
    BYTE    Unknown2;       /* No known use             */
} SECWINDOW;
/* Values for Secondary Window Flags (SECWINDOW.Flags): one bit per
   SECWINDOW field, set when that field holds valid data */
#define WSYSFLAG_TYPE       0x0001  /* Type is valid        */
#define WSYSFLAG_NAME       0x0002  /* Name is valid        */
#define WSYSFLAG_CAPTION    0x0004  /* Caption is valid     */
#define WSYSFLAG_X          0x0008  /* X    is valid        */
#define WSYSFLAG_Y          0x0010  /* Y    is valid        */
#define WSYSFLAG_WIDTH      0x0020  /* Width    is valid    */
#define WSYSFLAG_HEIGHT     0x0040  /* Height   is valid    */
#define WSYSFLAG_MAXIMIZE   0x0080  /* Maximize is valid    */
#define WSYSFLAG_RGB        0x0100  /* Rgb  is valid        */
#define WSYSFLAG_RGBNSR     0x0200  /* RgbNsr   is valid    */
#define WSYSFLAG_TOP        0x0400  /* On top was set in HPJ file */
/* Help Compiler 3.1 System record. Multiple records possible; WHMACROS.C
   walks them one after another, following the SYSTEMHEADER of the |SYSTEM
   file.  RecordType is one of the record-type values defined above.
   NOTE(review): RData is presumably an in-memory pointer to the record's
   DataSize bytes of data rather than part of the on-disk record
   (WHMACROS.C reads the data into its own buffer) -- confirm. */
typedef struct SYSTEMREC {
    WORD    RecordType;   /* Type of Data in record      */
    WORD    DataSize;     /* Size of RData               */
    char   *RData;        /* Raw data (Icon, title, etc) */
    } SYSTEMREC;
/* Header for |TOMAP file, the list of pointers to topics (see Table 1).
   NOTE(review): TopicPtr is presumably an in-memory pointer to the
   ToMapLen topic pointers rather than an on-disk field -- confirm. */
typedef struct TOMAPHEADER {
    long    IndexTopic;   /* Index topic for help file */
    long    Reserved[15]; /* Not interpreted here      */
    int     ToMapLen;     /* Number of topic pointers  */
    long    *TopicPtr;    /* Pointer to all the topics */
    } TOMAPHEADER;


[LISTING TWO]

/* HELPDIR.C -- List all internal files within a Windows .HLP file. 
   WHIFS = Windows Help Internal File System -- Pete Davis, June 1993
   bcc helpdir.c
   See "Undocumented Corner," DDJ, September 1993 */
#pragma pack(1)
#include <conio.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include "whstruct.h"

#define PAGE_SIZE       1024L        /* 1k pages -- must be long! */

/* Report a fatal error: write the message followed by a newline to stdout,
   then end the program with exit status 1.  Does not return. */
void fail(const char *s)
{
    printf("%s\n", s);
    exit(1);
}

/* main -- list every internal file of the .HLP file named by argv[1].
   Reads the HELPHEADER and the WHIFS B-tree header, walks the directory
   B-tree from its root page down to the first leaf page, then follows the
   chain of leaf pages, printing each internal file's name and its offset
   within the .HLP file.  When built with DO_MACROS the file, positioned at
   the first leaf page, is handed to do_macros() instead.
   Returns 0 on success.  Every error is reported through fail(), which
   exits with status 1.
   NOTE: the structure reads below (4 bytes for Signature+NEntries,
   sizeof(int) for a page number, sizeof(WHIFSNode)-2 to leave out BTData)
   match the file layout only when int and data pointers are 2 bytes wide
   and structures are packed (#pragma pack(1)). */
int main(int argc, char *argv[]) {
   HELPHEADER         HelpHdr;
   WHIFSBTREEHEADER   WHIFSHdr;
   BTREENODEHEADER    WHIFSNode;
   int                file, aPage, c;
   long               WHIFSStart, FileOffset;
   FILE               *HelpFile;
   
   /* argv[1] is only valid when a file name was actually supplied */
   if (argc < 2)
       fail("usage: helpdir file.hlp");
   if ((HelpFile=fopen(argv[1], "rb")) == NULL)
       fail("can't open file");
   /* Get Help header, go to WHIFS and get WHIFS Header.  A short read means
      the file cannot be a help file, so it is rejected the same way as a
      wrong magic number. */
   if (fread(&HelpHdr, sizeof(HelpHdr), 1, HelpFile) != 1 ||
       HelpHdr.MagicNumber != HELP_MAGIC)
       fail("not a Windows help file");
   if (fseek(HelpFile, HelpHdr.WHIFS, SEEK_SET) != 0 ||
       fread(&WHIFSHdr, sizeof(WHIFSHdr), 1, HelpFile) != 1)
       fail("can't read WHIFS header");
   /* WHIFS starts after the WHIFSHdr */
   WHIFSStart = HelpHdr.WHIFS + sizeof(WHIFSHdr);
   /* "file" doubles as the current B-tree level while descending */
   file=1;
   /* Goto WHIFS Root */
   if (fseek(HelpFile, WHIFSStart + (PAGE_SIZE * WHIFSHdr.RootPage),
             SEEK_SET) != 0)
       fail("can't seek to WHIFS root page");
   /* Find the first leaf node */
   while (file < WHIFSHdr.NLevels) {
       /* if it's not a leaf, we don't need last 2 fields; the word that
          follows Signature and NEntries is the page pointer to the first
          node in the index */
       if (fread(&WHIFSNode, 4, 1, HelpFile) != 1 ||
           fread(&aPage, sizeof(int), 1, HelpFile) != 1)
           fail("can't read WHIFS index page");
       if (fseek(HelpFile, WHIFSStart + (PAGE_SIZE * aPage), SEEK_SET) != 0)
           fail("can't seek to WHIFS page");
       file++;
   }
#ifdef DO_MACROS
{
    extern void do_macros(FILE *HelpFile, long WHIFSStart);
    do_macros(HelpFile, WHIFSStart);
}
#else
   /* Go through linked list of leaf nodes */
   for (;;) {
       if (fread(&WHIFSNode, sizeof(WHIFSNode)-2, 1, HelpFile) != 1)
           break;
       /* List all entries in node */
       for (file = 1; file <= WHIFSNode.NEntries; file ++) {
          /* Each entry is a NUL-terminated name followed by a file offset.
             EOF is non-zero, so it must be tested explicitly or a
             truncated file would make this loop print forever. */
          while ((c = fgetc(HelpFile)) != 0) {
               if (c == EOF)
                   fail("unexpected end of file in WHIFS directory");
               putchar(c);
          }
          if (fread(&FileOffset, sizeof(FileOffset), 1, HelpFile) != 1)
               fail("unexpected end of file in WHIFS directory");
          printf("  \t0x%08lX\n", FileOffset);
       }
       if (WHIFSNode.NextPage == -1)
          break;
       if (fseek(HelpFile,WHIFSStart+(WHIFSNode.NextPage*PAGE_SIZE),
                 SEEK_SET) != 0)
          fail("can't seek to WHIFS page");
   } 
#endif
   fclose(HelpFile);
   /* 0 = success; 1 is the status fail() uses for errors */
   return 0;
}


[LISTING THREE]

/* WHMACROS.C -- Get macros from a .HLP file. Used by HELPDIR.C if #define 
   DO_MACROS -- Pete Davis and Andrew Schulman, 
   bcc -DDO_MACROS whmacros.c helpdir.c
   See "Undocumented Corner," DDJ, September 1993 */

#pragma pack(1)
#include <conio.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include "whstruct.h"

extern void fail(const char *s);

#define PAGE_SIZE       1024L        /* 1k pages -- must be long! */

/* do_macros -- print the macros stored in the |SYSTEM internal file.
   HelpFile   : open .HLP stream, positioned at the first leaf page of the
                WHIFS directory B-tree (where main() in HELPDIR.C leaves it).
   WHIFSStart : file offset of page 0 of the WHIFS directory B-tree.
   Searches the leaf pages for the "|SYSTEM" entry, then walks the SYSTEMREC
   records that follow the |SYSTEM file's FILEHEADER and SYSTEMHEADER and
   prints the data of every MACRO_DATA record, followed by a blank line.
   Any read error, or a missing |SYSTEM file, is reported through fail().
   NOTE: sizeof(WHIFSNode)-2 leaves BTData out of the read only when data
   pointers are 2 bytes wide; the WORD/int/long fields are likewise assumed
   to be 2/2/4 bytes and packed (#pragma pack(1)). */
void do_macros(FILE *HelpFile, long WHIFSStart)
{
   BTREENODEHEADER  WHIFSNode;
   SYSTEMHEADER     SystemHdr;
   SYSTEMREC        SystemRec;
   FILEHEADER       FileHdr;
   long             SystemOffset=0, FileOffset, FileStart, RecordsSize;
   long             RecHdrSize;
   char             filename[20], *data;
   int              c, i, file;
   /* Find the System file. */
   for (;;) {
       if (fread(&WHIFSNode, sizeof(WHIFSNode) - 2, 1, HelpFile) != 1)
           fail("can't read WHIFS directory page");
       /* Search all entries in node */
       for (file = 1; file <= WHIFSNode.NEntries; file ++) {
          i = 0;
          /* Names may be longer than the buffer (baggage files keep the
             name given in the project), so store only what fits but still
             consume the whole name.  A truncated name is longer than
             "|SYSTEM" and therefore can never match it. */
          while ((c = fgetc(HelpFile)) != 0) {
                if (c == EOF)
                    fail("unexpected end of file in WHIFS directory");
                if (i < (int) sizeof(filename) - 1)
                    filename[i++] = (char) c;
          }
          filename[i] = 0;
          if (fread(&FileOffset, sizeof(FileOffset), 1, HelpFile) != 1)
              fail("unexpected end of file in WHIFS directory");
          if (strcmp(filename, "|SYSTEM") == 0) {
              SystemOffset = FileOffset;
              break;
          }
       }
       /* Stop as soon as the file is found or the leaf chain ends */
       if (SystemOffset || WHIFSNode.NextPage == -1)
           break;
       if (fseek(HelpFile, WHIFSStart + (WHIFSNode.NextPage * PAGE_SIZE),
                 SEEK_SET) != 0)
           fail("can't seek to WHIFS page");
   }
   if (! SystemOffset)
       fail("Can't locate |SYSTEM file");
   /* Get System header */
   if (fseek(HelpFile, SystemOffset, SEEK_SET) != 0 ||
       fread(&FileHdr, sizeof(FileHdr), 1, HelpFile) != 1 ||
       fread(&SystemHdr, sizeof(SystemHdr), 1, HelpFile) != 1)
       fail("can't read |SYSTEM header");

   FileStart = SystemOffset + sizeof(FileHdr) + sizeof(SystemHdr);
   /* A record header in the file is just RecordType and DataSize; RData is
      not part of it, so size the read from those two fields instead of
      from the whole structure. */
   RecHdrSize = (long) (sizeof(SystemRec.RecordType) +
                        sizeof(SystemRec.DataSize));
   /* FileSize excludes the FILEHEADER but includes the SYSTEMHEADER, and
      FileOffset is counted from the end of the SYSTEMHEADER, so the records
      occupy FileSize - sizeof(SystemHdr) bytes. */
   RecordsSize = FileHdr.FileSize - (long) sizeof(SystemHdr);
   FileOffset = 0;
   while (FileOffset + RecHdrSize <= RecordsSize)    {
       if (fseek(HelpFile, FileStart + FileOffset, SEEK_SET) != 0 ||
           fread(&SystemRec, (size_t) RecHdrSize, 1, HelpFile) != 1)
           fail("can't read |SYSTEM record");
       FileOffset += RecHdrSize + SystemRec.DataSize;
       if (SystemRec.RecordType == MACRO_DATA)  {
           if (! (data = malloc((size_t) SystemRec.DataSize + 1)))
               fail("insufficient memory");
           /* fread() reports 0 items for a zero-length record, so only
              treat a short read as an error when data was expected */
           if (SystemRec.DataSize != 0 &&
               fread(data, SystemRec.DataSize, 1, HelpFile) != 1)
               fail("can't read |SYSTEM record data");
           data[SystemRec.DataSize] = '\0';
           printf("%s\n\n", data);
           free(data);
       }
   }
}




Figure 1: Annotated hex dump of portions of a .HLP file. (a) All .HLP files 
start with a HELPHEADER. The first long is the .HLP magic number (0x035F3F). 
The next long is the file offset of the WHIFS header (here 0x041F, the offset 
dumped in Figure 1(b)); (b) the WHIFS starts off with a WHIFSBTREEHEADER, 
immediately followed by the WHIFS directory, which contains null-terminated 
file names, each followed by that WHIFS file's offset within the larger .HLP
file. Here, bag.ini is at offset 0x10, |CONTEXT is at 0x0362B3, and
|CTXOMAP is at 0x032B02; (c) each internal file begins with a FILEHEADER 
structure, which specifies the file's size both with and without the header, 
followed by a 0. Here, bag.ini is 0x040F bytes with the header, and 0x0406 
without. The file data itself (evidently, some kind of initialization file)
starts immediately after the header.

(a)

D:\MIPS>dump msmail32.hlp -bytes 8 
00000000 | 3F 5F 03 00 1F 04 00 00                         | ?_......

(b)

D:\MIPS>dump msmail32.hlp -offset 0x041f 
0000041f | 2F 04 00 00 26 04 00 00 04 3B 29 02 04 00 04 7A | /...&....;)....z
0000042f | 34 00 00 43 3A 5C 7E 68 63 35 00 09 02 62 6D 00 | 4..C:\~hc5...bm.
0000043f | 00 00 00 00 00 FF FF 01 00 01 00 1E 00 00 00 C1 | ................
0000044f | 02 1E 00 FF FF FF FF 62 61 67 2E 69 6E 69 00 10 | .......bag.ini..
0000045f | 00 00 00 7C 43 4F 4E 54 45 58 54 00 B3 62 03 00 | ...|CONTEXT..b..
0000046f | 7C 43 54 58 4F 4D 41 50 00 02 2B 03 00 7C 46 4F | |CTXOMAP..+..|FO
; ... etc. ...

(c)

D:\MIPS>dump d:\mips\msmail32.hlp -offset 0x10
00000010 | 0F 04 00 00 06 04 00 00 00 0D 0A 5B 62 61 67 2E | ...........[bag.
00000020 | 69 6E 69 5D 0D 0A 67 72 6F 75 70 63 6F 75 6E 74 | ini]..groupcount
00000030 | 3D 31 34 0D 0A 67 72 6F 75 70 31 3D 42 61 63 6B | =14..group1=Back
00000040 | 75 70 0D 0A 67 72 6F 75 70 32 3D 43 6C 69 70 62 | up..group2=Clipb
; ... etc. ...



Figure 2: HELPDIR output for the .HLP file hex dumped in Figure 1.

D:\MIPS>c:\ddj\helpdir msmail32.hlp
bag.ini                 0x00000010
|CONTEXT                0x000362B3
|CTXOMAP                0x00032B02
|FONT                   0x000327D2
|KWBTREE                0x00033255
|KWDATA                 0x00032ED5
|KWMAP                  0x0003323E
|SYSTEM                 0x0000084E
|TOPIC                  0x00000A53
|TTLBTREE               0x00034A84
|bm0                    0x00037AE2
; ... etc. ...


Figure 3: Selected macros in Microsoft Cinemania and the MSDN
CD-ROM, as displayed by WHMACROS.

C:\DDJ>dir d:\content\*.mvb
CINMANIA MVB 139104719 08-18-92  12:00a

C:\DDJ>whmacros d:\content\cinmania.mvb
RegisterRoutine("ftui","InitRoutines","SU")
RegisterRoutine("ftui","ExecFullTextSearch","USSS")
; ...
InitRoutines(qchPath,1)
; ...
CreateButton("ftSearch", "&Search", \
    "ExecFullTextSearch(hwndApp, qchPath, `', `')")
; ...

C:\DDJ>dir d:\*.mvb
MSDNCD   MVB 270353088 04-05-93   5:58p

C:\DDJ>whmacros d:\msdncd.mvb
RegisterRoutine("msdncd", "Navigator", "USS")
Navigator(hwndApp, "Load", qchPath)
; ...
CreateButton("btn_prv","<<I&ndex","Navigator(hwndApp,\"Prev\",\"\")")
CreateButton("btn_nxt","Inde&x>>","Navigator(hwndApp,\"Next\",\"\")")



Table 1: WinHelp internal files

File		Description

bmx         Bitmap files, numbered (bm0, bm24, bm12, and so on). These names 
            do not start with a |
|CONTEXT    Context topic table
|CTXOMAP    Context mapping to topics
|FONT       Fonts available to help file
|KWBTREE    Keyword B-tree file
|KWDATA     Keyword mappings to topic file
|KWMAP      Map into the KWBTREE for quick access
|Phrases    A list of phrases used for compression of the |TOPIC file
|SYSTEM     Contains mostly information from .HPJ file
|TOMAP      List of pointers to topics
|TOPIC      Contains the actual help text (usually compressed)
|TTLBTREE   Topic titles B-tree
baggage     Appears under the filename exactly as specified in the help project

