2 * text-writer -- RTF-to-text translation writer code.
4 * Read RTF input, write text of document (text extraction).
6 * Wrapper must call WriterInit() once before processing any files,
7 * then set up input and call BeginFile() for each input file.
9 * This installs callbacks for the text and control token classes.
10 * The control class is necessary so that special characters such as
11 * \par, \tab, \sect, etc. can be converted.
13 * It's problematic what to do with text in headers and footers, and
14 * what to do about tables.
16 * This really is quite a stupid program; for instance, it could keep track
17 * of the current leader character and dump that out when a tab is encountered.
20 * 04 Feb 91 Paul DuBois dubois@primate.wisc.edu
22 * This software may be redistributed without restriction and used for
23 * any purpose whatsoever.
28 * - Updated for distribution 1.05.
30 * - Updated to compile under THINK C 6.0.
32 * - Added Mike Sendall's entries for Macintosh char map.
34 * - Uses charset map and output sequence map for character translation.
36 * - Updated for 1.10 distribution.
42 # include "rtf2text.h"
43 # include "charlist.h"
44 # include "debugtools.h"
46 DEFAULT_DEBUG_CHANNEL(richedit);
/*
 * Forward declarations of the routines used within this file.
 * TextClass and ControlClass are the handlers registered with
 * RTFSetClassCallback() for the rtfText and rtfControl token classes.
 * PutStdChar, PutLitChar and PutLitStr are the output helpers defined
 * further down.
 * NOTE(review): these are old-style declarations with empty parameter
 * lists, so (before C23) the compiler does not check call arguments
 * against them.
 */
48 static void TextClass ();
49 static void ControlClass ();
50 static void Destination ();
51 static void SpecialChar ();
52 static void PutStdChar ();
53 static void PutLitChar ();
54 static void PutLitStr ();
56 static char *outMap[rtfSC_MaxChar];
58 static CHARLIST charlist = {0, NULL, NULL};
60 int RTFToBuffer(char* pBuffer, int nBufferSize);
61 int RTFToBuffer(char* pBuffer, int nBufferSize)
64 /* check if the buffer is big enough to hold all characters */
65 /* we require one more for the '\0' */
69 if(nBufferSize < charlist.nCount + 1) {
70 return charlist.nCount + CHARLIST_CountChar(&charlist, '\n') + 1;
73 while(charlist.nCount)
75 *pBuffer = CHARLIST_Dequeue(&charlist);
91 * Initialize the writer.
97 RTFReadOutputMap (outMap,1);
104 /* install class callbacks */
106 RTFSetClassCallback (rtfText, TextClass);
107 RTFSetClassCallback (rtfControl, ControlClass);
114 * Write out a character. rtfMajor contains the input character, rtfMinor
115 * contains the corresponding standard character code.
117 * If the input character isn't in the charset map, try to print some
118 * representation of it.
128 if (rtfMinor != rtfSC_nothing)
129 PutStdChar (rtfMinor);
132 if (rtfMajor < 128) /* in ASCII range */
133 sprintf (buf, "[[%c]]", rtfMajor);
135 sprintf (buf, "[[\\'%02x]]", rtfMajor);
158 * This function notices destinations that should be ignored
159 * and skips to their ends. This keeps, for instance, picture
160 * data from being considered as plain text.
173 case rtfFNContNotice:
191 * These use the rtfSC_xxx standard character codes instead of just writing
192 * out ' ', '-', '"', etc., so that the mapping for these characters
193 * can be controlled by the text-map file.
211 PutStdChar (rtfSC_space); /* make sure cells are separated */
214 PutStdChar (rtfSC_nobrkspace);
220 PutStdChar (rtfSC_nobrkhyphen);
223 PutStdChar (rtfSC_bullet);
226 PutStdChar (rtfSC_emdash);
229 PutStdChar (rtfSC_endash);
232 PutStdChar (rtfSC_quoteleft);
235 PutStdChar (rtfSC_quoteright);
238 PutStdChar (rtfSC_quotedblleft);
241 PutStdChar (rtfSC_quotedblright);
248 * Eventually this should keep track of the destination of the
249 * current state and only write text when in the initial state.
251 * If the output sequence is unspecified in the output map, write
252 * the character's standard name instead. This makes map deficiencies
253 * obvious and provides an incentive to fix them. :-)
256 void PutStdChar (int stdCode)
259 char *oStr = (char *) NULL;
262 /* if (stdCode == rtfSC_nothing)
263 RTFPanic ("Unknown character code, logic error\n");
267 oStr = outMap[stdCode];
268 if (oStr == (char *) NULL) /* no output sequence in map */
270 sprintf (buf, "[[%s]]", RTFStdCharName (stdCode));
277 void PutLitChar (int c)
279 CHARLIST_Enqueue(&charlist, (char) c);
280 /* fputc (c, ostream); */
284 static void PutLitStr (char *s)
288 CHARLIST_Enqueue(&charlist, *s);
290 /* fputs (s, ostream); */