1457 lines
56 KiB
C#
1457 lines
56 KiB
C#
using System;
|
|
using System.IO;
|
|
using System.Collections;
|
|
using System.Globalization;
|
|
using System.Text;
|
|
using iTextSharp.text;
|
|
using iTextSharp.text.rtf.direct;
|
|
using iTextSharp.text.rtf.document;
|
|
using iTextSharp.text.rtf.parser.ctrlwords;
|
|
using iTextSharp.text.rtf.parser.destinations;
|
|
/*
|
|
* $Id: RtfParser.cs,v 1.4 2008/05/16 19:31:08 psoares33 Exp $
|
|
*
|
|
*
|
|
* Copyright 2007 by Howard Shank (hgshank@yahoo.com)
|
|
*
|
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
|
* (the "License"); you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at http://www.mozilla.org/MPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
* for the specific language governing rights and limitations under the License.
|
|
*
|
|
* The Original Code is 'iText, a free JAVA-PDF library'.
|
|
*
|
|
* The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
|
|
* the Initial Developer are Copyright (C) 1999-2006 by Bruno Lowagie.
|
|
* All Rights Reserved.
|
|
* Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
|
|
* are Copyright (C) 2000-2006 by Paulo Soares. All Rights Reserved.
|
|
*
|
|
* Contributor(s): all the names of the contributors are added in the source code
|
|
* where applicable.
|
|
*
|
|
* Alternatively, the contents of this file may be used under the terms of the
|
|
* LGPL license (the ?GNU LIBRARY GENERAL PUBLIC LICENSE?), in which case the
|
|
* provisions of LGPL are applicable instead of those above. If you wish to
|
|
* allow use of your version of this file only under the terms of the LGPL
|
|
* License and not to allow others to use your version of this file under
|
|
* the MPL, indicate your decision by deleting the provisions above and
|
|
* replace them with the notice and other provisions required by the LGPL.
|
|
* If you do not delete the provisions above, a recipient may use your version
|
|
* of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
|
|
*
|
|
* This library is free software; you can redistribute it and/or modify it
|
|
* under the terms of the MPL as stated above or under the terms of the GNU
|
|
* Library General Public License as published by the Free Software Foundation;
|
|
* either version 2 of the License, or any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
* FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
|
|
* details.
|
|
*
|
|
* If you didn't download this code from the following link, you should check if
|
|
* you aren't using an obsolete version:
|
|
* http://www.lowagie.com/iText/
|
|
*/
|
|
namespace iTextSharp.text.rtf.parser {
|
|
|
|
/**
|
|
* The RtfParser allows the importing of RTF documents or
|
|
* RTF document fragments. The RTF document or fragment is tokenised,
|
|
* font and color definitions corrected and then added to
|
|
* the document being written.
|
|
*
|
|
* @author Mark Hall (Mark.Hall@mail.room3b.eu)
|
|
* @author Howard Shank (hgshank@yahoo.com)
|
|
* @since 2.0.8
|
|
*/
|
|
|
|
public class RtfParser {
|
|
/**
|
|
* Debugging flag.
|
|
*/
|
|
private static bool debugParser = false; // DEBUG Files are unlikely to be read by any reader!
|
|
private String logFile = null;
|
|
private bool logging = false;
|
|
private bool logAppend = false;
|
|
|
|
/**
|
|
* The iText document to add the RTF document to.
|
|
*/
|
|
private Document document = null;
|
|
/**
|
|
* The RtfDocument to add the RTF document or fragment to.
|
|
*/
|
|
private RtfDocument rtfDoc = null;
|
|
/**
|
|
* The RtfKeywords that creates and handles keywords that are implemented.
|
|
*/
|
|
private RtfCtrlWordMgr rtfKeywordMgr = null;
|
|
/**
|
|
* The RtfImportMgr to store imported font and color mappings in.
|
|
*/
|
|
private RtfImportMgr importMgr = null;
|
|
/**
|
|
* The RtfDestinationMgr object to manage destinations.
|
|
*/
|
|
private RtfDestinationMgr destinationMgr = null;
|
|
/**
|
|
* Stack for saving states for groups
|
|
*/
|
|
private Stack stackState = null;
|
|
/**
|
|
* The current parser state.
|
|
*/
|
|
private RtfParserState currentState = null;
|
|
/**
|
|
* The pushback reader to read the input stream.
|
|
*/
|
|
private PushbackStream pbReader = null;
|
|
/**
|
|
* Conversion type. Identifies whether we are doing an import or a conversion.
|
|
*/
|
|
private int conversionType = TYPE_IMPORT_FULL;
|
|
|
|
|
|
/*
|
|
* Bitmapping:
|
|
*
|
|
* 0111 1111 1111 1111 = Unknown state
|
|
* 0xxx xxxx xxxx xxxx = In Header
|
|
* 1xxx xxxx xxxx xxxx = In Document
|
|
* 2xxx xxxx xxxx xxxx = Reserved
|
|
* 4xxx xxxx xxxx xxxx = Other
|
|
* 8xxx xxxx xxxx xxxx = Errors
|
|
*/
|
|
|
|
/*
|
|
* Header state values
|
|
*/
|
|
|
|
/**
|
|
* Currently the RTF document header is being parsed.
|
|
*/
|
|
public const int PARSER_IN_HEADER = (0x0 << 28) | 0x000000;
|
|
/**
|
|
* Currently the RTF charset is being parsed.
|
|
*/
|
|
public const int PARSER_IN_CHARSET = PARSER_IN_HEADER | 0x000001;
|
|
/**
|
|
* Currently the RTF deffont is being parsed.
|
|
*/
|
|
public const int PARSER_IN_DEFFONT = PARSER_IN_HEADER | 0x000002;
|
|
/**
|
|
* Currently the RTF font table is being parsed.
|
|
*/
|
|
public const int PARSER_IN_FONT_TABLE = PARSER_IN_HEADER | 0x000003;
|
|
/**
|
|
* Currently a RTF font table info element is being parsed.
|
|
*/
|
|
public const int PARSER_IN_FONT_TABLE_INFO = PARSER_IN_HEADER | 0x000004;
|
|
/**
|
|
* Currently the RTF filetbl is being parsed.
|
|
*/
|
|
public const int PARSER_IN_FILE_TABLE = PARSER_IN_HEADER | 0x000005;
|
|
/**
|
|
* Currently the RTF color table is being parsed.
|
|
*/
|
|
public const int PARSER_IN_COLOR_TABLE = PARSER_IN_HEADER | 0x000006;
|
|
/**
|
|
* Currently the RTF stylesheet is being parsed.
|
|
*/
|
|
public const int PARSER_IN_STYLESHEET = PARSER_IN_HEADER | 0x000007;
|
|
/**
|
|
* Currently the RTF listtables is being parsed.
|
|
*/
|
|
public const int PARSER_IN_LIST_TABLE = PARSER_IN_HEADER | 0x000008;
|
|
/**
|
|
* Currently the RTF listtable override is being parsed.
|
|
*/
|
|
public const int PARSER_IN_LISTOVERRIDE_TABLE = PARSER_IN_HEADER | 0x000009;
|
|
/**
|
|
* Currently the RTF revtbl is being parsed.
|
|
*/
|
|
public const int PARSER_IN_REV_TABLE = PARSER_IN_HEADER | 0x00000A;
|
|
/**
|
|
* Currently the RTF rsidtable is being parsed.
|
|
*/
|
|
public const int PARSER_IN_RSID_TABLE = PARSER_IN_HEADER | 0x0000B;
|
|
/**
|
|
* Currently the RTF generator is being parsed.
|
|
*/
|
|
public const int PARSER_IN_GENERATOR = PARSER_IN_HEADER | 0x00000C;
|
|
/**
|
|
* Currently the RTF Paragraph group properties Table (word 2002)
|
|
*/
|
|
public const int PARSER_IN_PARAGRAPH_TABLE = PARSER_IN_HEADER | 0x00000E;
|
|
/**
|
|
* Currently the RTF Old Properties.
|
|
*/
|
|
public const int PARSER_IN_OLDCPROPS = PARSER_IN_HEADER | 0x00000F;
|
|
/**
|
|
* Currently the RTF Old Properties.
|
|
*/
|
|
public const int PARSER_IN_OLDPPROPS = PARSER_IN_HEADER | 0x000010;
|
|
/**
|
|
* Currently the RTF Old Properties.
|
|
*/
|
|
public const int PARSER_IN_OLDTPROPS = PARSER_IN_HEADER | 0x000012;
|
|
/**
|
|
* Currently the RTF Old Properties.
|
|
*/
|
|
public const int PARSER_IN_OLDSPROPS = PARSER_IN_HEADER | 0x000013;
|
|
/**
|
|
* Currently the RTF User Protection Information.
|
|
*/
|
|
public const int PARSER_IN_PROT_USER_TABLE = PARSER_IN_HEADER | 0x000014;
|
|
/**
|
|
* Currently the Latent Style and Formatting usage restrictions
|
|
*/
|
|
public const int PARSER_IN_LATENTSTYLES = PARSER_IN_HEADER | 0x000015;
|
|
|
|
public const int PARSER_IN_PARAGRAPH_GROUP_PROPERTIES =PARSER_IN_HEADER | 0x000016;
|
|
|
|
/*
|
|
* Document state values
|
|
*/
|
|
|
|
/**
|
|
* Currently the RTF document content is being parsed.
|
|
*/
|
|
public const int PARSER_IN_DOCUMENT = (0x2 << 28 ) | 0x000000;
|
|
|
|
/**
|
|
* Currently the RTF info group is being parsed.
|
|
*/
|
|
public const int PARSER_IN_INFO_GROUP = PARSER_IN_DOCUMENT | 0x000001;
|
|
|
|
|
|
public const int PARSER_IN_UPR = PARSER_IN_DOCUMENT | 0x000002;
|
|
/**
|
|
* Currently a shppict control word is being parsed.
|
|
*/
|
|
public const int PARSER_IN_SHPPICT = PARSER_IN_DOCUMENT | 0x000010; //16
|
|
/**
|
|
* Currently a pict control word is being parsed.
|
|
*/
|
|
public const int PARSER_IN_PICT = PARSER_IN_DOCUMENT | 0x000011; //17
|
|
/**
|
|
* Currently a picprop control word is being parsed.
|
|
*/
|
|
public const int PARSER_IN_PICPROP = PARSER_IN_DOCUMENT | 0x000012; //18
|
|
/**
|
|
* Currently a blipuid control word is being parsed.
|
|
*/
|
|
public const int PARSER_IN_BLIPUID = PARSER_IN_DOCUMENT | 0x000013; //18
|
|
|
|
/* other states */
|
|
/**
|
|
* The parser is at the beginning or the end of the file.
|
|
*/
|
|
public const int PARSER_STARTSTOP = (0x4 << 28)| 0x0001;
|
|
/* ERRORS */
|
|
/**
|
|
* Currently the parser is in an error state.
|
|
*/
|
|
public const int PARSER_ERROR = (0x8 << 28) | 0x0000;
|
|
/**
|
|
* The parser reached the end of the file.
|
|
*/
|
|
public const int PARSER_ERROR_EOF = PARSER_ERROR | 0x0001;
|
|
/**
|
|
* Currently the parser is in an unknown state.
|
|
*/
|
|
public const int PARSER_IN_UNKNOWN = PARSER_ERROR | 0x0FFFFFFF;
|
|
|
|
|
|
/**
|
|
* Conversion type is unknown
|
|
*/
|
|
public const int TYPE_UNIDENTIFIED = -1;
|
|
/**
|
|
* Conversion type is an import. Uses direct content to add everything.
|
|
* This is what the original import does.
|
|
*/
|
|
public const int TYPE_IMPORT_FULL = 0;
|
|
/**
|
|
* Conversion type is an import of a partial file/fragment. Uses direct content to add everything.
|
|
*/
|
|
public const int TYPE_IMPORT_FRAGMENT = 1;
|
|
/**
|
|
* Conversion type is a conversion. This uses the document (not rtfDoc) to add
|
|
* all the elements making it a different supported documents depending on the writer used.
|
|
*/
|
|
public const int TYPE_CONVERT = 2;
|
|
|
|
|
|
/**
|
|
* Destination is normal. Text is processed.
|
|
*/
|
|
public const int DESTINATION_NORMAL = 0;
|
|
/**
|
|
* Destination is skipping. Text is ignored.
|
|
*/
|
|
public const int DESTINATION_SKIP = 1;
|
|
|
|
//////////////////////////////////// TOKENISE VARIABLES ///////////////////
|
|
/*
|
|
* State flags use 4/28 bitmask.
|
|
* First 4 bits (nibble) indicates major state. Used for unknown and error
|
|
* Last 28 bits indicates the value;
|
|
*/
|
|
|
|
/**
|
|
* The RtfTokeniser is in its ground state. Any token may follow.
|
|
*/
|
|
public const int TOKENISER_NORMAL = 0x00000000;
|
|
/**
|
|
* The RtfTokeniser is skipping bytes (see binSkipByteCount).
|
|
*/
|
|
public const int TOKENISER_SKIP_BYTES = 0x00000001;
|
|
/**
|
|
* The RtfTokeniser is skipping the current group; control words and characters are ignored.
|
|
*/
|
|
public const int TOKENISER_SKIP_GROUP = 0x00000002;
|
|
/**
|
|
* The RtfTokeniser is currently reading binary stream.
|
|
*/
|
|
public const int TOKENISER_BINARY= 0x00000003;
|
|
/**
|
|
* The RtfTokeniser is currently reading hex data.
|
|
*/
|
|
public const int TOKENISER_HEX= 0x00000004;
|
|
/**
|
|
* The RtfTokeniser ignore result
|
|
*/
|
|
public const int TOKENISER_IGNORE_RESULT= 0x00000005;
|
|
/**
|
|
* The RtfTokeniser is currently in error state
|
|
*/
|
|
public const int TOKENISER_STATE_IN_ERROR = unchecked((int)0x80000000); // 1000 0000 0000 0000 0000 0000 0000 0000
|
|
/**
|
|
* The RtfTokeniser is currently in an unknown state
|
|
*/
|
|
public const int TOKENISER_STATE_IN_UNKOWN = unchecked((int)0xFF000000); // 1111 0000 0000 0000 0000 0000 0000 0000
|
|
|
|
/**
|
|
* The current group nesting level.
|
|
*/
|
|
private int groupLevel = 0;
|
|
/**
|
|
* The current document group nesting level. Used for fragments.
|
|
*/
|
|
private int docGroupLevel = 0;
|
|
/**
|
|
* When the tokeniser is Binary.
|
|
*/
|
|
private long binByteCount = 0;
|
|
/**
|
|
* When the tokeniser is set to skip bytes, binSkipByteCount is the number of bytes to skip.
|
|
*/
|
|
private long binSkipByteCount = 0;
|
|
/**
|
|
* When the tokeniser is set to skip to next group, this is the group level to return to.
|
|
*/
|
|
private int skipGroupLevel = 0;
|
|
|
|
//RTF parser error codes
|
|
public const int errOK =0; // Everything's fine!
|
|
public const int errStackUnderflow = -1; // Unmatched '}'
|
|
public const int errStackOverflow = -2; // Too many '{' -- memory exhausted
|
|
public const int errUnmatchedBrace = -3; // RTF ended during an open group.
|
|
public const int errInvalidHex = -4; // invalid hex character found in data
|
|
public const int errBadTable = -5; // RTF table (sym or prop) invalid
|
|
public const int errAssertion = -6; // Assertion failure
|
|
public const int errEndOfFile = -7; // End of file reached while reading RTF
|
|
public const int errCtrlWordNotFound = -8; // control word was not found
|
|
//////////////////////////////////// TOKENISE VARIABLES ///////////////////
|
|
|
|
|
|
//////////////////////////////////// STATS VARIABLES ///////////////////
|
|
/**
|
|
* Total bytes read.
|
|
*/
|
|
private long byteCount = 0;
|
|
/**
|
|
* Total control words processed.
|
|
*
|
|
* Contains both known and unknown.
|
|
*
|
|
* <code>ctrlWordCount</code> should equal
|
|
* <code>ctrlWordHandledCount</code> + <code>ctrlWordNotHandledCount</code> + <code>ctrlWordSkippedCount</code>
|
|
*/
|
|
private long ctrlWordCount = 0;
|
|
/**
|
|
* Total { encountered as an open group token.
|
|
*/
|
|
private long openGroupCount = 0;
|
|
/**
|
|
* Total } encountered as a close group token.
|
|
*/
|
|
private long closeGroupCount = 0;
|
|
/**
|
|
* Total clear text characters processed.
|
|
*/
|
|
private long characterCount = 0;
|
|
/**
|
|
* Total control words recognized.
|
|
*/
|
|
private long ctrlWordHandledCount = 0;
|
|
/**
|
|
* Total control words not handled.
|
|
*/
|
|
private long ctrlWordNotHandledCount = 0;
|
|
/**
|
|
* Total control words skipped.
|
|
*/
|
|
private long ctrlWordSkippedCount = 0;
|
|
/**
|
|
* Total groups skipped. Includes { and } as a group.
|
|
*/
|
|
private long groupSkippedCount = 0;
|
|
/**
|
|
* Start time as a long.
|
|
*/
|
|
private long startTime = 0;
|
|
/**
|
|
* Stop time as a long.
|
|
*/
|
|
private long endTime = 0;
|
|
/**
|
|
* Start date as a date.
|
|
*/
|
|
private DateTime startDate;
|
|
/**
|
|
* End date as a date.
|
|
*/
|
|
private DateTime endDate;
|
|
//////////////////////////////////// STATS VARIABLES ///////////////////
|
|
/**
|
|
* Last control word and parameter processed.
|
|
*/
|
|
private RtfCtrlWordData lastCtrlWordParam = null;
|
|
|
|
/** The <code>RtfCtrlWordListener</code>. */
|
|
private ArrayList listeners = new ArrayList();
|
|
|
|
/* *********
|
|
* READER *
|
|
***********/
|
|
/**
|
|
* Imports a complete RTF document.
|
|
*
|
|
* @param readerIn
|
|
* The Reader to read the RTF document from.
|
|
* @param rtfDoc
|
|
* The RtfDocument to add the imported document to.
|
|
* @throws IOException On I/O errors.
|
|
*/
|
|
public void ImportRtfDocument(Stream readerIn, RtfDocument rtfDoc) {
    // Nothing to read from, or nothing to import into: silently do nothing.
    if (readerIn == null || rtfDoc == null) {
        return;
    }

    Init(TYPE_IMPORT_FULL, rtfDoc, readerIn, null);
    SetCurrentDestination(RtfDestinationMgr.DESTINATION_NULL);

    // Statistics: DateTime ticks are converted to milliseconds.
    startDate = DateTime.Now;
    startTime = startDate.Ticks / TimeSpan.TicksPerMillisecond;
    groupLevel = 0;

    try {
        Tokenise();
    }
    catch {
        // Any failure raised while tokenising is discarded here; the end
        // time is still recorded below.
    }

    endDate = DateTime.Now;
    endTime = endDate.Ticks / TimeSpan.TicksPerMillisecond;
}
|
|
|
|
/**
|
|
* Converts an RTF document to an iText document.
|
|
*
|
|
* Usage: Create a parser object and call this method with the input stream and the iText Document object
|
|
*
|
|
* @param readerIn
|
|
* The Reader to read the RTF file from.
|
|
* @param doc
|
|
* The iText document that the RTF file is to be added to.
|
|
* @throws IOException
|
|
* On I/O errors.
|
|
*/
|
|
public void ConvertRtfDocument(Stream readerIn, Document doc) {
    // Nothing to read from, or nothing to convert into: silently do nothing.
    if (readerIn == null || doc == null) {
        return;
    }

    // A conversion has no RtfDocument; everything goes to the iText Document.
    Init(TYPE_CONVERT, null, readerIn, doc);
    SetCurrentDestination(RtfDestinationMgr.DESTINATION_DOCUMENT);

    // Statistics: DateTime ticks are converted to milliseconds.
    startDate = DateTime.Now;
    startTime = startDate.Ticks / TimeSpan.TicksPerMillisecond;
    groupLevel = 0;

    // Unlike ImportRtfDocument, exceptions from the tokeniser propagate to the caller.
    Tokenise();

    endDate = DateTime.Now;
    endTime = endDate.Ticks / TimeSpan.TicksPerMillisecond;
}
|
|
|
|
/**
|
|
* Imports an RTF fragment.
|
|
*
|
|
* @param readerIn
|
|
* The Reader to read the RTF fragment from.
|
|
* @param rtfDoc
|
|
* The RTF document to add the RTF fragment to.
|
|
* @param importMappings
|
|
* The RtfImportMappings defining font and color mappings for the fragment.
|
|
* @throws IOException
|
|
* On I/O errors.
|
|
*/
|
|
public void ImportRtfFragment(Stream readerIn, RtfDocument rtfDoc, RtfImportMappings importMappings) {
    // All three arguments are required; otherwise silently do nothing.
    if (readerIn == null || rtfDoc == null || importMappings == null) {
        return;
    }

    Init(TYPE_IMPORT_FRAGMENT, rtfDoc, readerIn, null);
    HandleImportMappings(importMappings);
    SetCurrentDestination(RtfDestinationMgr.DESTINATION_DOCUMENT);

    // A fragment starts at group level 1, already in the document state.
    groupLevel = 1;
    SetParserState(RtfParser.PARSER_IN_DOCUMENT);

    // Statistics: DateTime ticks are converted to milliseconds.
    startDate = DateTime.Now;
    startTime = startDate.Ticks / TimeSpan.TicksPerMillisecond;

    Tokenise();

    endDate = DateTime.Now;
    endTime = endDate.Ticks / TimeSpan.TicksPerMillisecond;
}
|
|
|
|
// listener methods
|
|
|
|
/**
|
|
* Adds a <CODE>EventListener</CODE> to the <CODE>RtfCtrlWordMgr</CODE>.
|
|
*
|
|
* @param listener
|
|
* the new EventListener.
|
|
*/
|
|
public void AddListener(IEventListener listener) {
    // Listeners are kept on the parser; Init() passes the ones that are
    // IRtfCtrlWordListener on to the RtfCtrlWordMgr.
    this.listeners.Add(listener);
}
|
|
|
|
/**
|
|
* Removes a <CODE>EventListener</CODE> from the <CODE>RtfCtrlWordMgr</CODE>.
|
|
*
|
|
* @param listener
|
|
* the EventListener that has to be removed.
|
|
*/
|
|
public void RemoveListener(IEventListener listener) {
    // Only the parser's own listener list is modified here.
    this.listeners.Remove(listener);
}
|
|
|
|
/**
|
|
* Initialize the parser object values.
|
|
*
|
|
* @param type Type of conversion or import
|
|
* @param rtfDoc The <code>RtfDocument</code>
|
|
* @param readerIn The input stream
|
|
* @param doc The iText <code>Document</code>
|
|
*/
|
|
private void Init(int type, RtfDocument rtfDoc, Stream readerIn, Document doc) {
    // Reset the statistics counters before anything is read.
    Init_stats();

    // Obtain the PushbackStream the parser reads its input from.
    this.pbReader = Init_Reader(readerIn);

    this.conversionType = type;
    this.rtfDoc = rtfDoc;
    this.document = doc;

    // Start with a fresh parser state and an empty group-state stack.
    this.currentState = new RtfParserState();
    this.stackState = new Stack();
    this.SetParserState(PARSER_STARTSTOP);

    this.importMgr = new RtfImportMgr(this.rtfDoc, this.document);

    // Fetch the destination manager and point it at this parser.
    this.destinationMgr = RtfDestinationMgr.GetInstance(this);
    RtfDestinationMgr.SetParser(this);

    // Required: HandleCtrlWord() dispatches through this manager. Do not remove.
    this.rtfKeywordMgr = new RtfCtrlWordMgr(this, this.pbReader);

    // Hand every registered listener that is a control-word listener to the manager.
    foreach (object candidate in listeners) {
        IRtfCtrlWordListener ctrlWordListener = candidate as IRtfCtrlWordListener;
        if (ctrlWordListener != null) {
            this.rtfKeywordMgr.AddRtfCtrlWordListener(ctrlWordListener);
        }
    }
}
|
|
/**
|
|
* Initialize the statistics values.
|
|
*/
|
|
protected void Init_stats() {
    // Raw input counters.
    this.byteCount = 0;
    this.characterCount = 0;

    // Group counters.
    this.openGroupCount = 0;
    this.closeGroupCount = 0;
    this.groupSkippedCount = 0;

    // Control word counters.
    this.ctrlWordCount = 0;
    this.ctrlWordHandledCount = 0;
    this.ctrlWordNotHandledCount = 0;
    this.ctrlWordSkippedCount = 0;

    // Timing. startDate and endDate are left untouched.
    this.startTime = 0;
    this.endTime = 0;
}
|
|
|
|
/**
|
|
* Returns the input stream as a PushbackStream: the stream itself when it
* already is a PushbackStream, otherwise a new PushbackStream wrapping it.
|
|
*
|
|
* @param readerIn
|
|
* The Reader object for the input file.
|
|
* @return
|
|
* PushbackReader object
|
|
*/
|
|
private PushbackStream Init_Reader(Stream readerIn) {
    // Reuse the caller's stream when it is already a PushbackStream;
    // otherwise wrap it in a new one.
    PushbackStream existing = readerIn as PushbackStream;
    if (existing == null) {
        return new PushbackStream(readerIn);
    }
    return existing;
}
|
|
|
|
/**
|
|
* Imports the mappings defined in the RtfImportMappings into the
|
|
* RtfImportMgr of this RtfParser.
|
|
*
|
|
* @param importMappings
|
|
* The RtfImportMappings to import.
|
|
*/
|
|
private void HandleImportMappings(RtfImportMappings importMappings) {
    // Each mapping table is keyed by a string number; every entry is handed
    // to the import manager under that key.

    // Fonts: key -> font name.
    foreach (String fontKey in importMappings.GetFontMappings().Keys) {
        String fontName = (String) importMappings.GetFontMappings()[fontKey];
        this.importMgr.ImportFont(fontKey, fontName);
    }

    // Colors: key -> Color.
    foreach (String colorKey in importMappings.GetColorMappings().Keys) {
        Color mappedColor = (Color) importMappings.GetColorMappings()[colorKey];
        this.importMgr.ImportColor(colorKey, mappedColor);
    }

    // Lists: key -> List.
    foreach (String listKey in importMappings.GetListMappings().Keys) {
        List mappedList = (List) importMappings.GetListMappings()[listKey];
        this.importMgr.ImportList(listKey, mappedList);
    }

    // Stylesheet lists: key -> List.
    foreach (String stylesheetKey in importMappings.GetStylesheetListMappings().Keys) {
        List mappedStylesheetList = (List) importMappings.GetStylesheetListMappings()[stylesheetKey];
        this.importMgr.ImportStylesheetList(stylesheetKey, mappedStylesheetList);
    }
}
|
|
|
|
|
|
/* *****************************************
|
|
* DOCUMENT CONTROL METHODS
|
|
*
|
|
* Handles -
|
|
* handleOpenGroup: Open groups - '{'
|
|
* handleCloseGroup: Close groups - '}'
|
|
* handleCtrlWord: Ctrl Words - '\...'
|
|
* handleCharacter: Characters - Plain Text, etc.
|
|
*
|
|
*/
|
|
|
|
/**
|
|
* Handles open group tokens. ({)
|
|
*
|
|
* @return errOK if ok, other if an error occurred.
|
|
*/
|
|
public int HandleOpenGroup() {
    // Always reports errOK; no failure path sets anything else.
    int result = errOK;
    this.openGroupCount++;  // stats
    this.groupLevel++;      // current group level in tokeniser
    this.docGroupLevel++;   // current group level in document
    if (this.GetTokeniserState() == TOKENISER_SKIP_GROUP) {
        this.groupSkippedCount++;
    }

    // First tell the destination that is current BEFORE the state push that
    // a sub group is opening.
    RtfDestination dest = (RtfDestination)this.GetCurrentDestination();
    bool handled = false;

    if (dest != null) {
        if (debugParser) {
            RtfParser.OutputDebug(this.rtfDoc, groupLevel, "DEBUG: before dest.HandleOpeningSubGroup()");
            RtfParser.OutputDebug(this.rtfDoc, groupLevel, "DEBUG: destination=" + dest.ToString());
        }
        handled = dest.HandleOpeningSubGroup();
        if (debugParser) {
            RtfParser.OutputDebug(this.rtfDoc, groupLevel, "DEBUG: after dest.HandleOpeningSubGroup()");
        }
    }

    // Save the current state and continue with a copy of it.
    this.stackState.Push(this.currentState);
    this.currentState = new RtfParserState(this.currentState);
    // do not set this true until after the state is pushed
    // otherwise it inserts a { where one does not belong.
    this.currentState.newGroup = true;
    dest = (RtfDestination)this.GetCurrentDestination();

    if (debugParser) {
        RtfParser.OutputDebug(this.rtfDoc, groupLevel, "DEBUG: HandleOpenGroup()");
        if (this.lastCtrlWordParam != null) {
            RtfParser.OutputDebug(this.rtfDoc, groupLevel, "DEBUG: LastCtrlWord=" + this.lastCtrlWordParam.ctrlWord);
        }
        RtfParser.OutputDebug(this.rtfDoc, groupLevel, "DEBUG: grouplevel=" + groupLevel.ToString());
        // dest may be null (the code below checks for it), so guard the
        // ToString() call instead of throwing NullReferenceException.
        RtfParser.OutputDebug(this.rtfDoc, groupLevel, "DEBUG: destination=" + (dest != null ? dest.ToString() : "null"));
    }

    if (dest != null) {
        handled = dest.HandleOpenGroup();
    }

    if (debugParser) {
        RtfParser.OutputDebug(this.rtfDoc, groupLevel, "DEBUG: after dest.HandleOpenGroup(); handled=" + handled.ToString());
    }

    return result;
}
|
|
public static void OutputDebug(object doc, int groupLevel, String str) {
    // The message always goes to the console, even when there is no document.
    Console.Out.WriteLine(str);
    if (doc == null) {
        return;
    }

    // Two spaces of indent per group level; negative levels count as zero.
    String indentedLine = "\n" + new String(' ', Math.Max(groupLevel, 0) * 2) + str;

    RtfDocument rtfTarget = doc as RtfDocument;
    if (rtfTarget != null) {
        rtfTarget.Add(new RtfDirectContent(indentedLine));
        return;
    }

    Document documentTarget = doc as Document;
    if (documentTarget != null) {
        try {
            documentTarget.Add(new RtfDirectContent(indentedLine));
        } catch (DocumentException) {
            // A DocumentException while adding debug content is ignored.
        }
    }
    // Any other kind of object only gets the console output.
}
|
|
/**
|
|
* Handles close group tokens. (})
|
|
*
|
|
* @return errOK if ok, other if an error occurred.
|
|
*/
|
|
public int HandleCloseGroup() {
    int result = errOK;
    this.closeGroupCount++; // stats

    // While a group is being skipped the destination is not notified.
    if (this.GetTokeniserState() != TOKENISER_SKIP_GROUP) {
        RtfDestination dest = (RtfDestination)this.GetCurrentDestination();
        bool handled = false;

        if (debugParser) {
            RtfParser.OutputDebug(this.rtfDoc, groupLevel, "DEBUG: HandleCloseGroup()");
            if (this.lastCtrlWordParam != null) {
                RtfParser.OutputDebug(this.rtfDoc, groupLevel, "DEBUG: LastCtrlWord=" + this.lastCtrlWordParam.ctrlWord);
            }
            RtfParser.OutputDebug(this.rtfDoc, groupLevel, "DEBUG: grouplevel=" + groupLevel.ToString());
            // dest may be null (the code below checks for it), so guard the
            // ToString() call instead of throwing NullReferenceException.
            RtfParser.OutputDebug(this.rtfDoc, groupLevel, "DEBUG: destination=" + (dest != null ? dest.ToString() : "null"));
            RtfParser.OutputDebug(this.rtfDoc, groupLevel, "");
        }

        if (dest != null) {
            handled = dest.HandleCloseGroup();
        }
        if (debugParser) {
            RtfParser.OutputDebug(this.rtfDoc, groupLevel, "DEBUG: After dest.HandleCloseGroup(); handled = " + handled.ToString());
            RtfParser.OutputDebug(this.rtfDoc, groupLevel, "");
        }
    }

    // Restore the state saved by the matching HandleOpenGroup(); an empty
    // stack means there was no matching open group.
    if (this.stackState.Count > 0) {
        this.currentState = (RtfParserState)this.stackState.Pop();
    } else {
        result = errStackUnderflow;
    }

    // The levels are decremented even when the stack underflowed.
    this.docGroupLevel--;
    this.groupLevel--;

    // Stop skipping once the level has dropped below the recorded skip level.
    if (this.GetTokeniserState() == TOKENISER_SKIP_GROUP && this.groupLevel < this.skipGroupLevel) {
        this.SetTokeniserState(TOKENISER_NORMAL);
    }

    return result;
}
|
|
|
|
|
|
/**
 * Handles control word tokens.
 *
 * While the tokeniser is skipping a group the control word is only counted.
 * Otherwise it is passed to the keyword manager together with the current
 * group level, and the handled / not-handled statistics are updated from
 * the keyword manager's result.
 *
 * @param ctrlWordData The control word to handle.
 * @return Always errOK: a failure reported by the keyword manager is counted
 *         but deliberately not propagated ("hack for now").
 */
public int HandleCtrlWord(RtfCtrlWordData ctrlWordData) {
    ctrlWordCount++; // stats

    if (debugParser) {
        RtfParser.OutputDebug(rtfDoc, groupLevel, "DEBUG: handleCtrlWord=" + ctrlWordData.ctrlWord);
    }

    bool skipping = GetTokeniserState() == TOKENISER_SKIP_GROUP;
    if (skipping) {
        ctrlWordSkippedCount++;
        if (debugParser) {
            RtfParser.OutputDebug(rtfDoc, groupLevel, "DEBUG: SKIPPED");
        }
        return errOK;
    }

    int keywordResult = rtfKeywordMgr.HandleKeyword(ctrlWordData, groupLevel);
    if (keywordResult != errOK) {
        // Not handled; the error code is swallowed on purpose for now.
        ctrlWordNotHandledCount++;
    } else {
        ctrlWordHandledCount++;
    }

    return errOK;
}
|
|
|
|
/**
 * Handles text tokens by handing the character on to the current
 * destination, if there is one. Characters are counted but otherwise
 * ignored while the tokeniser is skipping a group.
 *
 * @param nextChar
 * 		The text token to handle.
 * @return Always errOK; the destination's own result is not propagated.
 */
public int HandleCharacter(int nextChar) {
    characterCount++; // stats

    if (GetTokeniserState() != TOKENISER_SKIP_GROUP) {
        RtfDestination target = GetCurrentDestination();
        if (target != null) {
            target.HandleCharacter(nextChar);
        }
    }

    return errOK;
}
|
|
|
|
/**
 * Returns the parser state object that is currently in effect.
 *
 * @return
 * 		The current RtfParserState object.
 */
public RtfParserState GetState() {
    return currentState;
}
|
|
|
|
/**
 * Returns the parser state value stored in the current state object.
 *
 * @return
 * 		The parserState value of the current state.
 */
public int GetParserState() {
    return currentState.parserState;
}
|
|
|
|
/**
 * Stores a new parser state value in the current state object.
 *
 * @param newState
 * 		The new state for the parser
 * @return
 * 		The parser state value read back after the assignment.
 */
public int SetParserState(int newState) {
    currentState.parserState = newState;
    return GetParserState();
}
|
|
|
|
/**
 * Returns the conversion type this parser is running with.
 *
 * @return
 * 		The value of the conversionType field (compared elsewhere in this
 * 		class against TYPE_CONVERT, TYPE_IMPORT_FULL and TYPE_IMPORT_FRAGMENT).
 */
public int GetConversionType() {
    return conversionType;
}
|
|
|
|
/**
 * Returns the RTF document object used by this parser.
 *
 * @return
 * 		The rtfDoc field.
 */
public RtfDocument GetRtfDocument() {
    return rtfDoc;
}
|
|
|
|
/**
 * Returns the Document object used by this parser.
 *
 * @return
 * 		The document field (not the RtfDocument; see GetRtfDocument for that).
 */
public Document GetDocument() {
    return document;
}
|
|
|
|
/**
 * Returns the import manager used by this parser.
 *
 * @return
 * 		The importMgr field.
 */
public RtfImportMgr GetImportManager() {
    return this.importMgr;
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////////
|
|
// accessors for destinations
|
|
/**
 * Looks up the named destination and makes it the destination of the
 * current state. When no destination is registered under that name the
 * tokeniser is switched to skip-group mode instead.
 *
 * NOTE(review): false is returned on both paths, so the return value does
 * not tell the caller whether a destination was found. This is preserved
 * as-is; confirm against callers before changing it.
 *
 * @param destination The name of the destination to set.
 * @return Always false.
 */
public bool SetCurrentDestination(String destination) {
    RtfDestination resolved = RtfDestinationMgr.GetDestination(destination);

    if (resolved == null) {
        // Unknown destination: ignore the rest of the group.
        SetTokeniserStateSkipGroup();
    } else {
        currentState.destination = resolved;
    }

    return false;
}
|
|
/**
 * Returns the destination object of the current state.
 *
 * @return The destination stored in the current state.
 */
public RtfDestination GetCurrentDestination() {
    return currentState.destination;
}
|
|
/**
 * Looks up a destination by name via RtfDestinationMgr.
 *
 * @param destination The string destination.
 * @return Whatever RtfDestinationMgr.GetDestination returns for that name.
 */
public RtfDestination GetDestination(String destination) {
    RtfDestination found = RtfDestinationMgr.GetDestination(destination);
    return found;
}
|
|
|
|
/**
 * Helper method reporting the new-group flag of the current state.
 *
 * @return The value of the current state's newGroup flag.
 */
public bool IsNewGroup() {
    return currentState.newGroup;
}
|
|
/**
 * Helper method to set the new-group flag of the current state.
 *
 * @param value The bool value to set the flag to
 * @return The value of newGroup read back after the assignment
 */
public bool SetNewGroup(bool value) {
    currentState.newGroup = value;
    return IsNewGroup();
}
|
|
|
|
/* ************
|
|
* TOKENISER *
|
|
**************/
|
|
|
|
/**
 * Read through the input stream (pbReader) byte by byte and dispatch tokens.
 *
 * '{' and '}' open and close groups, '\' starts a control word, CR/LF are
 * ignored, and every other byte is treated as character data. While the
 * tokeniser is in binary state all bytes are passed straight to ParseChar.
 * In hex state two bytes are read as a pair of hexadecimal digits and the
 * decoded value is passed on as a single character.
 *
 * Tokenising stops silently when the stream ends, when a control word or
 * character handler reports an error, or when a hex pair is truncated or
 * malformed. Only when the loop ends at end of stream is the current
 * destination (if any) closed.
 */
public void Tokenise() {
    int nextChar = 0;
    this.SetTokeniserState(TOKENISER_NORMAL); // set initial tokeniser state

    while ((nextChar = this.pbReader.ReadByte()) != -1) {
        this.byteCount++;

        // Binary data is handled directly; no byte has syntactic meaning.
        if (this.GetTokeniserState() == TOKENISER_BINARY) {
            if (ParseChar(nextChar) != errOK) {
                return;
            }
            continue;
        }

        switch (nextChar) {
            case '{': // scope delimiter - Open
                this.HandleOpenGroup();
                break;
            case '}': // scope delimiter - Close
                this.HandleCloseGroup();
                break;
            case '\n': // noise character
            case '\r': // noise character
                break;
            case '\\': // Control word start delimiter
                if (ParseCtrlWord(pbReader) != errOK) {
                    // TODO: Indicate some type of error
                    return;
                }
                break;
            default:
                if (groupLevel == 0) { // BOMs
                    break;
                }
                if (this.GetTokeniserState() == TOKENISER_HEX) {
                    int secondDigit = pbReader.ReadByte();
                    if (secondDigit == -1) {
                        return;
                    }
                    this.byteCount++;

                    // Build the pair from the two *characters*. Appending the
                    // int values would produce their decimal text instead
                    // (e.g. 'e' -> "101") and decode the wrong value.
                    string hexPair = new string(new char[] { (char)nextChar, (char)secondDigit });
                    if (!int.TryParse(hexPair, NumberStyles.HexNumber, CultureInfo.InvariantCulture, out nextChar)) {
                        return;
                    }
                    this.SetTokeniserState(TOKENISER_NORMAL);
                }
                if (ParseChar(nextChar) != errOK) {
                    return; // some error occurred. we should send a real error
                }
                break;
        }
    }

    RtfDestination dest = this.GetCurrentDestination();
    if (dest != null) {
        dest.CloseDestination();
    }
}
|
|
|
|
/**
 * Process the character and send it on via HandleCharacter.
 *
 * Before forwarding, the remaining-byte counter of the active counting
 * state is decremented: binByteCount while in binary state, binSkipByteCount
 * while in skip-bytes state. When a counter reaches zero (or below) the
 * tokeniser is switched back to normal state.
 *
 * @param nextChar
 * 		The character to process
 * @return
 * 		The result of HandleCharacter.
 */
private int ParseChar(int nextChar) {
    if (GetTokeniserState() == TOKENISER_BINARY) {
        binByteCount--;
        if (binByteCount <= 0) {
            SetTokeniserStateNormal();
        }
    }

    // The state is re-read here on purpose: it may just have changed above.
    if (GetTokeniserState() == TOKENISER_SKIP_BYTES) {
        binSkipByteCount--;
        if (binSkipByteCount <= 0) {
            SetTokeniserStateNormal();
        }
    }

    return HandleCharacter(nextChar);
}
|
|
|
|
/**
 * Parses a keyword and its parameter if one exists, then hands the result
 * to HandleCtrlWord and remembers it in lastCtrlWordParam.
 *
 * The byte following the backslash decides the form:
 * - not a letter or digit: a one-character control symbol;
 * - otherwise: that character followed by any further letters forms the
 *   control word, optionally followed by '-' and/or a run of digits that
 *   form the parameter.
 * The delimiter that ends the control word is pushed back into the stream
 * unless it is a single space, which is consumed.
 *
 * Reaching the end of the stream while scanning the word or its digits
 * simply ends the scan: the end-of-stream marker is neither counted as a
 * byte nor pushed back.
 *
 * @param reader
 * 		This is a pushback reader for file input.
 * @return
 * 		errEndOfFile if the stream ends directly after the backslash or
 * 		directly after a '-' sign; otherwise the result of HandleCtrlWord.
 */
private int ParseCtrlWord(PushbackStream reader) {
    const int EndOfStream = -1;
    int result = errOK;

    int nextChar = reader.ReadByte();
    if (nextChar == EndOfStream) {
        return errEndOfFile;
    }
    this.byteCount++;

    RtfCtrlWordData ctrlWordParam = new RtfCtrlWordData();

    // Control symbol: a single non-alphanumeric character.
    if (!Char.IsLetterOrDigit((char)nextChar)) {
        ctrlWordParam.ctrlWord = ((char)nextChar).ToString();
        result = this.HandleCtrlWord(ctrlWordParam);
        lastCtrlWordParam = ctrlWordParam;
        return result;
    }

    // Control word: first character plus all following letters.
    StringBuilder parsedCtrlWord = new StringBuilder();
    do {
        parsedCtrlWord.Append((char)nextChar);
        nextChar = reader.ReadByte();
        if (nextChar == EndOfStream) {
            break;
        }
        this.byteCount++;
    } while (Char.IsLetter((char)nextChar));

    ctrlWordParam.ctrlWord = parsedCtrlWord.ToString();

    // Optional sign of the numeric parameter.
    if (nextChar == '-') {
        ctrlWordParam.isNeg = true;
        if ((nextChar = reader.ReadByte()) == EndOfStream) {
            return errEndOfFile;
        }
        this.byteCount++;
    }

    // Optional numeric parameter.
    if (nextChar != EndOfStream && Char.IsDigit((char)nextChar)) {
        ctrlWordParam.hasParam = true;
        StringBuilder parsedParam = new StringBuilder();
        do {
            parsedParam.Append((char)nextChar);
            nextChar = reader.ReadByte();
            if (nextChar == EndOfStream) {
                break;
            }
            this.byteCount++;
        } while (Char.IsDigit((char)nextChar));

        ctrlWordParam.param = parsedParam.ToString();
    }

    // Push the delimiter back into the stream. A space is part of the
    // control word and is consumed; at end of stream there is nothing to
    // push back.
    if (nextChar != EndOfStream && nextChar != ' ') {
        reader.Unread(nextChar);
    }

    result = this.HandleCtrlWord(ctrlWordParam);
    lastCtrlWordParam = ctrlWordParam;
    return result;
}
|
|
|
|
/**
 * Stores a new tokeniser state in the current state object.
 *
 * @param value The new state of the tokeniser.
 * @return The tokeniser state read back after the assignment.
 */
public int SetTokeniserState(int value) {
    currentState.tokeniserState = value;
    return GetTokeniserState();
}
|
|
|
|
/**
 * Returns the tokeniser state stored in the current state object.
 *
 * @return The current state of the tokeniser.
 */
public int GetTokeniserState() {
    return currentState.tokeniserState;
}
|
|
|
|
/**
 * Returns the current group nesting level.
 *
 * @return
 * 		The value of the groupLevel field.
 */
public int GetLevel() {
    return groupLevel;
}
|
|
|
|
|
|
/**
 * Set the tokeniser state back to normal.
 * Sets the state to TOKENISER_NORMAL; no other field is touched.
 */
public void SetTokeniserStateNormal() {
    SetTokeniserState(TOKENISER_NORMAL);
}
|
|
|
|
/**
 * Make the tokeniser skip to the end of the current group.
 * Records the current group level in skipGroupLevel and sets the state to
 * TOKENISER_SKIP_GROUP.
 */
public void SetTokeniserStateSkipGroup() {
    skipGroupLevel = groupLevel;
    SetTokeniserState(TOKENISER_SKIP_GROUP);
}
|
|
|
|
/**
 * Puts the tokeniser into TOKENISER_SKIP_BYTES state and records how many
 * bytes are to be skipped.
 *
 * @param numberOfBytesToSkip
 * 		The number of bytes to skip in the file.
 */
public void SetTokeniserSkipBytes(long numberOfBytesToSkip) {
    binSkipByteCount = numberOfBytesToSkip;
    SetTokeniserState(TOKENISER_SKIP_BYTES);
}
|
|
|
|
/**
 * Puts the tokeniser into TOKENISER_BINARY state and records the number of
 * binary bytes to expect. Convenience overload taking an int count.
 *
 * @param binaryCount
 * 		The number of binary bytes.
 */
public void SetTokeniserStateBinary(int binaryCount) {
    long widenedCount = binaryCount;
    SetTokeniserState(TOKENISER_BINARY);
    binByteCount = widenedCount;
}
|
|
/**
 * Puts the tokeniser into TOKENISER_BINARY state and records the number of
 * binary bytes to expect.
 *
 * @param binaryCount
 * 		The number of binary bytes.
 */
public void SetTokeniserStateBinary(long binaryCount) {
    binByteCount = binaryCount;
    SetTokeniserState(TOKENISER_BINARY);
}
|
|
/**
 * Helper method to determine if the conversion type is TYPE_CONVERT
 * @return true if TYPE_CONVERT, otherwise false
 * @see RtfParser#TYPE_CONVERT
 */
public bool IsConvert() {
    int type = GetConversionType();
    return type == RtfParser.TYPE_CONVERT;
}
|
|
|
|
/**
 * Helper method to determine if the conversion type is TYPE_IMPORT_FULL or TYPE_IMPORT_FRAGMENT
 * @return true if either import type is active, otherwise false
 * @see RtfParser#TYPE_IMPORT_FULL
 * @see RtfParser#TYPE_IMPORT_FRAGMENT
 */
public bool IsImport() {
    if (IsImportFull()) {
        return true;
    }
    return IsImportFragment();
}
|
|
/**
 * Helper method to determine if the conversion type is TYPE_IMPORT_FULL
 * @return true if TYPE_IMPORT_FULL, otherwise false
 * @see RtfParser#TYPE_IMPORT_FULL
 */
public bool IsImportFull() {
    int type = GetConversionType();
    return type == RtfParser.TYPE_IMPORT_FULL;
}
|
|
/**
 * Helper method to determine if the conversion type is TYPE_IMPORT_FRAGMENT
 * @return true if TYPE_IMPORT_FRAGMENT, otherwise false
 * @see RtfParser#TYPE_IMPORT_FRAGMENT
 */
public bool IsImportFragment() {
    int type = GetConversionType();
    return type == RtfParser.TYPE_IMPORT_FRAGMENT;
}
|
|
/**
 * Helper method returning the extended-destination flag of the current
 * state (per the original documentation: whether the control word was a
 * \* control word).
 * @return The value of the current state's isExtendedDestination flag
 */
public bool GetExtendedDestination() {
    return currentState.isExtendedDestination;
}
|
|
/**
 * Helper method to set the extended-destination flag of the current state.
 * @param value Boolean to set the value to.
 * @return isExtendedDestination, read back after the assignment.
 */
public bool SetExtendedDestination(bool value) {
    currentState.isExtendedDestination = value;
    return GetExtendedDestination();
}
|
|
|
|
/**
 * Returns the configured log file name.
 *
 * @return the value of the logFile field
 */
public String GetLogFile() {
    return this.logFile;
}
|
|
|
|
/**
 * Stores the log file name. Only the field is assigned here; nothing is
 * opened or written.
 *
 * @param logFile the logFile to set
 */
public void SetLogFile(String logFile) {
    // "this." is required: the parameter shadows the field.
    this.logFile = logFile;
}
|
|
/**
 * Stores the log file name together with the append flag.
 *
 * @param logFile the logFile to set
 * @param logAppend the logAppend flag to set (forwarded to SetLogAppend)
 */
public void SetLogFile(String logFile, bool logAppend) {
    SetLogFile(logFile);
    SetLogAppend(logAppend);
}
|
|
|
|
/**
 * Returns the flag indicating if logging is on or off.
 *
 * @return the value of the logging field
 */
public bool IsLogging() {
    return this.logging;
}
|
|
|
|
/**
 * Stores the flag indicating if logging is on or off.
 * @param logging <code>true</code> to turn on logging, <code>false</code> to turn off logging.
 */
public void SetLogging(bool logging) {
    // "this." is required: the parameter shadows the field.
    this.logging = logging;
}
|
|
|
|
/**
 * Returns the log append flag.
 * @return the value of the logAppend field
 */
public bool IsLogAppend() {
    return this.logAppend;
}
|
|
|
|
/**
 * Stores the log append flag.
 * @param logAppend the logAppend to set
 */
public void SetLogAppend(bool logAppend) {
    // "this." is required: the parameter shadows the field.
    this.logAppend = logAppend;
}
|
|
|
|
/*
|
|
* Statistics
|
|
*
|
|
public void PrintStats(PrintStream out) {
|
|
if (out == null) return;
|
|
|
|
out.Println("");
|
|
out.Println("Parser statistics:");
|
|
out.Println("Process start date: " + startDate.ToLocaleString());
|
|
out.Println("Process end date : " + endDate.ToLocaleString());
|
|
out.Println(" Elapsed time : " + Long.ToString(endTime - startTime) + " milliseconds.");
|
|
out.Println("Total bytes read : " + Long.ToString(byteCount));
|
|
out.Println("Open group count : " + Long.ToString(openGroupCount));
|
|
out.Print("Close group count : " + Long.ToString(closeGroupCount));
|
|
out.Println(" (Groups Skipped): " + Long.ToString(groupSkippedCount));
|
|
out.Print("Control word count: " + Long.ToString(ctrlWordCount));
|
|
out.Print(" - Handled: " + Long.ToString(ctrlWordHandledCount));
|
|
out.Print(" Not Handled: " + Long.ToString(ctrlWordNotHandledCount));
|
|
out.Println(" Skipped: " + Long.ToString(ctrlWordSkippedCount));
|
|
out.Println("Plain text char count: " + Long.ToString(characterCount));
|
|
}*/
|
|
}
|
|
} |