Initial Commit

This commit is contained in:
2023-06-21 12:46:23 -04:00
commit c70248a520
1352 changed files with 336780 additions and 0 deletions

View File

@@ -0,0 +1,381 @@
using System;
using System.Collections;
using iTextSharp.text;
/*
* $Id: EntitiesToSymbol.cs,v 1.3 2008/05/13 11:26:14 psoares33 Exp $
*
*
* Copyright 1999, 2000, 2001, 2002 Bruno Lowagie.
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* (the "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the License.
*
* The Original Code is 'iText, a free JAVA-PDF library'.
*
* The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
* the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
* All Rights Reserved.
* Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
* are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
*
* Contributor(s): all the names of the contributors are added in the source code
* where applicable.
*
* Alternatively, the contents of this file may be used under the terms of the
* LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
* provisions of LGPL are applicable instead of those above. If you wish to
* allow use of your version of this file only under the terms of the LGPL
* License and not to allow others to use your version of this file under
* the MPL, indicate your decision by deleting the provisions above and
* replace them with the notice and other provisions required by the LGPL.
* If you do not delete the provisions above, a recipient may use your version
* of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the MPL as stated above or under the terms of the GNU
* Library General Public License as published by the Free Software Foundation;
* either version 2 of the License, or any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
* details.
*
* If you didn't download this code from the following link, you should check if
* you aren't using an obsolete version:
* http://www.lowagie.com/iText/
*/
namespace iTextSharp.text.xml.simpleparser {
/**
* This class contains entities that can be used in an entity tag.
*/
public class EntitiesToSymbol {
/**
* This is a map that contains all possible id values of the entity tag
* that can be translated to a character in font Symbol.
* <p>
* Keys are strings: decimal code points (e.g. "8704"), entity names
* (e.g. "forall"; names are case-sensitive, "Prime" and "prime" differ) and a
* few descriptive names for glyph pieces (e.g. "large brace extender").
* Values are boxed <code>char</code>s holding the character code to use
* with the Symbol font.
*/
public static readonly Hashtable map;
/**
* Fills the lookup table once, when the class is first used.
*/
static EntitiesToSymbol() {
    map = new Hashtable();

    // ---- Keys given as decimal code points ----
    // NOTE(review): "169" maps to 227 while the named key "copy" maps to 211;
    // confirm whether the two are meant to select different glyphs.
    map["169"] = (char)227;
    map["172"] = (char)216;
    map["174"] = (char)210;
    map["177"] = (char)177;
    map["215"] = (char)180;
    map["247"] = (char)184;
    map["8230"] = (char)188;
    map["8242"] = (char)162;
    map["8243"] = (char)178;
    map["8260"] = (char)164;
    map["8364"] = (char)240;
    map["8465"] = (char)193;
    map["8472"] = (char)195;
    map["8476"] = (char)194;
    map["8482"] = (char)212;
    map["8501"] = (char)192;
    map["8592"] = (char)172;
    map["8593"] = (char)173;
    map["8594"] = (char)174;
    map["8595"] = (char)175;
    map["8596"] = (char)171;
    map["8629"] = (char)191;
    map["8656"] = (char)220;
    map["8657"] = (char)221;
    map["8658"] = (char)222;
    map["8659"] = (char)223;
    map["8660"] = (char)219;
    map["8704"] = (char)34;
    map["8706"] = (char)182;
    map["8707"] = (char)36;
    map["8709"] = (char)198;
    map["8711"] = (char)209;
    map["8712"] = (char)206;
    map["8713"] = (char)207;
    // 8715 is U+220B (contains as member), the code point of the entity "ni".
    // It shares the glyph of the long-standing "8717" key, which is kept so
    // that existing callers keep working.
    map["8715"] = (char)39;
    map["8717"] = (char)39;
    map["8719"] = (char)213;
    map["8721"] = (char)229;
    map["8722"] = (char)45;
    map["8727"] = (char)42;
    map["8729"] = (char)183;
    map["8730"] = (char)214;
    map["8733"] = (char)181;
    map["8734"] = (char)165;
    map["8736"] = (char)208;
    map["8743"] = (char)217;
    map["8744"] = (char)218;
    map["8745"] = (char)199;
    map["8746"] = (char)200;
    map["8747"] = (char)242;
    map["8756"] = (char)92;
    map["8764"] = (char)126;
    map["8773"] = (char)64;
    map["8776"] = (char)187;
    map["8800"] = (char)185;
    map["8801"] = (char)186;
    map["8804"] = (char)163;
    map["8805"] = (char)179;
    map["8834"] = (char)204;
    map["8835"] = (char)201;
    map["8836"] = (char)203;
    map["8838"] = (char)205;
    map["8839"] = (char)202;
    map["8853"] = (char)197;
    map["8855"] = (char)196;
    map["8869"] = (char)94;
    map["8901"] = (char)215;
    map["8992"] = (char)243;
    map["8993"] = (char)245;
    map["9001"] = (char)225;
    map["9002"] = (char)241;
    map["913"] = (char)65;
    map["914"] = (char)66;
    map["915"] = (char)71;
    map["916"] = (char)68;
    map["917"] = (char)69;
    map["918"] = (char)90;
    map["919"] = (char)72;
    map["920"] = (char)81;
    map["921"] = (char)73;
    map["922"] = (char)75;
    map["923"] = (char)76;
    map["924"] = (char)77;
    map["925"] = (char)78;
    map["926"] = (char)88;
    map["927"] = (char)79;
    map["928"] = (char)80;
    map["929"] = (char)82;
    map["931"] = (char)83;
    map["932"] = (char)84;
    map["933"] = (char)85;
    map["934"] = (char)70;
    map["935"] = (char)67;
    map["936"] = (char)89;
    map["937"] = (char)87;
    map["945"] = (char)97;
    map["946"] = (char)98;
    map["947"] = (char)103;
    map["948"] = (char)100;
    map["949"] = (char)101;
    map["950"] = (char)122;
    map["951"] = (char)104;
    map["952"] = (char)113;
    map["953"] = (char)105;
    map["954"] = (char)107;
    map["955"] = (char)108;
    map["956"] = (char)109;
    map["957"] = (char)110;
    map["958"] = (char)120;
    map["959"] = (char)111;
    map["960"] = (char)112;
    map["961"] = (char)114;
    map["962"] = (char)86;
    map["963"] = (char)115;
    map["964"] = (char)116;
    map["965"] = (char)117;
    map["966"] = (char)102;
    map["967"] = (char)99;
    map["9674"] = (char)224;
    map["968"] = (char)121;
    map["969"] = (char)119;
    map["977"] = (char)74;
    map["978"] = (char)161;
    map["981"] = (char)106;
    map["982"] = (char)118;
    map["9824"] = (char)170;
    map["9827"] = (char)167;
    map["9829"] = (char)169;
    map["9830"] = (char)168;

    // ---- Keys given as names (case-sensitive) ----
    map["Alpha"] = (char)65;
    map["Beta"] = (char)66;
    map["Chi"] = (char)67;
    map["Delta"] = (char)68;
    map["Epsilon"] = (char)69;
    map["Eta"] = (char)72;
    map["Gamma"] = (char)71;
    map["Iota"] = (char)73;
    map["Kappa"] = (char)75;
    map["Lambda"] = (char)76;
    map["Mu"] = (char)77;
    map["Nu"] = (char)78;
    map["Omega"] = (char)87;
    map["Omicron"] = (char)79;
    map["Phi"] = (char)70;
    map["Pi"] = (char)80;
    map["Prime"] = (char)178;
    map["Psi"] = (char)89;
    map["Rho"] = (char)82;
    map["Sigma"] = (char)83;
    map["Tau"] = (char)84;
    map["Theta"] = (char)81;
    map["Upsilon"] = (char)85;
    map["Xi"] = (char)88;
    map["Zeta"] = (char)90;
    map["alefsym"] = (char)192;
    map["alpha"] = (char)97;
    map["and"] = (char)217;
    map["ang"] = (char)208;
    map["asymp"] = (char)187;
    map["beta"] = (char)98;
    map["cap"] = (char)199;
    map["chi"] = (char)99;
    map["clubs"] = (char)167;
    map["cong"] = (char)64;
    map["copy"] = (char)211;
    map["crarr"] = (char)191;
    map["cup"] = (char)200;
    map["dArr"] = (char)223;
    map["darr"] = (char)175;
    map["delta"] = (char)100;
    map["diams"] = (char)168;
    map["divide"] = (char)184;
    map["empty"] = (char)198;
    map["epsilon"] = (char)101;
    map["equiv"] = (char)186;
    map["eta"] = (char)104;
    map["euro"] = (char)240;
    map["exist"] = (char)36;
    map["forall"] = (char)34;
    map["frasl"] = (char)164;
    map["gamma"] = (char)103;
    map["ge"] = (char)179;
    map["hArr"] = (char)219;
    map["harr"] = (char)171;
    map["hearts"] = (char)169;
    map["hellip"] = (char)188;
    map["horizontal arrow extender"] = (char)190;
    map["image"] = (char)193;
    map["infin"] = (char)165;
    map["int"] = (char)242;
    map["iota"] = (char)105;
    map["isin"] = (char)206;
    map["kappa"] = (char)107;
    map["lArr"] = (char)220;
    map["lambda"] = (char)108;
    map["lang"] = (char)225;
    map["large brace extender"] = (char)239;
    map["large integral extender"] = (char)244;
    map["large left brace (bottom)"] = (char)238;
    map["large left brace (middle)"] = (char)237;
    map["large left brace (top)"] = (char)236;
    map["large left bracket (bottom)"] = (char)235;
    map["large left bracket (extender)"] = (char)234;
    map["large left bracket (top)"] = (char)233;
    map["large left parenthesis (bottom)"] = (char)232;
    map["large left parenthesis (extender)"] = (char)231;
    map["large left parenthesis (top)"] = (char)230;
    map["large right brace (bottom)"] = (char)254;
    map["large right brace (middle)"] = (char)253;
    map["large right brace (top)"] = (char)252;
    map["large right bracket (bottom)"] = (char)251;
    map["large right bracket (extender)"] = (char)250;
    map["large right bracket (top)"] = (char)249;
    map["large right parenthesis (bottom)"] = (char)248;
    map["large right parenthesis (extender)"] = (char)247;
    map["large right parenthesis (top)"] = (char)246;
    map["larr"] = (char)172;
    map["le"] = (char)163;
    map["lowast"] = (char)42;
    map["loz"] = (char)224;
    map["minus"] = (char)45;
    map["mu"] = (char)109;
    map["nabla"] = (char)209;
    map["ne"] = (char)185;
    // "ni" (contains as member) was missing; same glyph as "8715"/"8717".
    map["ni"] = (char)39;
    map["not"] = (char)216;
    map["notin"] = (char)207;
    map["nsub"] = (char)203;
    map["nu"] = (char)110;
    map["omega"] = (char)119;
    map["omicron"] = (char)111;
    map["oplus"] = (char)197;
    map["or"] = (char)218;
    map["otimes"] = (char)196;
    map["part"] = (char)182;
    map["perp"] = (char)94;
    map["phi"] = (char)102;
    map["pi"] = (char)112;
    map["piv"] = (char)118;
    map["plusmn"] = (char)177;
    map["prime"] = (char)162;
    map["prod"] = (char)213;
    map["prop"] = (char)181;
    map["psi"] = (char)121;
    map["rArr"] = (char)222;
    map["radic"] = (char)214;
    map["radical extender"] = (char)96;
    map["rang"] = (char)241;
    map["rarr"] = (char)174;
    map["real"] = (char)194;
    map["reg"] = (char)210;
    map["rho"] = (char)114;
    map["sdot"] = (char)215;
    map["sigma"] = (char)115;
    map["sigmaf"] = (char)86;
    map["sim"] = (char)126;
    map["spades"] = (char)170;
    map["sub"] = (char)204;
    map["sube"] = (char)205;
    map["sum"] = (char)229;
    map["sup"] = (char)201;
    map["supe"] = (char)202;
    map["tau"] = (char)116;
    map["there4"] = (char)92;
    map["theta"] = (char)113;
    map["thetasym"] = (char)74;
    map["times"] = (char)180;
    map["trade"] = (char)212;
    map["uArr"] = (char)221;
    map["uarr"] = (char)173;
    map["upsih"] = (char)161;
    map["upsilon"] = (char)117;
    map["vertical arrow extender"] = (char)189;
    map["weierp"] = (char)195;
    map["xi"] = (char)120;
    map["zeta"] = (char)122;
}
/**
* Gets a chunk for an entity id.
* <p>
* Resolution order:
* 1. if the id is a key of the symbol map, the chunk holds the mapped
*    character and uses a Symbol font that copies the size, style and color
*    of <code>font</code>;
* 2. otherwise, if the id is a decimal integer in the range of a
*    <code>char</code> (0..65535), the chunk holds that character in
*    <code>font</code>;
* 3. otherwise the chunk holds the id text itself in <code>font</code>.
*
* @param e the id of the entity (a key of the symbol map or a decimal number)
* @param font the font to use when the symbol isn't found; also the source of
*             size, style and color for the Symbol font
* @return a Chunk
*/
public static Chunk Get(String e, Font font) {
    char s = GetCorrespondingSymbol(e);
    if (s != '\0') {
        Font symbol = new Font(Font.SYMBOL, font.Size, font.Style, font.Color);
        return new Chunk(s.ToString(), symbol);
    }

    // Not a known symbol: try to read the id as a numeric character reference.
    // TryParse avoids using exceptions for the expected "not a number" case and
    // the invariant culture keeps the result independent of the machine locale.
    int code;
    bool isNumber = int.TryParse(
        e,
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture,
        out code);

    // Only values that fit in a char are converted; a plain cast would silently
    // wrap larger or negative numbers into an unrelated character.
    if (isNumber && code >= char.MinValue && code <= char.MaxValue) {
        return new Chunk(((char)code).ToString(), font);
    }

    // Fallback: show the id as literal text.
    return new Chunk(e, font);
}
/**
* Looks for the corresponding symbol in the font Symbol.
*
* @param name the name of the entity (a key of the symbol map)
* @return the corresponding character in font Symbol, or '\0' when the
*         name is not a key of the map
*/
public static char GetCorrespondingSymbol(String name) {
    // A single indexer read replaces ContainsKey + indexer: Hashtable returns
    // null for an absent key, so one lookup is enough and there is no gap
    // between the existence check and the read.
    object symbol = map[name];
    if (symbol == null) {
        return '\0';
    }
    return (char)symbol;
}
}
}

View File

@@ -0,0 +1,442 @@
using System;
using System.Collections;
using System.Text;
using System.Globalization;
/*
* $Id: EntitiesToUnicode.cs,v 1.3 2008/05/13 11:26:14 psoares33 Exp $
*
*
* Copyright 2003-2007 Paulo Soares and Bruno Lowagie.
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* (the "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the License.
*
* The Original Code is 'iText, a free JAVA-PDF library'.
*
* The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
* the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
* All Rights Reserved.
* Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
* are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
*
* Contributor(s): all the names of the contributors are added in the source code
* where applicable.
*
* Alternatively, the contents of this file may be used under the terms of the
* LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
* provisions of LGPL are applicable instead of those above. If you wish to
* allow use of your version of this file only under the terms of the LGPL
* License and not to allow others to use your version of this file under
* the MPL, indicate your decision by deleting the provisions above and
* replace them with the notice and other provisions required by the LGPL.
* If you do not delete the provisions above, a recipient may use your version
* of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the MPL as stated above or under the terms of the GNU
* Library General Public License as published by the Free Software Foundation;
* either version 2 of the License, or any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
* details.
*
* If you didn't download this code from the following link, you should check if
* you aren't using an obsolete version:
* http://www.lowagie.com/iText/
*/
namespace iTextSharp.text.xml.simpleparser {
/**
* This class contains entities that can be used in an entity tag.
*/
public class EntitiesToUnicode {
/**
* This is a map that contains the names of entities and their unicode value.
*/
public static readonly Hashtable map = new Hashtable();
static EntitiesToUnicode() {
map["nbsp"] = '\u00a0'; // no-break space = non-breaking space, U+00A0 ISOnum
map["iexcl"] = '\u00a1'; // inverted exclamation mark, U+00A1 ISOnum
map["cent"] = '\u00a2'; // cent sign, U+00A2 ISOnum
map["pound"] = '\u00a3'; // pound sign, U+00A3 ISOnum
map["curren"] = '\u00a4'; // currency sign, U+00A4 ISOnum
map["yen"] = '\u00a5'; // yen sign = yuan sign, U+00A5 ISOnum
map["brvbar"] = '\u00a6'; // broken bar = broken vertical bar, U+00A6 ISOnum
map["sect"] = '\u00a7'; // section sign, U+00A7 ISOnum
map["uml"] = '\u00a8'; // diaeresis = spacing diaeresis, U+00A8 ISOdia
map["copy"] = '\u00a9'; // copyright sign, U+00A9 ISOnum
map["ordf"] = '\u00aa'; // feminine ordinal indicator, U+00AA ISOnum
map["laquo"] = '\u00ab'; // left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
map["not"] = '\u00ac'; // not sign, U+00AC ISOnum
map["shy"] = '\u00ad'; // soft hyphen = discretionary hyphen, U+00AD ISOnum
map["reg"] = '\u00ae'; // registered sign = registered trade mark sign, U+00AE ISOnum
map["macr"] = '\u00af'; // macron = spacing macron = overline = APL overbar, U+00AF ISOdia
map["deg"] = '\u00b0'; // degree sign, U+00B0 ISOnum
map["plusmn"] = '\u00b1'; // plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
map["sup2"] = '\u00b2'; // superscript two = superscript digit two = squared, U+00B2 ISOnum
map["sup3"] = '\u00b3'; // superscript three = superscript digit three = cubed, U+00B3 ISOnum
map["acute"] = '\u00b4'; // acute accent = spacing acute, U+00B4 ISOdia
map["micro"] = '\u00b5'; // micro sign, U+00B5 ISOnum
map["para"] = '\u00b6'; // pilcrow sign = paragraph sign, U+00B6 ISOnum
map["middot"] = '\u00b7'; // middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
map["cedil"] = '\u00b8'; // cedilla = spacing cedilla, U+00B8 ISOdia
map["sup1"] = '\u00b9'; // superscript one = superscript digit one, U+00B9 ISOnum
map["ordm"] = '\u00ba'; // masculine ordinal indicator, U+00BA ISOnum
map["raquo"] = '\u00bb'; // right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
map["frac14"] = '\u00bc'; // vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
map["frac12"] = '\u00bd'; // vulgar fraction one half = fraction one half, U+00BD ISOnum
map["frac34"] = '\u00be'; // vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
map["iquest"] = '\u00bf'; // inverted question mark = turned question mark, U+00BF ISOnum
map["Agrave"] = '\u00c0'; // latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
map["Aacute"] = '\u00c1'; // latin capital letter A with acute, U+00C1 ISOlat1
map["Acirc"] = '\u00c2'; // latin capital letter A with circumflex, U+00C2 ISOlat1
map["Atilde"] = '\u00c3'; // latin capital letter A with tilde, U+00C3 ISOlat1
map["Auml"] = '\u00c4'; // latin capital letter A with diaeresis, U+00C4 ISOlat1
map["Aring"] = '\u00c5'; // latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
map["AElig"] = '\u00c6'; // latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
map["Ccedil"] = '\u00c7'; // latin capital letter C with cedilla, U+00C7 ISOlat1
map["Egrave"] = '\u00c8'; // latin capital letter E with grave, U+00C8 ISOlat1
map["Eacute"] = '\u00c9'; // latin capital letter E with acute, U+00C9 ISOlat1
map["Ecirc"] = '\u00ca'; // latin capital letter E with circumflex, U+00CA ISOlat1
map["Euml"] = '\u00cb'; // latin capital letter E with diaeresis, U+00CB ISOlat1
map["Igrave"] = '\u00cc'; // latin capital letter I with grave, U+00CC ISOlat1
map["Iacute"] = '\u00cd'; // latin capital letter I with acute, U+00CD ISOlat1
map["Icirc"] = '\u00ce'; // latin capital letter I with circumflex, U+00CE ISOlat1
map["Iuml"] = '\u00cf'; // latin capital letter I with diaeresis, U+00CF ISOlat1
map["ETH"] = '\u00d0'; // latin capital letter ETH, U+00D0 ISOlat1
map["Ntilde"] = '\u00d1'; // latin capital letter N with tilde, U+00D1 ISOlat1
map["Ograve"] = '\u00d2'; // latin capital letter O with grave, U+00D2 ISOlat1
map["Oacute"] = '\u00d3'; // latin capital letter O with acute, U+00D3 ISOlat1
map["Ocirc"] = '\u00d4'; // latin capital letter O with circumflex, U+00D4 ISOlat1
map["Otilde"] = '\u00d5'; // latin capital letter O with tilde, U+00D5 ISOlat1
map["Ouml"] = '\u00d6'; // latin capital letter O with diaeresis, U+00D6 ISOlat1
map["times"] = '\u00d7'; // multiplication sign, U+00D7 ISOnum
map["Oslash"] = '\u00d8'; // latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
map["Ugrave"] = '\u00d9'; // latin capital letter U with grave, U+00D9 ISOlat1
map["Uacute"] = '\u00da'; // latin capital letter U with acute, U+00DA ISOlat1
map["Ucirc"] = '\u00db'; // latin capital letter U with circumflex, U+00DB ISOlat1
map["Uuml"] = '\u00dc'; // latin capital letter U with diaeresis, U+00DC ISOlat1
map["Yacute"] = '\u00dd'; // latin capital letter Y with acute, U+00DD ISOlat1
map["THORN"] = '\u00de'; // latin capital letter THORN, U+00DE ISOlat1
map["szlig"] = '\u00df'; // latin small letter sharp s = ess-zed, U+00DF ISOlat1
map["agrave"] = '\u00e0'; // latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
map["aacute"] = '\u00e1'; // latin small letter a with acute, U+00E1 ISOlat1
map["acirc"] = '\u00e2'; // latin small letter a with circumflex, U+00E2 ISOlat1
map["atilde"] = '\u00e3'; // latin small letter a with tilde, U+00E3 ISOlat1
map["auml"] = '\u00e4'; // latin small letter a with diaeresis, U+00E4 ISOlat1
map["aring"] = '\u00e5'; // latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
map["aelig"] = '\u00e6'; // latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
map["ccedil"] = '\u00e7'; // latin small letter c with cedilla, U+00E7 ISOlat1
map["egrave"] = '\u00e8'; // latin small letter e with grave, U+00E8 ISOlat1
map["eacute"] = '\u00e9'; // latin small letter e with acute, U+00E9 ISOlat1
map["ecirc"] = '\u00ea'; // latin small letter e with circumflex, U+00EA ISOlat1
map["euml"] = '\u00eb'; // latin small letter e with diaeresis, U+00EB ISOlat1
map["igrave"] = '\u00ec'; // latin small letter i with grave, U+00EC ISOlat1
map["iacute"] = '\u00ed'; // latin small letter i with acute, U+00ED ISOlat1
map["icirc"] = '\u00ee'; // latin small letter i with circumflex, U+00EE ISOlat1
map["iuml"] = '\u00ef'; // latin small letter i with diaeresis, U+00EF ISOlat1
map["eth"] = '\u00f0'; // latin small letter eth, U+00F0 ISOlat1
map["ntilde"] = '\u00f1'; // latin small letter n with tilde, U+00F1 ISOlat1
map["ograve"] = '\u00f2'; // latin small letter o with grave, U+00F2 ISOlat1
map["oacute"] = '\u00f3'; // latin small letter o with acute, U+00F3 ISOlat1
map["ocirc"] = '\u00f4'; // latin small letter o with circumflex, U+00F4 ISOlat1
map["otilde"] = '\u00f5'; // latin small letter o with tilde, U+00F5 ISOlat1
map["ouml"] = '\u00f6'; // latin small letter o with diaeresis, U+00F6 ISOlat1
map["divide"] = '\u00f7'; // division sign, U+00F7 ISOnum
map["oslash"] = '\u00f8'; // latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1
map["ugrave"] = '\u00f9'; // latin small letter u with grave, U+00F9 ISOlat1
map["uacute"] = '\u00fa'; // latin small letter u with acute, U+00FA ISOlat1
map["ucirc"] = '\u00fb'; // latin small letter u with circumflex, U+00FB ISOlat1
map["uuml"] = '\u00fc'; // latin small letter u with diaeresis, U+00FC ISOlat1
map["yacute"] = '\u00fd'; // latin small letter y with acute, U+00FD ISOlat1
map["thorn"] = '\u00fe'; // latin small letter thorn, U+00FE ISOlat1
map["yuml"] = '\u00ff'; // latin small letter y with diaeresis, U+00FF ISOlat1
// Latin Extended-B
map["fnof"] = '\u0192'; // latin small f with hook = function = florin, U+0192 ISOtech
// Greek
map["Alpha"] = '\u0391'; // greek capital letter alpha, U+0391
map["Beta"] = '\u0392'; // greek capital letter beta, U+0392
map["Gamma"] = '\u0393'; // greek capital letter gamma, U+0393 ISOgrk3
map["Delta"] = '\u0394'; // greek capital letter delta, U+0394 ISOgrk3
map["Epsilon"] = '\u0395'; // greek capital letter epsilon, U+0395
map["Zeta"] = '\u0396'; // greek capital letter zeta, U+0396
map["Eta"] = '\u0397'; // greek capital letter eta, U+0397
map["Theta"] = '\u0398'; // greek capital letter theta, U+0398 ISOgrk3
map["Iota"] = '\u0399'; // greek capital letter iota, U+0399
map["Kappa"] = '\u039a'; // greek capital letter kappa, U+039A
map["Lambda"] = '\u039b'; // greek capital letter lambda, U+039B ISOgrk3
map["Mu"] = '\u039c'; // greek capital letter mu, U+039C
map["Nu"] = '\u039d'; // greek capital letter nu, U+039D
map["Xi"] = '\u039e'; // greek capital letter xi, U+039E ISOgrk3
map["Omicron"] = '\u039f'; // greek capital letter omicron, U+039F
map["Pi"] = '\u03a0'; // greek capital letter pi, U+03A0 ISOgrk3
map["Rho"] = '\u03a1'; // greek capital letter rho, U+03A1
// there is no Sigmaf, and no U+03A2 character either
map["Sigma"] = '\u03a3'; // greek capital letter sigma, U+03A3 ISOgrk3
map["Tau"] = '\u03a4'; // greek capital letter tau, U+03A4
map["Upsilon"] = '\u03a5'; // greek capital letter upsilon, U+03A5 ISOgrk3
map["Phi"] = '\u03a6'; // greek capital letter phi, U+03A6 ISOgrk3
map["Chi"] = '\u03a7'; // greek capital letter chi, U+03A7
map["Psi"] = '\u03a8'; // greek capital letter psi, U+03A8 ISOgrk3
map["Omega"] = '\u03a9'; // greek capital letter omega, U+03A9 ISOgrk3
map["alpha"] = '\u03b1'; // greek small letter alpha, U+03B1 ISOgrk3
map["beta"] = '\u03b2'; // greek small letter beta, U+03B2 ISOgrk3
map["gamma"] = '\u03b3'; // greek small letter gamma, U+03B3 ISOgrk3
map["delta"] = '\u03b4'; // greek small letter delta, U+03B4 ISOgrk3
map["epsilon"] = '\u03b5'; // greek small letter epsilon, U+03B5 ISOgrk3
map["zeta"] = '\u03b6'; // greek small letter zeta, U+03B6 ISOgrk3
map["eta"] = '\u03b7'; // greek small letter eta, U+03B7 ISOgrk3
map["theta"] = '\u03b8'; // greek small letter theta, U+03B8 ISOgrk3
map["iota"] = '\u03b9'; // greek small letter iota, U+03B9 ISOgrk3
map["kappa"] = '\u03ba'; // greek small letter kappa, U+03BA ISOgrk3
map["lambda"] = '\u03bb'; // greek small letter lambda, U+03BB ISOgrk3
map["mu"] = '\u03bc'; // greek small letter mu, U+03BC ISOgrk3
map["nu"] = '\u03bd'; // greek small letter nu, U+03BD ISOgrk3
map["xi"] = '\u03be'; // greek small letter xi, U+03BE ISOgrk3
map["omicron"] = '\u03bf'; // greek small letter omicron, U+03BF NEW
map["pi"] = '\u03c0'; // greek small letter pi, U+03C0 ISOgrk3
map["rho"] = '\u03c1'; // greek small letter rho, U+03C1 ISOgrk3
map["sigmaf"] = '\u03c2'; // greek small letter final sigma, U+03C2 ISOgrk3
map["sigma"] = '\u03c3'; // greek small letter sigma, U+03C3 ISOgrk3
map["tau"] = '\u03c4'; // greek small letter tau, U+03C4 ISOgrk3
map["upsilon"] = '\u03c5'; // greek small letter upsilon, U+03C5 ISOgrk3
map["phi"] = '\u03c6'; // greek small letter phi, U+03C6 ISOgrk3
map["chi"] = '\u03c7'; // greek small letter chi, U+03C7 ISOgrk3
map["psi"] = '\u03c8'; // greek small letter psi, U+03C8 ISOgrk3
map["omega"] = '\u03c9'; // greek small letter omega, U+03C9 ISOgrk3
map["thetasym"] = '\u03d1'; // greek small letter theta symbol, U+03D1 NEW
map["upsih"] = '\u03d2'; // greek upsilon with hook symbol, U+03D2 NEW
map["piv"] = '\u03d6'; // greek pi symbol, U+03D6 ISOgrk3
// General Punctuation
map["bull"] = '\u2022'; // bullet = black small circle, U+2022 ISOpub
// bullet is NOT the same as bullet operator, U+2219
map["hellip"] = '\u2026'; // horizontal ellipsis = three dot leader, U+2026 ISOpub
map["prime"] = '\u2032'; // prime = minutes = feet, U+2032 ISOtech
map["Prime"] = '\u2033'; // double prime = seconds = inches, U+2033 ISOtech
map["oline"] = '\u203e'; // overline = spacing overscore, U+203E NEW
map["frasl"] = '\u2044'; // fraction slash, U+2044 NEW
// Letterlike Symbols
map["weierp"] = '\u2118'; // script capital P = power set = Weierstrass p, U+2118 ISOamso
map["image"] = '\u2111'; // blackletter capital I = imaginary part, U+2111 ISOamso
map["real"] = '\u211c'; // blackletter capital R = real part symbol, U+211C ISOamso
map["trade"] = '\u2122'; // trade mark sign, U+2122 ISOnum
map["alefsym"] = '\u2135'; // alef symbol = first transfinite cardinal, U+2135 NEW
// alef symbol is NOT the same as hebrew letter alef,
// U+05D0 although the same glyph could be used to depict both characters
// Arrows
map["larr"] = '\u2190'; // leftwards arrow, U+2190 ISOnum
map["uarr"] = '\u2191'; // upwards arrow, U+2191 ISOnum
map["rarr"] = '\u2192'; // rightwards arrow, U+2192 ISOnum
map["darr"] = '\u2193'; // downwards arrow, U+2193 ISOnum
map["harr"] = '\u2194'; // left right arrow, U+2194 ISOamsa
map["crarr"] = '\u21b5'; // downwards arrow with corner leftwards = carriage return, U+21B5 NEW
map["lArr"] = '\u21d0'; // leftwards double arrow, U+21D0 ISOtech
// ISO 10646 does not say that lArr is the same as the 'is implied by' arrow
// but also does not have any other character for that function. So ? lArr can
// be used for 'is implied by' as ISOtech suggests
map["uArr"] = '\u21d1'; // upwards double arrow, U+21D1 ISOamsa
map["rArr"] = '\u21d2'; // rightwards double arrow, U+21D2 ISOtech
// ISO 10646 does not say this is the 'implies' character but does not have
// another character with this function so ?
// rArr can be used for 'implies' as ISOtech suggests
map["dArr"] = '\u21d3'; // downwards double arrow, U+21D3 ISOamsa
map["hArr"] = '\u21d4'; // left right double arrow, U+21D4 ISOamsa
// Mathematical Operators
map["forall"] = '\u2200'; // for all, U+2200 ISOtech
map["part"] = '\u2202'; // partial differential, U+2202 ISOtech
map["exist"] = '\u2203'; // there exists, U+2203 ISOtech
map["empty"] = '\u2205'; // empty set = null set = diameter, U+2205 ISOamso
map["nabla"] = '\u2207'; // nabla = backward difference, U+2207 ISOtech
map["isin"] = '\u2208'; // element of, U+2208 ISOtech
map["notin"] = '\u2209'; // not an element of, U+2209 ISOtech
map["ni"] = '\u220b'; // contains as member, U+220B ISOtech
// should there be a more memorable name than 'ni'?
map["prod"] = '\u220f'; // n-ary product = product sign, U+220F ISOamsb
// prod is NOT the same character as U+03A0 'greek capital letter pi' though
// the same glyph might be used for both
map["sum"] = '\u2211'; // n-ary sumation, U+2211 ISOamsb
// sum is NOT the same character as U+03A3 'greek capital letter sigma'
// though the same glyph might be used for both
map["minus"] = '\u2212'; // minus sign, U+2212 ISOtech
map["lowast"] = '\u2217'; // asterisk operator, U+2217 ISOtech
map["radic"] = '\u221a'; // square root = radical sign, U+221A ISOtech
map["prop"] = '\u221d'; // proportional to, U+221D ISOtech
map["infin"] = '\u221e'; // infinity, U+221E ISOtech
map["ang"] = '\u2220'; // angle, U+2220 ISOamso
map["and"] = '\u2227'; // logical and = wedge, U+2227 ISOtech
map["or"] = '\u2228'; // logical or = vee, U+2228 ISOtech
map["cap"] = '\u2229'; // intersection = cap, U+2229 ISOtech
map["cup"] = '\u222a'; // union = cup, U+222A ISOtech
map["int"] = '\u222b'; // integral, U+222B ISOtech
map["there4"] = '\u2234'; // therefore, U+2234 ISOtech
map["sim"] = '\u223c'; // tilde operator = varies with = similar to, U+223C ISOtech
// tilde operator is NOT the same character as the tilde, U+007E,
// although the same glyph might be used to represent both
map["cong"] = '\u2245'; // approximately equal to, U+2245 ISOtech
map["asymp"] = '\u2248'; // almost equal to = asymptotic to, U+2248 ISOamsr
map["ne"] = '\u2260'; // not equal to, U+2260 ISOtech
map["equiv"] = '\u2261'; // identical to, U+2261 ISOtech
map["le"] = '\u2264'; // less-than or equal to, U+2264 ISOtech
map["ge"] = '\u2265'; // greater-than or equal to, U+2265 ISOtech
map["sub"] = '\u2282'; // subset of, U+2282 ISOtech
map["sup"] = '\u2283'; // superset of, U+2283 ISOtech
// note that nsup, 'not a superset of, U+2285' is not covered by the Symbol
// font encoding and is not included. Should it be, for symmetry?
// It is in ISOamsn
map["nsub"] = '\u2284'; // not a subset of, U+2284 ISOamsn
map["sube"] = '\u2286'; // subset of or equal to, U+2286 ISOtech
map["supe"] = '\u2287'; // superset of or equal to, U+2287 ISOtech
map["oplus"] = '\u2295'; // circled plus = direct sum, U+2295 ISOamsb
map["otimes"] = '\u2297'; // circled times = vector product, U+2297 ISOamsb
map["perp"] = '\u22a5'; // up tack = orthogonal to = perpendicular, U+22A5 ISOtech
map["sdot"] = '\u22c5'; // dot operator, U+22C5 ISOamsb
// dot operator is NOT the same character as U+00B7 middle dot
// Miscellaneous Technical
map["lceil"] = '\u2308'; // left ceiling = apl upstile, U+2308 ISOamsc
map["rceil"] = '\u2309'; // right ceiling, U+2309 ISOamsc
map["lfloor"] = '\u230a'; // left floor = apl downstile, U+230A ISOamsc
map["rfloor"] = '\u230b'; // right floor, U+230B ISOamsc
map["lang"] = '\u2329'; // left-pointing angle bracket = bra, U+2329 ISOtech
// lang is NOT the same character as U+003C 'less than'
// or U+2039 'single left-pointing angle quotation mark'
map["rang"] = '\u232a'; // right-pointing angle bracket = ket, U+232A ISOtech
// rang is NOT the same character as U+003E 'greater than'
// or U+203A 'single right-pointing angle quotation mark'
// Geometric Shapes
map["loz"] = '\u25ca'; // lozenge, U+25CA ISOpub
// Miscellaneous Symbols
map["spades"] = '\u2660'; // black spade suit, U+2660 ISOpub
// black here seems to mean filled as opposed to hollow
map["clubs"] = '\u2663'; // black club suit = shamrock, U+2663 ISOpub
map["hearts"] = '\u2665'; // black heart suit = valentine, U+2665 ISOpub
map["diams"] = '\u2666'; // black diamond suit, U+2666 ISOpub
// C0 Controls and Basic Latin
map["quot"] = '\u0022'; // quotation mark = APL quote, U+0022 ISOnum
map["amp"] = '\u0026'; // ampersand, U+0026 ISOnum
map["apos"] = '\'';
map["lt"] = '\u003c'; // less-than sign, U+003C ISOnum
map["gt"] = '\u003e'; // greater-than sign, U+003E ISOnum
// Latin Extended-A
map["OElig"] = '\u0152'; // latin capital ligature OE, U+0152 ISOlat2
map["oelig"] = '\u0153'; // latin small ligature oe, U+0153 ISOlat2
// ligature is a misnomer, this is a separate character in some languages
map["Scaron"] = '\u0160'; // latin capital letter S with caron, U+0160 ISOlat2
map["scaron"] = '\u0161'; // latin small letter s with caron, U+0161 ISOlat2
map["Yuml"] = '\u0178'; // latin capital letter Y with diaeresis, U+0178 ISOlat2
// Spacing Modifier Letters
map["circ"] = '\u02c6'; // modifier letter circumflex accent, U+02C6 ISOpub
map["tilde"] = '\u02dc'; // small tilde, U+02DC ISOdia
// General Punctuation
map["ensp"] = '\u2002'; // en space, U+2002 ISOpub
map["emsp"] = '\u2003'; // em space, U+2003 ISOpub
map["thinsp"] = '\u2009'; // thin space, U+2009 ISOpub
map["zwnj"] = '\u200c'; // zero width non-joiner, U+200C NEW RFC 2070
map["zwj"] = '\u200d'; // zero width joiner, U+200D NEW RFC 2070
map["lrm"] = '\u200e'; // left-to-right mark, U+200E NEW RFC 2070
map["rlm"] = '\u200f'; // right-to-left mark, U+200F NEW RFC 2070
map["ndash"] = '\u2013'; // en dash, U+2013 ISOpub
map["mdash"] = '\u2014'; // em dash, U+2014 ISOpub
map["lsquo"] = '\u2018'; // left single quotation mark, U+2018 ISOnum
map["rsquo"] = '\u2019'; // right single quotation mark, U+2019 ISOnum
map["sbquo"] = '\u201a'; // single low-9 quotation mark, U+201A NEW
map["ldquo"] = '\u201c'; // left double quotation mark, U+201C ISOnum
map["rdquo"] = '\u201d'; // right double quotation mark, U+201D ISOnum
map["bdquo"] = '\u201e'; // double low-9 quotation mark, U+201E NEW
map["dagger"] = '\u2020'; // dagger, U+2020 ISOpub
map["Dagger"] = '\u2021'; // double dagger, U+2021 ISOpub
map["permil"] = '\u2030'; // per mille sign, U+2030 ISOtech
map["lsaquo"] = '\u2039'; // single left-pointing angle quotation mark, U+2039 ISO proposed
// lsaquo is proposed but not yet ISO standardized
map["rsaquo"] = '\u203a'; // single right-pointing angle quotation mark, U+203A ISO proposed
// rsaquo is proposed but not yet ISO standardized
map["euro"] = '\u20ac'; // euro sign, U+20AC NEW
}
/**
 * Translates an entity to a unicode character.
 * <p>
 * Three forms are understood: hexadecimal character references
 * (<code>#x41</code>), decimal character references (<code>#65</code>)
 * and the named entities registered in the lookup table of this class.
 *
 * @param name the name of the entity, without the leading '&amp;' and the trailing ';'
 * @return the corresponding unicode character, or '\0' when the name is null,
 *         is not a known entity, is not a well-formed number, or denotes a value
 *         that does not fit in a single UTF-16 code unit (negative or above U+FFFF)
 */
public static char DecodeEntity(String name) {
    if (name == null || name.Length == 0) {
        return '\0';
    }
    // Compare chars directly: String.StartsWith(String) is culture-sensitive.
    if (name[0] == '#') {
        bool hex = name.Length > 1 && name[1] == 'x';
        String digits = name.Substring(hex ? 2 : 1);
        NumberStyles style = hex ? NumberStyles.AllowHexSpecifier : NumberStyles.Integer;
        int codePoint;
        if (!int.TryParse(digits, style, CultureInfo.InvariantCulture, out codePoint)) {
            return '\0';
        }
        // A plain (char) cast is unchecked and would silently wrap out-of-range
        // values onto an unrelated character, so reject them explicitly.
        if (codePoint < char.MinValue || codePoint > char.MaxValue) {
            return '\0';
        }
        return (char)codePoint;
    }
    object c = map[name];
    return c == null ? '\0' : (char)c;
}
/**
 * Returns a copy of the given String in which every entity reference
 * (&...;) that DecodeEntity recognizes is replaced by its (unicode) character.
 * References that DecodeEntity does not recognize are copied unchanged,
 * as is any text following an ampersand that has no closing semicolon.
 * A String without any ampersand is returned as is.
 */
public static String DecodeString(String s) {
    int ampIndex = s.IndexOf('&');
    if (ampIndex < 0) {
        return s;
    }
    StringBuilder result = new StringBuilder();
    result.Append(s, 0, ampIndex);
    for (;;) {
        int semiIndex = s.IndexOf(';', ampIndex);
        if (semiIndex < 0) {
            // no terminator left: the remainder is plain text
            result.Append(s, ampIndex, s.Length - ampIndex);
            break;
        }
        // Ampersands that occur before the semicolon cannot start this entity;
        // copy them as text and move on to the last one before the semicolon.
        for (int nextAmp = s.IndexOf('&', ampIndex + 1);
             nextAmp >= 0 && nextAmp < semiIndex;
             nextAmp = s.IndexOf('&', ampIndex + 1)) {
            result.Append(s, ampIndex, nextAmp - ampIndex);
            ampIndex = nextAmp;
        }
        int nameStart = ampIndex + 1;
        char decoded = DecodeEntity(s.Substring(nameStart, semiIndex - nameStart));
        if (decoded != '\0') {
            result.Append(decoded);
        }
        else {
            // unknown entity: keep "&name;" verbatim
            result.Append(s, ampIndex, semiIndex - ampIndex + 1);
        }
        int tailStart = semiIndex + 1;
        ampIndex = s.IndexOf('&', semiIndex);
        if (ampIndex < 0) {
            result.Append(s, tailStart, s.Length - tailStart);
            break;
        }
        // plain text between this entity and the next ampersand
        result.Append(s, tailStart, ampIndex - tailStart);
    }
    return result.ToString();
}
}
}

View File

@@ -0,0 +1,81 @@
using System;
using System.Collections;
/*
* Copyright 2003 Paulo Soares
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* (the "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the License.
*
* The Original Code is 'iText, a free JAVA-PDF library'.
*
* The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
* the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
* All Rights Reserved.
* Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
* are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
*
* Contributor(s): all the names of the contributors are added in the source code
* where applicable.
*
* Alternatively, the contents of this file may be used under the terms of the
* LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
* provisions of LGPL are applicable instead of those above. If you wish to
* allow use of your version of this file only under the terms of the LGPL
* License and not to allow others to use your version of this file under
* the MPL, indicate your decision by deleting the provisions above and
* replace them with the notice and other provisions required by the LGPL.
* If you do not delete the provisions above, a recipient may use your version
* of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the MPL as stated above or under the terms of the GNU
* Library General Public License as published by the Free Software Foundation;
* either version 2 of the License, or any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
* details.
*
* If you didn't download this code from the following link, you should check if
* you aren't using an obsolete version:
* http://www.lowagie.com/iText/
*/
namespace iTextSharp.text.xml.simpleparser {
    /**
     * Receives the document events fired by <CODE>SimpleXMLParser</CODE>:
     * start and end of the document, start and end tags, and text.
     * @author Paulo Soares (psoares@consiste.pt)
     */
    public interface ISimpleXMLDocHandler {
        /**
         * Called when the document starts to be parsed.
         */
        void StartDocument();

        /**
         * Called after the document is parsed.
         */
        void EndDocument();

        /**
         * Called when a start tag is found.
         * @param tag the tag name
         * @param h the tag's attributes
         */
        void StartElement(string tag, Hashtable h);

        /**
         * Called when an end tag is found.
         * @param tag the tag name
         */
        void EndElement(string tag);

        /**
         * Called when a text element is found.
         * @param str the text element, probably a fragment.
         */
        void Text(string str);
    }
}

View File

@@ -0,0 +1,61 @@
using System;
/*
* Copyright 2003 Paulo Soares
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* (the "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the License.
*
* The Original Code is 'iText, a free JAVA-PDF library'.
*
* The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
* the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
* All Rights Reserved.
* Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
* are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
*
* Contributor(s): all the names of the contributors are added in the source code
* where applicable.
*
* Alternatively, the contents of this file may be used under the terms of the
* LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
* provisions of LGPL are applicable instead of those above. If you wish to
* allow use of your version of this file only under the terms of the LGPL
* License and not to allow others to use your version of this file under
* the MPL, indicate your decision by deleting the provisions above and
* replace them with the notice and other provisions required by the LGPL.
* If you do not delete the provisions above, a recipient may use your version
* of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the MPL as stated above or under the terms of the GNU
* Library General Public License as published by the Free Software Foundation;
* either version 2 of the License, or any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
* details.
*
* If you didn't download this code from the following link, you should check if
* you aren't using an obsolete version:
* http://www.lowagie.com/iText/
*/
namespace iTextSharp.text.xml.simpleparser {
    /**
     * The handler for the comments found by <CODE>SimpleXMLParser</CODE>.
     * @author Paulo Soares (psoares@consiste.pt)
     */
    public interface ISimpleXMLDocHandlerComment {
        /**
         * Called when a comment is found.
         * @param text the comment text
         */
        void Comment(string text);
    }
}

View File

@@ -0,0 +1,551 @@
using System;
using System.Collections;
using System.Text;
/*
* $Id: IanaEncodings.cs,v 1.4 2008/05/13 11:26:14 psoares33 Exp $
*
*
* Copyright 2003-2007 Paulo Soares and Bruno Lowagie.
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* (the "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the License.
*
* The Original Code is 'iText, a free JAVA-PDF library'.
*
* The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
* the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
* All Rights Reserved.
* Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
* are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
*
* Contributor(s): all the names of the contributors are added in the source code
* where applicable.
*
* Alternatively, the contents of this file may be used under the terms of the
* LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
* provisions of LGPL are applicable instead of those above. If you wish to
* allow use of your version of this file only under the terms of the LGPL
* License and not to allow others to use your version of this file under
* the MPL, indicate your decision by deleting the provisions above and
* replace them with the notice and other provisions required by the LGPL.
* If you do not delete the provisions above, a recipient may use your version
* of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the MPL as stated above or under the terms of the GNU
* Library General Public License as published by the Free Software Foundation;
* either version 2 of the License, or any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
* details.
*
* If you didn't download this code from the following link, you should check if
* you aren't using an obsolete version:
* http://www.lowagie.com/iText/
*
* The values used in this class are based on class org.apache.xerces.util.EncodingMap
* http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/util/EncodingMap.java?view=markup
* This class was originally published under the following license:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
namespace iTextSharp.text.xml.simpleparser {
/**
* Translates an IANA encoding name to a .NET code page number or <CODE>Encoding</CODE>.
*/
public class IanaEncodings {
/** The object that maps IANA encoding names to .NET code page numbers. */
private static readonly Hashtable map = new Hashtable();
// Fills the lookup table once, when the class is first used.
// Keys are encoding names written in upper case (both lookup methods of this
// class upper-case the requested name before consulting the table); values are
// the integer code page numbers that GetEncodingNumber returns and that
// GetEncodingEncoding hands to Encoding.GetEncoding(int).
static IanaEncodings() {
// add IANA to .NET encoding mappings.
map["CP037"] = 37;
map["CSIBM037"] = 37;
map["EBCDIC-CP-CA"] = 37;
map["EBCDIC-CP-NL"] = 37;
map["EBCDIC-CP-US"] = 37;
map["EBCDIC-CP-WT"] = 37;
map["IBM037"] = 37;
map["CP437"] = 437;
map["CSPC8CODEPAGE437"] = 437;
map["IBM437"] = 437;
map["CP500"] = 500;
map["CSIBM500"] = 500;
map["EBCDIC-CP-BE"] = 500;
map["EBCDIC-CP-CH"] = 500;
map["IBM500"] = 500;
map["ASMO-708"] = 708;
map["DOS-720"] = 720;
map["IBM737"] = 737;
map["IBM775"] = 775;
map["CP850"] = 850;
map["IBM850"] = 850;
map["CP852"] = 852;
map["IBM852"] = 852;
map["CP855"] = 855;
map["IBM855"] = 855;
map["CP857"] = 857;
map["IBM857"] = 857;
map["CCSID00858"] = 858;
map["CP00858"] = 858;
map["CP858"] = 858;
map["IBM00858"] = 858;
map["PC-MULTILINGUAL-850+EURO"] = 858;
map["CP860"] = 860;
map["IBM860"] = 860;
map["CP861"] = 861;
map["IBM861"] = 861;
map["CP862"] = 862;
map["DOS-862"] = 862;
map["IBM862"] = 862;
map["CP863"] = 863;
map["IBM863"] = 863;
map["CP864"] = 864;
map["IBM864"] = 864;
map["CP865"] = 865;
map["IBM865"] = 865;
map["CP866"] = 866;
map["IBM866"] = 866;
map["CP869"] = 869;
map["IBM869"] = 869;
map["CP870"] = 870;
map["CSIBM870"] = 870;
map["EBCDIC-CP-ROECE"] = 870;
map["EBCDIC-CP-YU"] = 870;
map["IBM870"] = 870;
map["DOS-874"] = 874;
map["ISO-8859-11"] = 874;
map["MS874"] = 874;
map["TIS620"] = 874;
map["TIS-620"] = 874;
map["WINDOWS-874"] = 874;
map["CP875"] = 875;
map["CSSHIFTJIS"] = 932;
map["CSWINDOWS31J"] = 932;
map["MS932"] = 932;
map["MS_KANJI"] = 932;
map["SHIFT-JIS"] = 932;
map["SHIFT_JIS"] = 932;
map["SJIS"] = 932;
map["X-MS-CP932"] = 932;
map["X-SJIS"] = 932;
map["CHINESE"] = 936;
map["CN-GB"] = 936;
map["CSGB2312"] = 936;
map["CSGB231280"] = 936;
map["CSISO58GB231280"] = 936;
map["GB2312"] = 936;
map["GB2312-80"] = 936;
map["GB231280"] = 936;
map["GB_2312-80"] = 936;
map["GBK"] = 936;
map["ISO-IR-58"] = 936;
map["MS936"] = 936;
map["CSKSC56011987"] = 949;
map["ISO-IR-149"] = 949;
map["KOREAN"] = 949;
map["KS-C-5601"] = 949;
map["KS-C5601"] = 949;
map["KS_C_5601"] = 949;
map["KS_C_5601-1987"] = 949;
map["KS_C_5601-1989"] = 949;
map["KS_C_5601_1987"] = 949;
map["KSC5601"] = 949;
map["KSC_5601"] = 949;
map["MS949"] = 949;
map["BIG5"] = 950;
map["BIG5-HKSCS"] = 950;
map["CN-BIG5"] = 950;
map["CSBIG5"] = 950;
map["MS950"] = 950;
map["X-X-BIG5"] = 950;
map["CP1026"] = 1026;
map["CSIBM1026"] = 1026;
map["IBM1026"] = 1026;
map["IBM01047"] = 1047;
map["CCSID01140"] = 1140;
map["CP01140"] = 1140;
map["EBCDIC-US-37+EURO"] = 1140;
map["IBM01140"] = 1140;
map["CCSID01141"] = 1141;
map["CP01141"] = 1141;
map["EBCDIC-DE-273+EURO"] = 1141;
map["IBM01141"] = 1141;
map["CCSID01142"] = 1142;
map["CP01142"] = 1142;
map["EBCDIC-DK-277+EURO"] = 1142;
map["EBCDIC-NO-277+EURO"] = 1142;
map["IBM01142"] = 1142;
map["CCSID01143"] = 1143;
map["CP01143"] = 1143;
map["EBCDIC-FI-278+EURO"] = 1143;
map["EBCDIC-SE-278+EURO"] = 1143;
map["IBM01143"] = 1143;
map["CCSID01144"] = 1144;
map["CP01144"] = 1144;
map["EBCDIC-IT-280+EURO"] = 1144;
map["IBM01144"] = 1144;
map["CCSID01145"] = 1145;
map["CP01145"] = 1145;
map["EBCDIC-ES-284+EURO"] = 1145;
map["IBM01145"] = 1145;
map["CCSID01146"] = 1146;
map["CP01146"] = 1146;
map["EBCDIC-GB-285+EURO"] = 1146;
map["IBM01146"] = 1146;
map["CCSID01147"] = 1147;
map["CP01147"] = 1147;
map["EBCDIC-FR-297+EURO"] = 1147;
map["IBM01147"] = 1147;
map["CCSID01148"] = 1148;
map["CP01148"] = 1148;
map["EBCDIC-INTERNATIONAL-500+EURO"] = 1148;
map["IBM01148"] = 1148;
map["CCSID01149"] = 1149;
map["CP01149"] = 1149;
map["EBCDIC-IS-871+EURO"] = 1149;
map["IBM01149"] = 1149;
map["ISO-10646-UCS-2"] = 1200;
map["UCS-2"] = 1200;
map["UNICODE"] = 1200;
map["UTF-16"] = 1200;
map["UTF-16LE"] = 1200;
// NOTE: GetEncodingEncoding returns dedicated UnicodeEncoding instances for the
// UNICODELITTLE* and UNICODEBIG* names below before it consults this table, so
// for those four names the entries here are only seen through GetEncodingNumber.
map["UNICODELITTLEUNMARKED"] = 1200;
map["UNICODELITTLE"] = 1200;
map["UNICODEFFFE"] = 1201;
map["UTF-16BE"] = 1201;
map["UNICODEBIGUNMARKED"] = 1201;
map["UNICODEBIG"] = 1201;
map["CP1250"] = 1250;
map["WINDOWS-1250"] = 1250;
map["X-CP1250"] = 1250;
map["CP1251"] = 1251;
map["WINDOWS-1251"] = 1251;
map["X-CP1251"] = 1251;
map["CP1252"] = 1252;
map["WINDOWS-1252"] = 1252;
map["X-ANSI"] = 1252;
map["CP1253"] = 1253;
map["WINDOWS-1253"] = 1253;
map["CP1254"] = 1254;
map["WINDOWS-1254"] = 1254;
map["CP1255"] = 1255;
map["WINDOWS-1255"] = 1255;
map["CP1256"] = 1256;
map["WINDOWS-1256"] = 1256;
map["CP1257"] = 1257;
map["WINDOWS-1257"] = 1257;
map["CP1258"] = 1258;
map["WINDOWS-1258"] = 1258;
map["JOHAB"] = 1361;
map["MACINTOSH"] = 10000;
map["MACROMAN"] = 10000;
map["X-MAC-JAPANESE"] = 10001;
map["X-MAC-CHINESETRAD"] = 10002;
map["X-MAC-KOREAN"] = 10003;
map["MACARABIC"] = 10004;
map["X-MAC-ARABIC"] = 10004;
map["MACHEBREW"] = 10005;
map["X-MAC-HEBREW"] = 10005;
map["MACGREEK"] = 10006;
map["X-MAC-GREEK"] = 10006;
map["MACCYRILLIC"] = 10007;
map["X-MAC-CYRILLIC"] = 10007;
map["X-MAC-CHINESESIMP"] = 10008;
map["MACROMANIA"] = 10010;
map["MACROMANIAN"] = 10010;
map["X-MAC-ROMANIAN"] = 10010;
map["MACUKRAINE"] = 10017;
map["MACUKRAINIAN"] = 10017;
map["X-MAC-UKRAINIAN"] = 10017;
map["MACTHAI"] = 10021;
map["X-MAC-THAI"] = 10021;
map["MACCENTRALEUROPE"] = 10029;
map["X-MAC-CE"] = 10029;
map["MACICELANDIC"] = 10079;
map["MACICELAND"] = 10079;
map["X-MAC-ICELANDIC"] = 10079;
map["MACTURKISH"] = 10081;
map["X-MAC-TURKISH"] = 10081;
map["MACCROATIAN"] = 10082;
map["X-MAC-CROATIAN"] = 10082;
map["X-CHINESE-CNS"] = 20000;
map["X-CP20001"] = 20001;
map["X-CHINESE-ETEN"] = 20002;
map["X-CP20003"] = 20003;
map["X-CP20004"] = 20004;
map["X-CP20005"] = 20005;
map["IRV"] = 20105;
map["X-IA5"] = 20105;
map["DIN_66003"] = 20106;
map["GERMAN"] = 20106;
map["X-IA5-GERMAN"] = 20106;
map["SEN_850200_B"] = 20107;
map["SWEDISH"] = 20107;
map["X-IA5-SWEDISH"] = 20107;
map["NORWEGIAN"] = 20108;
map["NS_4551-1"] = 20108;
map["X-IA5-NORWEGIAN"] = 20108;
map["ANSI_X3.4-1968"] = 20127;
map["ANSI_X3.4-1986"] = 20127;
map["ASCII"] = 20127;
map["CP367"] = 20127;
map["CSASCII"] = 20127;
map["IBM367"] = 20127;
map["ISO-IR-6"] = 20127;
map["ISO646-US"] = 20127;
map["ISO_646.IRV:1991"] = 20127;
map["US"] = 20127;
map["US-ASCII"] = 20127;
map["X-CP20261"] = 20261;
map["X-CP20269"] = 20269;
map["CP273"] = 20273;
map["CSIBM273"] = 20273;
map["IBM273"] = 20273;
map["CSIBM277"] = 20277;
map["EBCDIC-CP-DK"] = 20277;
map["EBCDIC-CP-NO"] = 20277;
map["IBM277"] = 20277;
map["CP278"] = 20278;
map["CSIBM278"] = 20278;
map["EBCDIC-CP-FI"] = 20278;
map["EBCDIC-CP-SE"] = 20278;
map["IBM278"] = 20278;
map["CP280"] = 20280;
map["CSIBM280"] = 20280;
map["EBCDIC-CP-IT"] = 20280;
map["IBM280"] = 20280;
map["CP284"] = 20284;
map["CSIBM284"] = 20284;
map["EBCDIC-CP-ES"] = 20284;
map["IBM284"] = 20284;
map["CP285"] = 20285;
map["CSIBM285"] = 20285;
map["EBCDIC-CP-GB"] = 20285;
map["IBM285"] = 20285;
map["CP290"] = 20290;
map["CSIBM290"] = 20290;
map["EBCDIC-JP-KANA"] = 20290;
map["IBM290"] = 20290;
map["CP297"] = 20297;
map["CSIBM297"] = 20297;
map["EBCDIC-CP-FR"] = 20297;
map["IBM297"] = 20297;
map["CP420"] = 20420;
map["CSIBM420"] = 20420;
map["EBCDIC-CP-AR1"] = 20420;
map["IBM420"] = 20420;
map["CP423"] = 20423;
map["CSIBM423"] = 20423;
map["EBCDIC-CP-GR"] = 20423;
map["IBM423"] = 20423;
map["CP424"] = 20424;
map["CSIBM424"] = 20424;
map["EBCDIC-CP-HE"] = 20424;
map["IBM424"] = 20424;
map["X-EBCDIC-KOREANEXTENDED"] = 20833;
map["CSIBMTHAI"] = 20838;
map["IBM-THAI"] = 20838;
map["CSKOI8R"] = 20866;
map["KOI"] = 20866;
map["KOI8"] = 20866;
map["KOI8-R"] = 20866;
map["KOI8R"] = 20866;
map["CP871"] = 20871;
map["CSIBM871"] = 20871;
map["EBCDIC-CP-IS"] = 20871;
map["IBM871"] = 20871;
map["CP880"] = 20880;
map["CSIBM880"] = 20880;
map["EBCDIC-CYRILLIC"] = 20880;
map["IBM880"] = 20880;
map["CP905"] = 20905;
map["CSIBM905"] = 20905;
map["EBCDIC-CP-TR"] = 20905;
map["IBM905"] = 20905;
map["CCSID00924"] = 20924;
map["CP00924"] = 20924;
map["EBCDIC-LATIN9--EURO"] = 20924;
map["IBM00924"] = 20924;
map["X-CP20936"] = 20936;
map["X-CP20949"] = 20949;
map["CP1025"] = 21025;
map["X-CP21027"] = 21027;
map["KOI8-RU"] = 21866;
map["KOI8-U"] = 21866;
map["CP819"] = 28591;
map["CSISOLATIN1"] = 28591;
map["IBM819"] = 28591;
map["ISO-8859-1"] = 28591;
map["ISO-IR-100"] = 28591;
map["ISO8859-1"] = 28591;
map["ISO_8859-1"] = 28591;
map["ISO_8859-1:1987"] = 28591;
map["L1"] = 28591;
map["LATIN1"] = 28591;
map["CSISOLATIN2"] = 28592;
map["ISO-8859-2"] = 28592;
map["ISO-IR-101"] = 28592;
map["ISO8859-2"] = 28592;
map["ISO_8859-2"] = 28592;
map["ISO_8859-2:1987"] = 28592;
map["L2"] = 28592;
map["LATIN2"] = 28592;
map["CSISOLATIN3"] = 28593;
map["ISO-8859-3"] = 28593;
map["ISO-IR-109"] = 28593;
map["ISO_8859-3"] = 28593;
map["ISO_8859-3:1988"] = 28593;
map["L3"] = 28593;
map["LATIN3"] = 28593;
map["CSISOLATIN4"] = 28594;
map["ISO-8859-4"] = 28594;
map["ISO-IR-110"] = 28594;
map["ISO_8859-4"] = 28594;
map["ISO_8859-4:1988"] = 28594;
map["L4"] = 28594;
map["LATIN4"] = 28594;
map["CSISOLATINCYRILLIC"] = 28595;
map["CYRILLIC"] = 28595;
map["ISO-8859-5"] = 28595;
map["ISO-IR-144"] = 28595;
map["ISO_8859-5"] = 28595;
map["ISO_8859-5:1988"] = 28595;
map["ARABIC"] = 28596;
map["CSISOLATINARABIC"] = 28596;
map["ECMA-114"] = 28596;
map["ISO-8859-6"] = 28596;
map["ISO-IR-127"] = 28596;
map["ISO_8859-6"] = 28596;
map["ISO_8859-6:1987"] = 28596;
map["CSISOLATINGREEK"] = 28597;
map["ECMA-118"] = 28597;
map["ELOT_928"] = 28597;
map["GREEK"] = 28597;
map["GREEK8"] = 28597;
map["ISO-8859-7"] = 28597;
map["ISO-IR-126"] = 28597;
map["ISO_8859-7"] = 28597;
map["ISO_8859-7:1987"] = 28597;
map["CSISOLATINHEBREW"] = 28598;
map["HEBREW"] = 28598;
map["ISO-8859-8"] = 28598;
map["ISO-IR-138"] = 28598;
map["ISO_8859-8"] = 28598;
map["ISO_8859-8:1988"] = 28598;
map["LOGICAL"] = 28598;
map["VISUAL"] = 28598;
map["CSISOLATIN5"] = 28599;
map["ISO-8859-9"] = 28599;
map["ISO-IR-148"] = 28599;
map["ISO_8859-9"] = 28599;
map["ISO_8859-9:1989"] = 28599;
map["L5"] = 28599;
map["LATIN5"] = 28599;
map["ISO-8859-13"] = 28603;
map["CSISOLATIN9"] = 28605;
map["ISO-8859-15"] = 28605;
map["ISO_8859-15"] = 28605;
map["L9"] = 28605;
map["LATIN9"] = 28605;
map["X-EUROPA"] = 29001;
map["ISO-8859-8-I"] = 38598;
map["ISO-2022-JP"] = 50220;
map["CSISO2022JP"] = 50221;
map["CSISO2022KR"] = 50225;
map["ISO-2022-KR"] = 50225;
map["ISO-2022-KR-7"] = 50225;
map["ISO-2022-KR-7BIT"] = 50225;
map["CP50227"] = 50227;
map["X-CP50227"] = 50227;
map["CP930"] = 50930;
map["X-EBCDIC-JAPANESEANDUSCANADA"] = 50931;
map["CP933"] = 50933;
map["CP935"] = 50935;
map["CP937"] = 50937;
map["CP939"] = 50939;
map["CSEUCPKDFMTJAPANESE"] = 51932;
map["EUC-JP"] = 51932;
map["EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE"] = 51932;
map["ISO-2022-JPEUC"] = 51932;
map["X-EUC"] = 51932;
map["X-EUC-JP"] = 51932;
map["EUC-CN"] = 51936;
map["X-EUC-CN"] = 51936;
map["CSEUCKR"] = 51949;
map["EUC-KR"] = 51949;
map["ISO-2022-KR-8"] = 51949;
map["ISO-2022-KR-8BIT"] = 51949;
map["HZ-GB-2312"] = 52936;
map["GB18030"] = 54936;
map["X-ISCII-DE"] = 57002;
map["X-ISCII-BE"] = 57003;
map["X-ISCII-TA"] = 57004;
map["X-ISCII-TE"] = 57005;
map["X-ISCII-AS"] = 57006;
map["X-ISCII-OR"] = 57007;
map["X-ISCII-KA"] = 57008;
map["X-ISCII-MA"] = 57009;
map["X-ISCII-GU"] = 57010;
map["X-ISCII-PA"] = 57011;
map["CSUNICODE11UTF7"] = 65000;
map["UNICODE-1-1-UTF-7"] = 65000;
map["UNICODE-2-0-UTF-7"] = 65000;
map["UTF-7"] = 65000;
map["X-UNICODE-1-1-UTF-7"] = 65000;
map["X-UNICODE-2-0-UTF-7"] = 65000;
map["UNICODE-1-1-UTF-8"] = 65001;
map["UNICODE-2-0-UTF-8"] = 65001;
map["UTF-8"] = 65001;
map["X-UNICODE-1-1-UTF-8"] = 65001;
map["X-UNICODE-2-0-UTF-8"] = 65001;
}
/**
 * Looks up the code page number registered for an encoding name.
 * The name is converted to upper case (invariant culture) before the lookup.
 * @param name the encoding name
 * @return the code page number, or 0 when the name is not in the table
 */
public static int GetEncodingNumber(string name) {
    string key = name.ToUpper(System.Globalization.CultureInfo.InvariantCulture);
    object codePage = map[key];
    return codePage != null ? (int)codePage : 0;
}
/**
 * Gets the <CODE>Encoding</CODE> that corresponds to an encoding name.
 * The name is converted to upper case (invariant culture) for matching.
 * The four UNICODEBIG / UNICODELITTLE names (with or without the UNMARKED
 * suffix) yield a <CODE>UnicodeEncoding</CODE> built with the matching
 * byte order and byte-order-mark flags. Any other name found in the table
 * is resolved through its code page number, and a name that is not in the
 * table is passed unchanged to <CODE>Encoding.GetEncoding(String)</CODE>.
 * @param name the encoding name
 * @return the encoding
 */
public static Encoding GetEncodingEncoding(string name) {
    string key = name.ToUpper(System.Globalization.CultureInfo.InvariantCulture);
    // UnicodeEncoding(bigEndian, byteOrderMark)
    switch (key) {
        case "UNICODEBIGUNMARKED":
            return new UnicodeEncoding(true, false);
        case "UNICODEBIG":
            return new UnicodeEncoding(true, true);
        case "UNICODELITTLEUNMARKED":
            return new UnicodeEncoding(false, false);
        case "UNICODELITTLE":
            return new UnicodeEncoding(false, true);
    }
    object codePage = map[key];
    if (codePage == null) {
        // not in the table: let the framework resolve the original name
        return Encoding.GetEncoding(name);
    }
    return Encoding.GetEncoding((int)codePage);
}
}
}

View File

@@ -0,0 +1,740 @@
using System;
using System.IO;
using System.Text;
using System.Collections;
using System.Globalization;
/*
* Copyright 2003 Paulo Soares
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* (the "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the License.
*
* The Original Code is 'iText, a free JAVA-PDF library'.
*
* The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
* the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
* All Rights Reserved.
* Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
* are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
*
* Contributor(s): all the names of the contributors are added in the source code
* where applicable.
*
* Alternatively, the contents of this file may be used under the terms of the
* LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
* provisions of LGPL are applicable instead of those above. If you wish to
* allow use of your version of this file only under the terms of the LGPL
* License and not to allow others to use your version of this file under
* the MPL, indicate your decision by deleting the provisions above and
* replace them with the notice and other provisions required by the LGPL.
* If you do not delete the provisions above, a recipient may use your version
* of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the MPL as stated above or under the terms of the GNU
* Library General Public License as published by the Free Software Foundation;
* either version 2 of the License, or any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
* details.
*
* If you didn't download this code from the following link, you should check if
* you aren't using an obsolete version:
* http://www.lowagie.com/iText/
*
* The code to recognize the encoding in this class and in the convenience class IanaEncodings was taken from Apache Xerces published under the following license:
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Part of this code is based on the Quick-and-Dirty XML parser by Steven Brandt.
* The code for the Quick-and-Dirty parser was published in JavaWorld (java tip 128).
* Steven Brandt and JavaWorld gave permission to use the code for free.
* (Bruno Lowagie and Paulo Soares chose to use it under the MPL/LGPL in
* conformance with the rest of the code).
* The original code can be found on this url: <A HREF="http://www.javaworld.com/javatips/jw-javatip128_p.html">http://www.javaworld.com/javatips/jw-javatip128_p.html</A>.
* It was substantially refactored by Bruno Lowagie.
*
* The method 'private static String getEncodingName(byte[] b4)' was found
* in org.apache.xerces.impl.XMLEntityManager, originally published by the
* Apache Software Foundation under the Apache Software License; now being
* used in iText under the MPL.
*/
namespace iTextSharp.text.xml.simpleparser {
/**
* A simple XML and HTML parser. This parser is, like the SAX parser,
* an event based parser, but with much less functionality.
* <p>
* The parser can:
* <p>
* <ul>
* <li>It recognizes the encoding used
* <li>It recognizes all the elements' start tags and end tags
* <li>It lists attributes, where attribute values can be enclosed in single or double quotes
* <li>It recognizes the <code>&lt;![CDATA[ ... ]]&gt;</code> construct
* <li>It recognizes the standard entities: &amp;amp;, &amp;lt;, &amp;gt;, &amp;quot;, and &amp;apos;, as well as numeric entities
* <li>It maps lines ending in <code>\r\n</code> and <code>\r</code> to <code>\n</code> on input, in accordance with the XML Specification, Section 2.11
* </ul>
* <p>
* The code is based on <A HREF="http://www.javaworld.com/javaworld/javatips/javatip128/">
* http://www.javaworld.com/javaworld/javatips/javatip128/</A> with some extra
* code from XERCES to recognize the encoding.
*/
public sealed class SimpleXMLParser {
/** possible states */
private const int UNKNOWN = 0;
private const int TEXT = 1;
private const int TAG_ENCOUNTERED = 2;
private const int EXAMIN_TAG = 3;
private const int TAG_EXAMINED = 4;
private const int IN_CLOSETAG = 5;
private const int SINGLE_TAG = 6;
private const int CDATA = 7;
private const int COMMENT = 8;
private const int PI = 9;
private const int ENTITY = 10;
private const int QUOTE = 11;
private const int ATTRIBUTE_KEY = 12;
private const int ATTRIBUTE_EQUAL = 13;
private const int ATTRIBUTE_VALUE = 14;
/** the state stack */
internal Stack stack;
/** The current character. */
internal int character = 0;
/** The previous character. */
internal int previousCharacter = -1;
/** the line we are currently reading */
internal int lines = 1;
/** the column where the current character occurs */
internal int columns = 0;
/** was the last character equivalent to a newline? */
internal bool eol = false;
/** the current state */
internal int state;
/** Are we parsing HTML? */
internal bool html;
/** current text (whatever is encountered between tags) */
internal StringBuilder text = new StringBuilder();
/** current entity (whatever is encountered between & and ;) */
internal StringBuilder entity = new StringBuilder();
/** current tagname */
internal String tag = null;
/** current attributes */
internal Hashtable attributes = null;
/** The handler to which we are going to forward document content */
internal ISimpleXMLDocHandler doc;
/** The handler to which we are going to forward comments. */
internal ISimpleXMLDocHandlerComment comment;
/** Keeps track of the number of tags that are open. */
internal int nested = 0;
/** the quote character that was used to open the quote. */
internal int quoteCharacter = '"';
/** the attribute key. */
internal String attributekey = null;
/** the attribute value. */
internal String attributevalue = null;
/**
 * Creates a Simple XML parser object.
 * Call Go(TextReader) immediately after creation.
 * The parser starts in the TEXT state when parsing HTML and in the
 * UNKNOWN state otherwise.
 * @param doc the handler that receives the document content
 * @param comment the handler that receives the comments
 * @param html true to parse HTML
 */
private SimpleXMLParser(ISimpleXMLDocHandler doc, ISimpleXMLDocHandlerComment comment, bool html) {
    stack = new Stack();
    this.html = html;
    this.comment = comment;
    this.doc = doc;
    if (html) {
        state = TEXT;
    }
    else {
        state = UNKNOWN;
    }
}
/**
 * Does the actual parsing. Perform this immediately
 * after creating the parser object.
 * <p>
 * The reader is consumed one character at a time and fed to a state machine.
 * doc.StartDocument() is called first; doc.EndDocument() is called when the
 * root element is closed (XML) or when the end of the input is reached (HTML).
 * In XML mode parsing stops as soon as the root element is closed.
 * @param reader the source of the characters to parse
 * @throws IOException when the XML input ends before the root element is closed
 * or when the markup is malformed; the message contains the line and column
 */
private void Go(TextReader reader) {
    doc.StartDocument();
    while (true) {
        // A character pushed back by the ENTITY state has already been counted
        // and normalised, so it must not pass through the line tracking again.
        bool pushedBack = previousCharacter != -1;
        if (pushedBack) {
            // re-examine the previous character
            character = previousCharacter;
            previousCharacter = -1;
        }
        else {
            // read a new character
            character = reader.Read();
        }
        // the end of the input was reached
        if (character == -1) {
            if (html) {
                if (state == TEXT)
                    Flush();
                doc.EndDocument();
            } else {
                ThrowException("Missing end tag");
            }
            return;
        }
        // dealing with \n and \r: "\r\n", "\r" and "\n" all count as one line
        // break and are handed to the state machine as a single '\n'
        if (!pushedBack) {
            if (eol) {
                eol = false;
                if (character == '\n') {
                    // the LF of a CR LF pair; the CR was already reported as '\n'
                    continue;
                }
            }
            if (character == '\n') {
                lines++;
                columns = 0;
            } else if (character == '\r') {
                eol = true;
                character = '\n';
                lines++;
                columns = 0;
            } else {
                columns++;
            }
        }
        switch (state) {
            // we are in an unknown state before there's actual content
            case UNKNOWN:
                if (character == '<') {
                    SaveState(TEXT);
                    state = TAG_ENCOUNTERED;
                }
                break;
            // we can encounter any content
            case TEXT:
                if (character == '<') {
                    Flush();
                    SaveState(state);
                    state = TAG_ENCOUNTERED;
                } else if (character == '&') {
                    SaveState(state);
                    entity.Length = 0;
                    state = ENTITY;
                } else
                    text.Append((char)character);
                break;
            // we have just seen a < and are wondering what we are looking at
            // <foo>, </foo>, <!-- ... -->, etc.
            case TAG_ENCOUNTERED:
                InitTag();
                if (character == '/') {
                    state = IN_CLOSETAG;
                } else if (character == '?') {
                    RestoreState();
                    state = PI;
                } else {
                    text.Append((char)character);
                    state = EXAMIN_TAG;
                }
                break;
            // we are processing something like this <foo ... >.
            // It could still be a <!-- ... --> or something.
            case EXAMIN_TAG:
                if (character == '>') {
                    DoTag();
                    ProcessTag(true);
                    InitTag();
                    state = RestoreState();
                } else if (character == '/') {
                    state = SINGLE_TAG;
                } else if (character == '-' && text.ToString().Equals("!-")) {
                    Flush();
                    state = COMMENT;
                } else if (character == '[' && text.ToString().Equals("![CDATA")) {
                    Flush();
                    state = CDATA;
                } else if (character == 'E' && text.ToString().Equals("!DOCTYP")) {
                    Flush();
                    state = PI;
                } else if (char.IsWhiteSpace((char)character)) {
                    DoTag();
                    state = TAG_EXAMINED;
                } else {
                    text.Append((char)character);
                }
                break;
            // we know the name of the tag now.
            case TAG_EXAMINED:
                if (character == '>') {
                    ProcessTag(true);
                    InitTag();
                    state = RestoreState();
                } else if (character == '/') {
                    state = SINGLE_TAG;
                } else if (char.IsWhiteSpace((char)character)) {
                    // empty
                } else {
                    text.Append((char)character);
                    state = ATTRIBUTE_KEY;
                }
                break;
            // we are processing a closing tag: e.g. </foo>
            case IN_CLOSETAG:
                if (character == '>') {
                    DoTag();
                    ProcessTag(false);
                    if (!html && nested==0) {
                        // the root element was closed: finish the document
                        // exactly like the self-closing root case does
                        doc.EndDocument();
                        return;
                    }
                    state = RestoreState();
                } else {
                    if (!char.IsWhiteSpace((char)character))
                        text.Append((char)character);
                }
                break;
            // we have just seen something like this: <foo a="b"/
            // and are looking for the final >.
            case SINGLE_TAG:
                if (character != '>')
                    ThrowException("Expected > for tag: <"+tag+"/>");
                DoTag();
                ProcessTag(true);
                ProcessTag(false);
                InitTag();
                if (!html && nested==0) {
                    doc.EndDocument();
                    return;
                }
                state = RestoreState();
                break;
            // we are processing CDATA
            case CDATA:
                if (character == '>'
                    && text.ToString().EndsWith("]]", StringComparison.Ordinal)) {
                    // drop the "]]" that belongs to the terminator
                    text.Length = text.Length - 2;
                    Flush();
                    state = RestoreState();
                } else
                    text.Append((char)character);
                break;
            // we are processing a comment. We are inside
            // the <!-- .... --> looking for the -->.
            case COMMENT:
                if (character == '>'
                    && text.ToString().EndsWith("--", StringComparison.Ordinal)) {
                    // drop the "--" that belongs to the terminator
                    text.Length = text.Length - 2;
                    Flush();
                    state = RestoreState();
                } else
                    text.Append((char)character);
                break;
            // We are inside one of these <? ... ?> or one of these <!DOCTYPE ... >
            case PI:
                if (character == '>') {
                    state = RestoreState();
                    if (state == TEXT) state = UNKNOWN;
                }
                break;
            // we are processing an entity, e.g. &lt;, &#187;, etc.
            case ENTITY:
                if (character == ';') {
                    state = RestoreState();
                    String cent = entity.ToString();
                    entity.Length = 0;
                    char ce = EntitiesToUnicode.DecodeEntity(cent);
                    // an unknown entity is kept literally
                    if (ce == '\0')
                        text.Append('&').Append(cent).Append(';');
                    else
                        text.Append(ce);
                } else if ((character != '#' && (character < '0' || character > '9') && (character < 'a' || character > 'z')
                    && (character < 'A' || character > 'Z')) || entity.Length >= 7) {
                    // not an entity after all: keep what was read as plain text and
                    // let the state we came from examine the current character again
                    state = RestoreState();
                    previousCharacter = character;
                    text.Append('&').Append(entity.ToString());
                    entity.Length = 0;
                }
                else {
                    entity.Append((char)character);
                }
                break;
            // We are processing the quoted right-hand side of an element's attribute.
            // quoteCharacter == ' ' marks an unquoted value, which only HTML allows.
            case QUOTE:
                if (html && quoteCharacter == ' ' && character == '>') {
                    Flush();
                    ProcessTag(true);
                    InitTag();
                    state = RestoreState();
                }
                else if (html && quoteCharacter == ' ' && char.IsWhiteSpace((char)character)) {
                    Flush();
                    state = TAG_EXAMINED;
                }
                else if (html && quoteCharacter == ' ') {
                    text.Append((char)character);
                }
                else if (character == quoteCharacter) {
                    Flush();
                    state = TAG_EXAMINED;
                } else if (" \r\n\u0009".IndexOf((char)character)>=0) {
                    // whitespace inside a quoted value is stored as a single space
                    text.Append(' ');
                } else if (character == '&') {
                    SaveState(state);
                    state = ENTITY;
                    entity.Length = 0;
                } else {
                    text.Append((char)character);
                }
                break;
            // we are reading the name of an attribute
            case ATTRIBUTE_KEY:
                if (char.IsWhiteSpace((char)character)) {
                    Flush();
                    state = ATTRIBUTE_EQUAL;
                } else if (character == '=') {
                    Flush();
                    state = ATTRIBUTE_VALUE;
                } else if (html && character == '>') {
                    // HTML attribute without a value right before '>': it is discarded
                    text.Length = 0;
                    ProcessTag(true);
                    InitTag();
                    state = RestoreState();
                } else {
                    text.Append((char)character);
                }
                break;
            // the attribute name was followed by whitespace; we expect '='
            case ATTRIBUTE_EQUAL:
                if (character == '=') {
                    state = ATTRIBUTE_VALUE;
                } else if (char.IsWhiteSpace((char)character)) {
                    // empty
                } else if (html && character == '>') {
                    text.Length = 0;
                    ProcessTag(true);
                    InitTag();
                    state = RestoreState();
                } else if (html && character == '/') {
                    Flush();
                    state = SINGLE_TAG;
                } else if (html) {
                    Flush();
                    text.Append((char)character);
                    state = ATTRIBUTE_KEY;
                } else {
                    ThrowException("Error in attribute processing.");
                }
                break;
            // we have seen '=' and expect the start of the attribute value
            case ATTRIBUTE_VALUE:
                if (character == '"' || character == '\'') {
                    quoteCharacter = character;
                    state = QUOTE;
                } else if (char.IsWhiteSpace((char)character)) {
                    // empty
                } else if (html && character == '>') {
                    Flush();
                    ProcessTag(true);
                    InitTag();
                    state = RestoreState();
                } else if (html) {
                    text.Append((char)character);
                    quoteCharacter = ' ';
                    state = QUOTE;
                } else {
                    ThrowException("Error in attribute processing");
                }
                break;
        }
    }
}
/**
 * Pops the most recently saved state off the stack.
 * @return the saved state, or UNKNOWN when the stack is empty
 */
private int RestoreState() {
    if (stack.Count == 0) {
        return UNKNOWN;
    }
    object saved = stack.Pop();
    return (int)saved;
}
/**
 * Pushes a state onto the stack so that RestoreState() can return to it later.
 * @param s the state to remember
 */
private void SaveState(int s) {
    this.stack.Push(s);
}
/**
 * Hands over the buffered text and empties the buffer.
 * What happens to the text depends on the current state:
 * in TEXT and CDATA it is sent to the document handler (unless it is empty),
 * in COMMENT it is sent to the comment handler (if there is one),
 * in ATTRIBUTE_KEY it becomes the current attribute key (lower-cased for HTML),
 * in QUOTE and ATTRIBUTE_VALUE it is stored as the value of the current key.
 * In every other state the text is simply discarded.
 */
private void Flush() {
    if (state == TEXT || state == CDATA) {
        if (text.Length != 0) {
            doc.Text(text.ToString());
        }
    }
    else if (state == COMMENT) {
        if (comment != null) {
            comment.Comment(text.ToString());
        }
    }
    else if (state == ATTRIBUTE_KEY) {
        String key = text.ToString();
        attributekey = html ? key.ToLower(CultureInfo.InvariantCulture) : key;
    }
    else if (state == QUOTE || state == ATTRIBUTE_VALUE) {
        attributevalue = text.ToString();
        attributes[attributekey] = attributevalue;
    }
    // any other state: nothing to forward
    text.Length = 0;
}
/**
 * Resets the tag data: the tag name is cleared and a fresh,
 * empty attribute table is created.
 */
private void InitTag() {
    this.attributes = new Hashtable();
    this.tag = null;
}
/**
 * Establishes the name of the current tag and empties the text buffer.
 * The buffered text is used as the name unless a name was already set;
 * when parsing HTML the name is converted to lower case.
 */
private void DoTag() {
    String name = tag;
    if (name == null) {
        name = text.ToString();
    }
    tag = html ? name.ToLower(CultureInfo.InvariantCulture) : name;
    text.Length = 0;
}
/**
 * Reports the current tag to the document handler and keeps the
 * count of open tags up to date.
 * @param start true when the tag has just been opened (the handler also
 * receives the attributes); false when the tag is being closed
 */
private void ProcessTag(bool start) {
    if (!start) {
        nested--;
        doc.EndElement(tag);
        return;
    }
    nested++;
    doc.StartElement(tag, attributes);
}
/**
 * Always throws an IOException whose message is the given text
 * followed by the current line and column.
 * @param s the description of the problem
 * @throws IOException always
 */
private void ThrowException(String s) {
    StringBuilder message = new StringBuilder();
    message.Append(s);
    message.Append(" near line ").Append(lines);
    message.Append(", column ").Append(columns);
    throw new IOException(message.ToString());
}
/**
 * Parses a document, firing the events to the handlers.
 * @param doc the document handler
 * @param comment the comment handler
 * @param r the document. The encoding is already resolved. The reader is not closed
 * @param html true to parse the input as HTML, false to parse it as XML
 * @throws IOException on error
 */
public static void Parse(ISimpleXMLDocHandler doc, ISimpleXMLDocHandlerComment comment, TextReader r, bool html) {
    new SimpleXMLParser(doc, comment, html).Go(r);
}
/**
 * Parses the XML document firing the events to the handler.
 * <p>
 * The encoding is guessed from the first four bytes. For UTF-8 and EBCDIC
 * input the bytes up to the first '>' are then read as the XML declaration,
 * and an encoding named there takes precedence. The bytes read during this
 * detection are consumed and are not seen by the parser.
 * NOTE(review): a UTF-8 document without an XML declaration therefore appears
 * to lose its first tag - confirm that callers always supply a declaration.
 * @param doc the document handler
 * @param inp the document. The encoding is deduced from the stream. The stream is not closed
 * @throws IOException on error, or when the stream holds fewer than four bytes
 */
public static void Parse(ISimpleXMLDocHandler doc, Stream inp) {
    byte[] b4 = new byte[4];
    // Stream.Read may return fewer bytes than requested even though more data
    // will follow, so keep reading until the buffer is full or the stream ends.
    int count = 0;
    while (count < b4.Length) {
        int read = inp.Read(b4, count, b4.Length - count);
        if (read <= 0)
            break;
        count += read;
    }
    if (count != 4)
        throw new IOException("Insufficient length.");
    String encoding = GetEncodingName(b4);
    String decl = null;
    if (encoding.Equals("UTF-8")) {
        // the declaration only contains ASCII, so every byte maps to one char
        StringBuilder sb = new StringBuilder();
        int c;
        while ((c = inp.ReadByte()) != -1) {
            if (c == '>')
                break;
            sb.Append((char)c);
        }
        decl = sb.ToString();
    }
    else if (encoding.Equals("CP037")) {
        MemoryStream bi = new MemoryStream();
        int c;
        while ((c = inp.ReadByte()) != -1) {
            if (c == 0x6e) // that's '>' in ebcdic
                break;
            bi.WriteByte((byte)c);
        }
        decl = Encoding.GetEncoding(37).GetString(bi.ToArray());//cp037 ebcdic
    }
    if (decl != null) {
        // an explicitly declared encoding overrides the guess
        decl = GetDeclaredEncoding(decl);
        if (decl != null)
            encoding = decl;
    }
    Parse(doc, new StreamReader(inp, IanaEncodings.GetEncodingEncoding(encoding)));
}
/**
 * Extracts the value of the encoding pseudo-attribute from an XML declaration.
 * The value is the text between the first quote character (single or double)
 * found after the word "encoding" and the next quote of the same kind.
 * @param decl the text of the declaration; may be null
 * @return the declared encoding, or null when decl is null, does not contain
 * "encoding", or contains no properly quoted value after it
 */
private static String GetDeclaredEncoding(String decl) {
    if (decl == null)
        return null;
    // ordinal search: a culture-sensitive search may match across ignorable characters
    int idx = decl.IndexOf("encoding", StringComparison.Ordinal);
    if (idx < 0)
        return null;
    int doubleQuote = decl.IndexOf('"', idx);
    int singleQuote = decl.IndexOf('\'', idx);
    if (doubleQuote < 0 && singleQuote < 0)
        return null;
    // the value is delimited by whichever kind of quote comes first
    int open;
    char quote;
    if (doubleQuote < 0 || (singleQuote >= 0 && singleQuote < doubleQuote)) {
        open = singleQuote;
        quote = '\'';
    } else {
        open = doubleQuote;
        quote = '"';
    }
    int close = decl.IndexOf(quote, open + 1);
    if (close < 0)
        return null;
    return decl.Substring(open + 1, close - (open + 1));
}
/**
 * Parses the XML document firing the events to the handler.
 * No comment handler is used and the input is parsed as XML, not as HTML.
 * @param doc the document handler
 * @param r the document. The reader is not closed
 * @throws IOException on error
 */
public static void Parse(ISimpleXMLDocHandler doc, TextReader r) {
    ISimpleXMLDocHandlerComment noCommentHandler = null;
    Parse(doc, noCommentHandler, r, false);
}
/**
 * Escapes a string with the appropriated XML codes.
 * The characters &lt; &gt; &amp; " and ' are replaced by their predefined entities.
 * @param s the string to be escaped
 * @param onlyASCII codes above 127 will always be escaped with &amp;#nn; if <CODE>true</CODE>.
 * A surrogate pair is written as one reference to the code point it encodes,
 * because a reference to a single surrogate is not a legal XML character reference
 * @return the escaped string
 */
public static String EscapeXML(String s, bool onlyASCII) {
    int len = s.Length;
    StringBuilder sb = new StringBuilder(len);
    for (int k = 0; k < len; ++k) {
        char c = s[k];
        switch (c) {
            case '<':
                sb.Append("&lt;");
                break;
            case '>':
                sb.Append("&gt;");
                break;
            case '&':
                sb.Append("&amp;");
                break;
            case '"':
                sb.Append("&quot;");
                break;
            case '\'':
                sb.Append("&apos;");
                break;
            default:
                if (onlyASCII && c > 127) {
                    int codePoint = c;
                    // combine a valid surrogate pair into the code point it stands for
                    if (char.IsHighSurrogate(c) && k + 1 < len && char.IsLowSurrogate(s[k + 1])) {
                        codePoint = char.ConvertToUtf32(c, s[k + 1]);
                        ++k; // the low surrogate has been consumed as well
                    }
                    sb.Append("&#").Append(codePoint.ToString(CultureInfo.InvariantCulture)).Append(';');
                }
                else
                    sb.Append(c);
                break;
        }
    }
    return sb.ToString();
}
/**
 * Guesses the IANA name of the encoding from the first bytes of a document,
 * including the endian-ness where that applies.
 * (method found in org.apache.xerces.impl.XMLEntityManager, originally published
 * by the Apache Software Foundation under the Apache Software License; now being
 * used in iText under the MPL)
 * @param b4 The first four bytes of the input.
 * @return an IANA-encoding string; "UTF-8" when nothing else is recognised
 */
private static String GetEncodingName(byte[] b4) {
    byte first = b4[0];
    byte second = b4[1];
    // byte order mark of UTF-16, big-endian
    if (first == 0xFE && second == 0xFF)
        return "UTF-16BE";
    // byte order mark of UTF-16, little-endian
    if (first == 0xFF && second == 0xFE)
        return "UTF-16LE";

    byte third = b4[2];
    // byte order mark of UTF-8
    if (first == 0xEF && second == 0xBB && third == 0xBF)
        return "UTF-8";

    // No byte order mark: look at how "<" / "<?" is laid out in the four bytes.
    byte fourth = b4[3];
    uint signature = ((uint)first << 24) | ((uint)second << 16) | ((uint)third << 8) | fourth;
    switch (signature) {
        case 0x0000003C: // UCS-4, big endian (1234)
        case 0x3C000000: // UCS-4, little endian (4321)
        case 0x00003C00: // UCS-4, unusual octet order (2143)
        case 0x003C0000: // UCS-4, unusual octet order (3412)
            // REVISIT: What should the unusual octet orders be?
            return "ISO-10646-UCS-4";
        case 0x003C003F:
            // UTF-16, big-endian, no BOM (or could turn out to be UCS-2...)
            // REVISIT: What should this be?
            return "UTF-16BE";
        case 0x3C003F00:
            // UTF-16, little-endian, no BOM (or could turn out to be UCS-2...)
            return "UTF-16LE";
        case 0x4C6FA794:
            // EBCDIC; a la xerces1, return CP037 instead of EBCDIC here
            return "CP037";
        default:
            // default encoding
            return "UTF-8";
    }
}
}
}