Initial Commit
This commit is contained in:
381
iTechSharp/iTextSharp/text/xml/simpleparser/EntitiesToSymbol.cs
Normal file
381
iTechSharp/iTextSharp/text/xml/simpleparser/EntitiesToSymbol.cs
Normal file
@@ -0,0 +1,381 @@
|
||||
using System;
|
||||
using System.Collections;
|
||||
using iTextSharp.text;
|
||||
/*
|
||||
* $Id: EntitiesToSymbol.cs,v 1.3 2008/05/13 11:26:14 psoares33 Exp $
|
||||
*
|
||||
*
|
||||
* Copyright 1999, 2000, 2001, 2002 Bruno Lowagie.
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
||||
* (the "License"); you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the License.
|
||||
*
|
||||
* The Original Code is 'iText, a free JAVA-PDF library'.
|
||||
*
|
||||
* The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
|
||||
* the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
|
||||
* All Rights Reserved.
|
||||
* Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
|
||||
* are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): all the names of the contributors are added in the source code
|
||||
* where applicable.
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of the
|
||||
* LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
|
||||
* provisions of LGPL are applicable instead of those above. If you wish to
|
||||
* allow use of your version of this file only under the terms of the LGPL
|
||||
* License and not to allow others to use your version of this file under
|
||||
* the MPL, indicate your decision by deleting the provisions above and
|
||||
* replace them with the notice and other provisions required by the LGPL.
|
||||
* If you do not delete the provisions above, a recipient may use your version
|
||||
* of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the MPL as stated above or under the terms of the GNU
|
||||
* Library General Public License as published by the Free Software Foundation;
|
||||
* either version 2 of the License, or any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
|
||||
* details.
|
||||
*
|
||||
* If you didn't download this code from the following link, you should check if
|
||||
* you aren't using an obsolete version:
|
||||
* http://www.lowagie.com/iText/
|
||||
*/
|
||||
|
||||
namespace iTextSharp.text.xml.simpleparser {
|
||||
|
||||
/**
|
||||
* This class contains entities that can be used in an entity tag.
|
||||
*/
|
||||
|
||||
public class EntitiesToSymbol {
|
||||
|
||||
/**
|
||||
* This is a map that contains all possible id values of the entity tag
|
||||
* that can be translated to a character in font Symbol.
|
||||
*/
|
||||
public static readonly Hashtable map;
|
||||
|
||||
static EntitiesToSymbol() {
    // Build the table in a local and publish it to the readonly field once,
    // at the very end. Keys are entity ids (either decimal numbers written as
    // strings, or entity names); values are the character codes handed to
    // Font.SYMBOL by Get().
    Hashtable symbols = new Hashtable();

    // ---- Keys written as decimal numbers ----
    // NOTE(review): "169" maps to 227 here while the named key "copy" below
    // maps to 211 -- confirm whether the two are meant to differ.
    symbols["169"] = (char)227;
    symbols["172"] = (char)216;
    symbols["174"] = (char)210;
    symbols["177"] = (char)177;
    symbols["215"] = (char)180;
    symbols["247"] = (char)184;
    symbols["8230"] = (char)188;
    symbols["8242"] = (char)162;
    symbols["8243"] = (char)178;
    symbols["8260"] = (char)164;
    symbols["8364"] = (char)240;
    symbols["8465"] = (char)193;
    symbols["8472"] = (char)195;
    symbols["8476"] = (char)194;
    symbols["8482"] = (char)212;
    symbols["8501"] = (char)192;
    symbols["8592"] = (char)172;
    symbols["8593"] = (char)173;
    symbols["8594"] = (char)174;
    symbols["8595"] = (char)175;
    symbols["8596"] = (char)171;
    symbols["8629"] = (char)191;
    symbols["8656"] = (char)220;
    symbols["8657"] = (char)221;
    symbols["8658"] = (char)222;
    symbols["8659"] = (char)223;
    symbols["8660"] = (char)219;
    symbols["8704"] = (char)34;
    symbols["8706"] = (char)182;
    symbols["8707"] = (char)36;
    symbols["8709"] = (char)198;
    symbols["8711"] = (char)209;
    symbols["8712"] = (char)206;
    symbols["8713"] = (char)207;
    symbols["8717"] = (char)39;
    symbols["8719"] = (char)213;
    symbols["8721"] = (char)229;
    symbols["8722"] = (char)45;
    symbols["8727"] = (char)42;
    symbols["8729"] = (char)183;
    symbols["8730"] = (char)214;
    symbols["8733"] = (char)181;
    symbols["8734"] = (char)165;
    symbols["8736"] = (char)208;
    symbols["8743"] = (char)217;
    symbols["8744"] = (char)218;
    symbols["8745"] = (char)199;
    symbols["8746"] = (char)200;
    symbols["8747"] = (char)242;
    symbols["8756"] = (char)92;
    symbols["8764"] = (char)126;
    symbols["8773"] = (char)64;
    symbols["8776"] = (char)187;
    symbols["8800"] = (char)185;
    symbols["8801"] = (char)186;
    symbols["8804"] = (char)163;
    symbols["8805"] = (char)179;
    symbols["8834"] = (char)204;
    symbols["8835"] = (char)201;
    symbols["8836"] = (char)203;
    symbols["8838"] = (char)205;
    symbols["8839"] = (char)202;
    symbols["8853"] = (char)197;
    symbols["8855"] = (char)196;
    symbols["8869"] = (char)94;
    symbols["8901"] = (char)215;
    symbols["8992"] = (char)243;
    symbols["8993"] = (char)245;
    symbols["9001"] = (char)225;
    symbols["9002"] = (char)241;
    symbols["913"] = (char)65;
    symbols["914"] = (char)66;
    symbols["915"] = (char)71;
    symbols["916"] = (char)68;
    symbols["917"] = (char)69;
    symbols["918"] = (char)90;
    symbols["919"] = (char)72;
    symbols["920"] = (char)81;
    symbols["921"] = (char)73;
    symbols["922"] = (char)75;
    symbols["923"] = (char)76;
    symbols["924"] = (char)77;
    symbols["925"] = (char)78;
    symbols["926"] = (char)88;
    symbols["927"] = (char)79;
    symbols["928"] = (char)80;
    symbols["929"] = (char)82;
    symbols["931"] = (char)83;
    symbols["932"] = (char)84;
    symbols["933"] = (char)85;
    symbols["934"] = (char)70;
    symbols["935"] = (char)67;
    symbols["936"] = (char)89;
    symbols["937"] = (char)87;
    symbols["945"] = (char)97;
    symbols["946"] = (char)98;
    symbols["947"] = (char)103;
    symbols["948"] = (char)100;
    symbols["949"] = (char)101;
    symbols["950"] = (char)122;
    symbols["951"] = (char)104;
    symbols["952"] = (char)113;
    symbols["953"] = (char)105;
    symbols["954"] = (char)107;
    symbols["955"] = (char)108;
    symbols["956"] = (char)109;
    symbols["957"] = (char)110;
    symbols["958"] = (char)120;
    symbols["959"] = (char)111;
    symbols["960"] = (char)112;
    symbols["961"] = (char)114;
    symbols["962"] = (char)86;
    symbols["963"] = (char)115;
    symbols["964"] = (char)116;
    symbols["965"] = (char)117;
    symbols["966"] = (char)102;
    symbols["967"] = (char)99;
    symbols["9674"] = (char)224;
    symbols["968"] = (char)121;
    symbols["969"] = (char)119;
    symbols["977"] = (char)74;
    symbols["978"] = (char)161;
    symbols["981"] = (char)106;
    symbols["982"] = (char)118;
    symbols["9824"] = (char)170;
    symbols["9827"] = (char)167;
    symbols["9829"] = (char)169;
    symbols["9830"] = (char)168;

    // ---- Keys written as names (case-sensitive: "Prime" and "prime" differ) ----
    symbols["Alpha"] = (char)65;
    symbols["Beta"] = (char)66;
    symbols["Chi"] = (char)67;
    symbols["Delta"] = (char)68;
    symbols["Epsilon"] = (char)69;
    symbols["Eta"] = (char)72;
    symbols["Gamma"] = (char)71;
    symbols["Iota"] = (char)73;
    symbols["Kappa"] = (char)75;
    symbols["Lambda"] = (char)76;
    symbols["Mu"] = (char)77;
    symbols["Nu"] = (char)78;
    symbols["Omega"] = (char)87;
    symbols["Omicron"] = (char)79;
    symbols["Phi"] = (char)70;
    symbols["Pi"] = (char)80;
    symbols["Prime"] = (char)178;
    symbols["Psi"] = (char)89;
    symbols["Rho"] = (char)82;
    symbols["Sigma"] = (char)83;
    symbols["Tau"] = (char)84;
    symbols["Theta"] = (char)81;
    symbols["Upsilon"] = (char)85;
    symbols["Xi"] = (char)88;
    symbols["Zeta"] = (char)90;
    symbols["alefsym"] = (char)192;
    symbols["alpha"] = (char)97;
    symbols["and"] = (char)217;
    symbols["ang"] = (char)208;
    symbols["asymp"] = (char)187;
    symbols["beta"] = (char)98;
    symbols["cap"] = (char)199;
    symbols["chi"] = (char)99;
    symbols["clubs"] = (char)167;
    symbols["cong"] = (char)64;
    symbols["copy"] = (char)211;
    symbols["crarr"] = (char)191;
    symbols["cup"] = (char)200;
    symbols["dArr"] = (char)223;
    symbols["darr"] = (char)175;
    symbols["delta"] = (char)100;
    symbols["diams"] = (char)168;
    symbols["divide"] = (char)184;
    symbols["empty"] = (char)198;
    symbols["epsilon"] = (char)101;
    symbols["equiv"] = (char)186;
    symbols["eta"] = (char)104;
    symbols["euro"] = (char)240;
    symbols["exist"] = (char)36;
    symbols["forall"] = (char)34;
    symbols["frasl"] = (char)164;
    symbols["gamma"] = (char)103;
    symbols["ge"] = (char)179;
    symbols["hArr"] = (char)219;
    symbols["harr"] = (char)171;
    symbols["hearts"] = (char)169;
    symbols["hellip"] = (char)188;
    symbols["horizontal arrow extender"] = (char)190;
    symbols["image"] = (char)193;
    symbols["infin"] = (char)165;
    symbols["int"] = (char)242;
    symbols["iota"] = (char)105;
    symbols["isin"] = (char)206;
    symbols["kappa"] = (char)107;
    symbols["lArr"] = (char)220;
    symbols["lambda"] = (char)108;
    symbols["lang"] = (char)225;
    symbols["large brace extender"] = (char)239;
    symbols["large integral extender"] = (char)244;
    symbols["large left brace (bottom)"] = (char)238;
    symbols["large left brace (middle)"] = (char)237;
    symbols["large left brace (top)"] = (char)236;
    symbols["large left bracket (bottom)"] = (char)235;
    symbols["large left bracket (extender)"] = (char)234;
    symbols["large left bracket (top)"] = (char)233;
    symbols["large left parenthesis (bottom)"] = (char)232;
    symbols["large left parenthesis (extender)"] = (char)231;
    symbols["large left parenthesis (top)"] = (char)230;
    symbols["large right brace (bottom)"] = (char)254;
    symbols["large right brace (middle)"] = (char)253;
    symbols["large right brace (top)"] = (char)252;
    symbols["large right bracket (bottom)"] = (char)251;
    symbols["large right bracket (extender)"] = (char)250;
    symbols["large right bracket (top)"] = (char)249;
    symbols["large right parenthesis (bottom)"] = (char)248;
    symbols["large right parenthesis (extender)"] = (char)247;
    symbols["large right parenthesis (top)"] = (char)246;
    symbols["larr"] = (char)172;
    symbols["le"] = (char)163;
    symbols["lowast"] = (char)42;
    symbols["loz"] = (char)224;
    symbols["minus"] = (char)45;
    symbols["mu"] = (char)109;
    symbols["nabla"] = (char)209;
    symbols["ne"] = (char)185;
    symbols["not"] = (char)216;
    symbols["notin"] = (char)207;
    symbols["nsub"] = (char)203;
    symbols["nu"] = (char)110;
    symbols["omega"] = (char)119;
    symbols["omicron"] = (char)111;
    symbols["oplus"] = (char)197;
    symbols["or"] = (char)218;
    symbols["otimes"] = (char)196;
    symbols["part"] = (char)182;
    symbols["perp"] = (char)94;
    symbols["phi"] = (char)102;
    symbols["pi"] = (char)112;
    symbols["piv"] = (char)118;
    symbols["plusmn"] = (char)177;
    symbols["prime"] = (char)162;
    symbols["prod"] = (char)213;
    symbols["prop"] = (char)181;
    symbols["psi"] = (char)121;
    symbols["rArr"] = (char)222;
    symbols["radic"] = (char)214;
    symbols["radical extender"] = (char)96;
    symbols["rang"] = (char)241;
    symbols["rarr"] = (char)174;
    symbols["real"] = (char)194;
    symbols["reg"] = (char)210;
    symbols["rho"] = (char)114;
    symbols["sdot"] = (char)215;
    symbols["sigma"] = (char)115;
    symbols["sigmaf"] = (char)86;
    symbols["sim"] = (char)126;
    symbols["spades"] = (char)170;
    symbols["sub"] = (char)204;
    symbols["sube"] = (char)205;
    symbols["sum"] = (char)229;
    symbols["sup"] = (char)201;
    symbols["supe"] = (char)202;
    symbols["tau"] = (char)116;
    symbols["there4"] = (char)92;
    symbols["theta"] = (char)113;
    symbols["thetasym"] = (char)74;
    symbols["times"] = (char)180;
    symbols["trade"] = (char)212;
    symbols["uArr"] = (char)221;
    symbols["uarr"] = (char)173;
    symbols["upsih"] = (char)161;
    symbols["upsilon"] = (char)117;
    symbols["vertical arrow extender"] = (char)189;
    symbols["weierp"] = (char)195;
    symbols["xi"] = (char)120;
    symbols["zeta"] = (char)122;

    map = symbols;
}
|
||||
|
||||
/**
* Gets a chunk for an entity id.
* <p>
* Resolution order:
* 1. If the id is a key of the symbol map, the chunk holds the mapped
*    character in a Font.SYMBOL font that copies the size, style and color
*    of the font passed in.
* 2. Otherwise, if the id is a decimal integer, it is treated as a Unicode
*    code point and the chunk holds the corresponding text in the font
*    passed in.
* 3. Otherwise the chunk holds the id text itself in the font passed in.
*
* @param e a symbol value: an entity name or a decimal number
*          (for example "alpha" is the Greek letter alpha)
* @param font the font to use if the symbol isn't found (otherwise its size,
*          style and color are applied to Font.SYMBOL); it is dereferenced
*          when a symbol is found, so it must not be null in that case
* @return a Chunk
*/
public static Chunk Get(String e, Font font) {
    char s = GetCorrespondingSymbol(e);
    if (s != '\0') {
        Font symbol = new Font(Font.SYMBOL, font.Size, font.Style, font.Color);
        return new Chunk(s.ToString(), symbol);
    }

    // Not a Symbol entity: try to read the id as a numeric character reference.
    // TryParse replaces the former catch-all around int.Parse, and the invariant
    // culture keeps the result independent of the machine's regional settings.
    int code;
    if (int.TryParse(e, System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture, out code)) {
        if (code >= 0 && code <= 0xFFFF) {
            return new Chunk("" + (char)code, font);
        }
        if (code >= 0x10000 && code <= 0x10FFFF) {
            // A plain (char) cast would silently wrap to an unrelated character;
            // a supplementary code point needs a surrogate pair instead.
            // NOTE(review): whether the target font can render it is not
            // visible from here -- confirm.
            return new Chunk(char.ConvertFromUtf32(code), font);
        }
        // Negative or beyond the Unicode range: fall through to the raw text.
    }
    return new Chunk(e, font);
}
|
||||
|
||||
/**
* Looks for the corresponding symbol in the font Symbol.
*
* @param name the name of the entity (an entity name or a decimal number
*          written as a string); keys are matched exactly, including case
* @return the corresponding character in font Symbol, or '\0' if the
*          name is not a key of the map
*/
public static char GetCorrespondingSymbol(String name) {
    // One indexer lookup instead of ContainsKey followed by the indexer:
    // Hashtable returns null for a missing key, and the table built by the
    // static constructor stores only boxed chars, so null means "not found".
    object symbol = map[name];
    if (symbol == null) {
        return '\0';
    }
    return (char)symbol;
}
|
||||
}
|
||||
}
|
442
iTechSharp/iTextSharp/text/xml/simpleparser/EntitiesToUnicode.cs
Normal file
442
iTechSharp/iTextSharp/text/xml/simpleparser/EntitiesToUnicode.cs
Normal file
@@ -0,0 +1,442 @@
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Text;
|
||||
using System.Globalization;
|
||||
/*
|
||||
* $Id: EntitiesToUnicode.cs,v 1.3 2008/05/13 11:26:14 psoares33 Exp $
|
||||
*
|
||||
*
|
||||
* Copyright 2003-2007 Paulo Soares and Bruno Lowagie.
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
||||
* (the "License"); you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the License.
|
||||
*
|
||||
* The Original Code is 'iText, a free JAVA-PDF library'.
|
||||
*
|
||||
* The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
|
||||
* the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
|
||||
* All Rights Reserved.
|
||||
* Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
|
||||
* are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): all the names of the contributors are added in the source code
|
||||
* where applicable.
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of the
|
||||
* LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
|
||||
* provisions of LGPL are applicable instead of those above. If you wish to
|
||||
* allow use of your version of this file only under the terms of the LGPL
|
||||
* License and not to allow others to use your version of this file under
|
||||
* the MPL, indicate your decision by deleting the provisions above and
|
||||
* replace them with the notice and other provisions required by the LGPL.
|
||||
* If you do not delete the provisions above, a recipient may use your version
|
||||
* of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the MPL as stated above or under the terms of the GNU
|
||||
* Library General Public License as published by the Free Software Foundation;
|
||||
* either version 2 of the License, or any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
|
||||
* details.
|
||||
*
|
||||
* If you didn't download this code from the following link, you should check if
|
||||
* you aren't using an obsolete version:
|
||||
* http://www.lowagie.com/iText/
|
||||
*/
|
||||
|
||||
namespace iTextSharp.text.xml.simpleparser {
|
||||
|
||||
/**
|
||||
* This class contains entities that can be used in an entity tag.
|
||||
*/
|
||||
|
||||
public class EntitiesToUnicode {
|
||||
|
||||
/**
|
||||
* This is a map that contains the names of entities and their unicode value.
|
||||
*/
|
||||
public static readonly Hashtable map = new Hashtable();
|
||||
|
||||
static EntitiesToUnicode() {
|
||||
map["nbsp"] = '\u00a0'; // no-break space = non-breaking space, U+00A0 ISOnum
|
||||
map["iexcl"] = '\u00a1'; // inverted exclamation mark, U+00A1 ISOnum
|
||||
map["cent"] = '\u00a2'; // cent sign, U+00A2 ISOnum
|
||||
map["pound"] = '\u00a3'; // pound sign, U+00A3 ISOnum
|
||||
map["curren"] = '\u00a4'; // currency sign, U+00A4 ISOnum
|
||||
map["yen"] = '\u00a5'; // yen sign = yuan sign, U+00A5 ISOnum
|
||||
map["brvbar"] = '\u00a6'; // broken bar = broken vertical bar, U+00A6 ISOnum
|
||||
map["sect"] = '\u00a7'; // section sign, U+00A7 ISOnum
|
||||
map["uml"] = '\u00a8'; // diaeresis = spacing diaeresis, U+00A8 ISOdia
|
||||
map["copy"] = '\u00a9'; // copyright sign, U+00A9 ISOnum
|
||||
map["ordf"] = '\u00aa'; // feminine ordinal indicator, U+00AA ISOnum
|
||||
map["laquo"] = '\u00ab'; // left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
|
||||
map["not"] = '\u00ac'; // not sign, U+00AC ISOnum
|
||||
map["shy"] = '\u00ad'; // soft hyphen = discretionary hyphen, U+00AD ISOnum
|
||||
map["reg"] = '\u00ae'; // registered sign = registered trade mark sign, U+00AE ISOnum
|
||||
map["macr"] = '\u00af'; // macron = spacing macron = overline = APL overbar, U+00AF ISOdia
|
||||
map["deg"] = '\u00b0'; // degree sign, U+00B0 ISOnum
|
||||
map["plusmn"] = '\u00b1'; // plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
|
||||
map["sup2"] = '\u00b2'; // superscript two = superscript digit two = squared, U+00B2 ISOnum
|
||||
map["sup3"] = '\u00b3'; // superscript three = superscript digit three = cubed, U+00B3 ISOnum
|
||||
map["acute"] = '\u00b4'; // acute accent = spacing acute, U+00B4 ISOdia
|
||||
map["micro"] = '\u00b5'; // micro sign, U+00B5 ISOnum
|
||||
map["para"] = '\u00b6'; // pilcrow sign = paragraph sign, U+00B6 ISOnum
|
||||
map["middot"] = '\u00b7'; // middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
|
||||
map["cedil"] = '\u00b8'; // cedilla = spacing cedilla, U+00B8 ISOdia
|
||||
map["sup1"] = '\u00b9'; // superscript one = superscript digit one, U+00B9 ISOnum
|
||||
map["ordm"] = '\u00ba'; // masculine ordinal indicator, U+00BA ISOnum
|
||||
map["raquo"] = '\u00bb'; // right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
|
||||
map["frac14"] = '\u00bc'; // vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
|
||||
map["frac12"] = '\u00bd'; // vulgar fraction one half = fraction one half, U+00BD ISOnum
|
||||
map["frac34"] = '\u00be'; // vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
|
||||
map["iquest"] = '\u00bf'; // inverted question mark = turned question mark, U+00BF ISOnum
|
||||
map["Agrave"] = '\u00c0'; // latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
|
||||
map["Aacute"] = '\u00c1'; // latin capital letter A with acute, U+00C1 ISOlat1
|
||||
map["Acirc"] = '\u00c2'; // latin capital letter A with circumflex, U+00C2 ISOlat1
|
||||
map["Atilde"] = '\u00c3'; // latin capital letter A with tilde, U+00C3 ISOlat1
|
||||
map["Auml"] = '\u00c4'; // latin capital letter A with diaeresis, U+00C4 ISOlat1
|
||||
map["Aring"] = '\u00c5'; // latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
|
||||
map["AElig"] = '\u00c6'; // latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
|
||||
map["Ccedil"] = '\u00c7'; // latin capital letter C with cedilla, U+00C7 ISOlat1
|
||||
map["Egrave"] = '\u00c8'; // latin capital letter E with grave, U+00C8 ISOlat1
|
||||
map["Eacute"] = '\u00c9'; // latin capital letter E with acute, U+00C9 ISOlat1
|
||||
map["Ecirc"] = '\u00ca'; // latin capital letter E with circumflex, U+00CA ISOlat1
|
||||
map["Euml"] = '\u00cb'; // latin capital letter E with diaeresis, U+00CB ISOlat1
|
||||
map["Igrave"] = '\u00cc'; // latin capital letter I with grave, U+00CC ISOlat1
|
||||
map["Iacute"] = '\u00cd'; // latin capital letter I with acute, U+00CD ISOlat1
|
||||
map["Icirc"] = '\u00ce'; // latin capital letter I with circumflex, U+00CE ISOlat1
|
||||
map["Iuml"] = '\u00cf'; // latin capital letter I with diaeresis, U+00CF ISOlat1
|
||||
map["ETH"] = '\u00d0'; // latin capital letter ETH, U+00D0 ISOlat1
|
||||
map["Ntilde"] = '\u00d1'; // latin capital letter N with tilde, U+00D1 ISOlat1
|
||||
map["Ograve"] = '\u00d2'; // latin capital letter O with grave, U+00D2 ISOlat1
|
||||
map["Oacute"] = '\u00d3'; // latin capital letter O with acute, U+00D3 ISOlat1
|
||||
map["Ocirc"] = '\u00d4'; // latin capital letter O with circumflex, U+00D4 ISOlat1
|
||||
map["Otilde"] = '\u00d5'; // latin capital letter O with tilde, U+00D5 ISOlat1
|
||||
map["Ouml"] = '\u00d6'; // latin capital letter O with diaeresis, U+00D6 ISOlat1
|
||||
map["times"] = '\u00d7'; // multiplication sign, U+00D7 ISOnum
|
||||
map["Oslash"] = '\u00d8'; // latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
|
||||
map["Ugrave"] = '\u00d9'; // latin capital letter U with grave, U+00D9 ISOlat1
|
||||
map["Uacute"] = '\u00da'; // latin capital letter U with acute, U+00DA ISOlat1
|
||||
map["Ucirc"] = '\u00db'; // latin capital letter U with circumflex, U+00DB ISOlat1
|
||||
map["Uuml"] = '\u00dc'; // latin capital letter U with diaeresis, U+00DC ISOlat1
|
||||
map["Yacute"] = '\u00dd'; // latin capital letter Y with acute, U+00DD ISOlat1
|
||||
map["THORN"] = '\u00de'; // latin capital letter THORN, U+00DE ISOlat1
|
||||
map["szlig"] = '\u00df'; // latin small letter sharp s = ess-zed, U+00DF ISOlat1
|
||||
map["agrave"] = '\u00e0'; // latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
|
||||
map["aacute"] = '\u00e1'; // latin small letter a with acute, U+00E1 ISOlat1
|
||||
map["acirc"] = '\u00e2'; // latin small letter a with circumflex, U+00E2 ISOlat1
|
||||
map["atilde"] = '\u00e3'; // latin small letter a with tilde, U+00E3 ISOlat1
|
||||
map["auml"] = '\u00e4'; // latin small letter a with diaeresis, U+00E4 ISOlat1
|
||||
map["aring"] = '\u00e5'; // latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
|
||||
map["aelig"] = '\u00e6'; // latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
|
||||
map["ccedil"] = '\u00e7'; // latin small letter c with cedilla, U+00E7 ISOlat1
|
||||
map["egrave"] = '\u00e8'; // latin small letter e with grave, U+00E8 ISOlat1
|
||||
map["eacute"] = '\u00e9'; // latin small letter e with acute, U+00E9 ISOlat1
|
||||
map["ecirc"] = '\u00ea'; // latin small letter e with circumflex, U+00EA ISOlat1
|
||||
map["euml"] = '\u00eb'; // latin small letter e with diaeresis, U+00EB ISOlat1
|
||||
map["igrave"] = '\u00ec'; // latin small letter i with grave, U+00EC ISOlat1
|
||||
map["iacute"] = '\u00ed'; // latin small letter i with acute, U+00ED ISOlat1
|
||||
map["icirc"] = '\u00ee'; // latin small letter i with circumflex, U+00EE ISOlat1
|
||||
map["iuml"] = '\u00ef'; // latin small letter i with diaeresis, U+00EF ISOlat1
|
||||
map["eth"] = '\u00f0'; // latin small letter eth, U+00F0 ISOlat1
|
||||
map["ntilde"] = '\u00f1'; // latin small letter n with tilde, U+00F1 ISOlat1
|
||||
map["ograve"] = '\u00f2'; // latin small letter o with grave, U+00F2 ISOlat1
|
||||
map["oacute"] = '\u00f3'; // latin small letter o with acute, U+00F3 ISOlat1
|
||||
map["ocirc"] = '\u00f4'; // latin small letter o with circumflex, U+00F4 ISOlat1
|
||||
map["otilde"] = '\u00f5'; // latin small letter o with tilde, U+00F5 ISOlat1
|
||||
map["ouml"] = '\u00f6'; // latin small letter o with diaeresis, U+00F6 ISOlat1
|
||||
map["divide"] = '\u00f7'; // division sign, U+00F7 ISOnum
|
||||
map["oslash"] = '\u00f8'; // latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1
|
||||
map["ugrave"] = '\u00f9'; // latin small letter u with grave, U+00F9 ISOlat1
|
||||
map["uacute"] = '\u00fa'; // latin small letter u with acute, U+00FA ISOlat1
|
||||
map["ucirc"] = '\u00fb'; // latin small letter u with circumflex, U+00FB ISOlat1
|
||||
map["uuml"] = '\u00fc'; // latin small letter u with diaeresis, U+00FC ISOlat1
|
||||
map["yacute"] = '\u00fd'; // latin small letter y with acute, U+00FD ISOlat1
|
||||
map["thorn"] = '\u00fe'; // latin small letter thorn, U+00FE ISOlat1
|
||||
map["yuml"] = '\u00ff'; // latin small letter y with diaeresis, U+00FF ISOlat1
|
||||
// Latin Extended-B
|
||||
map["fnof"] = '\u0192'; // latin small f with hook = function = florin, U+0192 ISOtech
|
||||
// Greek
|
||||
map["Alpha"] = '\u0391'; // greek capital letter alpha, U+0391
|
||||
map["Beta"] = '\u0392'; // greek capital letter beta, U+0392
|
||||
map["Gamma"] = '\u0393'; // greek capital letter gamma, U+0393 ISOgrk3
|
||||
map["Delta"] = '\u0394'; // greek capital letter delta, U+0394 ISOgrk3
|
||||
map["Epsilon"] = '\u0395'; // greek capital letter epsilon, U+0395
|
||||
map["Zeta"] = '\u0396'; // greek capital letter zeta, U+0396
|
||||
map["Eta"] = '\u0397'; // greek capital letter eta, U+0397
|
||||
map["Theta"] = '\u0398'; // greek capital letter theta, U+0398 ISOgrk3
|
||||
map["Iota"] = '\u0399'; // greek capital letter iota, U+0399
|
||||
map["Kappa"] = '\u039a'; // greek capital letter kappa, U+039A
|
||||
map["Lambda"] = '\u039b'; // greek capital letter lambda, U+039B ISOgrk3
|
||||
map["Mu"] = '\u039c'; // greek capital letter mu, U+039C
|
||||
map["Nu"] = '\u039d'; // greek capital letter nu, U+039D
|
||||
map["Xi"] = '\u039e'; // greek capital letter xi, U+039E ISOgrk3
|
||||
map["Omicron"] = '\u039f'; // greek capital letter omicron, U+039F
|
||||
map["Pi"] = '\u03a0'; // greek capital letter pi, U+03A0 ISOgrk3
|
||||
map["Rho"] = '\u03a1'; // greek capital letter rho, U+03A1
|
||||
// there is no Sigmaf, and no U+03A2 character either
|
||||
map["Sigma"] = '\u03a3'; // greek capital letter sigma, U+03A3 ISOgrk3
|
||||
map["Tau"] = '\u03a4'; // greek capital letter tau, U+03A4
|
||||
map["Upsilon"] = '\u03a5'; // greek capital letter upsilon, U+03A5 ISOgrk3
|
||||
map["Phi"] = '\u03a6'; // greek capital letter phi, U+03A6 ISOgrk3
|
||||
map["Chi"] = '\u03a7'; // greek capital letter chi, U+03A7
|
||||
map["Psi"] = '\u03a8'; // greek capital letter psi, U+03A8 ISOgrk3
|
||||
map["Omega"] = '\u03a9'; // greek capital letter omega, U+03A9 ISOgrk3
|
||||
map["alpha"] = '\u03b1'; // greek small letter alpha, U+03B1 ISOgrk3
|
||||
map["beta"] = '\u03b2'; // greek small letter beta, U+03B2 ISOgrk3
|
||||
map["gamma"] = '\u03b3'; // greek small letter gamma, U+03B3 ISOgrk3
|
||||
map["delta"] = '\u03b4'; // greek small letter delta, U+03B4 ISOgrk3
|
||||
map["epsilon"] = '\u03b5'; // greek small letter epsilon, U+03B5 ISOgrk3
|
||||
map["zeta"] = '\u03b6'; // greek small letter zeta, U+03B6 ISOgrk3
|
||||
map["eta"] = '\u03b7'; // greek small letter eta, U+03B7 ISOgrk3
|
||||
map["theta"] = '\u03b8'; // greek small letter theta, U+03B8 ISOgrk3
|
||||
map["iota"] = '\u03b9'; // greek small letter iota, U+03B9 ISOgrk3
|
||||
map["kappa"] = '\u03ba'; // greek small letter kappa, U+03BA ISOgrk3
|
||||
map["lambda"] = '\u03bb'; // greek small letter lambda, U+03BB ISOgrk3
|
||||
map["mu"] = '\u03bc'; // greek small letter mu, U+03BC ISOgrk3
|
||||
map["nu"] = '\u03bd'; // greek small letter nu, U+03BD ISOgrk3
|
||||
map["xi"] = '\u03be'; // greek small letter xi, U+03BE ISOgrk3
|
||||
map["omicron"] = '\u03bf'; // greek small letter omicron, U+03BF NEW
|
||||
map["pi"] = '\u03c0'; // greek small letter pi, U+03C0 ISOgrk3
|
||||
map["rho"] = '\u03c1'; // greek small letter rho, U+03C1 ISOgrk3
|
||||
map["sigmaf"] = '\u03c2'; // greek small letter final sigma, U+03C2 ISOgrk3
|
||||
map["sigma"] = '\u03c3'; // greek small letter sigma, U+03C3 ISOgrk3
|
||||
map["tau"] = '\u03c4'; // greek small letter tau, U+03C4 ISOgrk3
|
||||
map["upsilon"] = '\u03c5'; // greek small letter upsilon, U+03C5 ISOgrk3
|
||||
map["phi"] = '\u03c6'; // greek small letter phi, U+03C6 ISOgrk3
|
||||
map["chi"] = '\u03c7'; // greek small letter chi, U+03C7 ISOgrk3
|
||||
map["psi"] = '\u03c8'; // greek small letter psi, U+03C8 ISOgrk3
|
||||
map["omega"] = '\u03c9'; // greek small letter omega, U+03C9 ISOgrk3
|
||||
map["thetasym"] = '\u03d1'; // greek small letter theta symbol, U+03D1 NEW
|
||||
map["upsih"] = '\u03d2'; // greek upsilon with hook symbol, U+03D2 NEW
|
||||
map["piv"] = '\u03d6'; // greek pi symbol, U+03D6 ISOgrk3
|
||||
// General Punctuation
|
||||
map["bull"] = '\u2022'; // bullet = black small circle, U+2022 ISOpub
|
||||
// bullet is NOT the same as bullet operator, U+2219
|
||||
map["hellip"] = '\u2026'; // horizontal ellipsis = three dot leader, U+2026 ISOpub
|
||||
map["prime"] = '\u2032'; // prime = minutes = feet, U+2032 ISOtech
|
||||
map["Prime"] = '\u2033'; // double prime = seconds = inches, U+2033 ISOtech
|
||||
map["oline"] = '\u203e'; // overline = spacing overscore, U+203E NEW
|
||||
map["frasl"] = '\u2044'; // fraction slash, U+2044 NEW
|
||||
// Letterlike Symbols
|
||||
map["weierp"] = '\u2118'; // script capital P = power set = Weierstrass p, U+2118 ISOamso
|
||||
map["image"] = '\u2111'; // blackletter capital I = imaginary part, U+2111 ISOamso
|
||||
map["real"] = '\u211c'; // blackletter capital R = real part symbol, U+211C ISOamso
|
||||
map["trade"] = '\u2122'; // trade mark sign, U+2122 ISOnum
|
||||
map["alefsym"] = '\u2135'; // alef symbol = first transfinite cardinal, U+2135 NEW
|
||||
// alef symbol is NOT the same as hebrew letter alef,
|
||||
// U+05D0 although the same glyph could be used to depict both characters
|
||||
// Arrows
|
||||
map["larr"] = '\u2190'; // leftwards arrow, U+2190 ISOnum
|
||||
map["uarr"] = '\u2191'; // upwards arrow, U+2191 ISOnum
|
||||
map["rarr"] = '\u2192'; // rightwards arrow, U+2192 ISOnum
|
||||
map["darr"] = '\u2193'; // downwards arrow, U+2193 ISOnum
|
||||
map["harr"] = '\u2194'; // left right arrow, U+2194 ISOamsa
|
||||
map["crarr"] = '\u21b5'; // downwards arrow with corner leftwards = carriage return, U+21B5 NEW
|
||||
map["lArr"] = '\u21d0'; // leftwards double arrow, U+21D0 ISOtech
|
||||
// ISO 10646 does not say that lArr is the same as the 'is implied by' arrow
|
||||
// but also does not have any other character for that function. So ? lArr can
|
||||
// be used for 'is implied by' as ISOtech suggests
|
||||
map["uArr"] = '\u21d1'; // upwards double arrow, U+21D1 ISOamsa
|
||||
map["rArr"] = '\u21d2'; // rightwards double arrow, U+21D2 ISOtech
|
||||
// ISO 10646 does not say this is the 'implies' character but does not have
|
||||
// another character with this function so ?
|
||||
// rArr can be used for 'implies' as ISOtech suggests
|
||||
map["dArr"] = '\u21d3'; // downwards double arrow, U+21D3 ISOamsa
|
||||
map["hArr"] = '\u21d4'; // left right double arrow, U+21D4 ISOamsa
|
||||
// Mathematical Operators
|
||||
map["forall"] = '\u2200'; // for all, U+2200 ISOtech
|
||||
map["part"] = '\u2202'; // partial differential, U+2202 ISOtech
|
||||
map["exist"] = '\u2203'; // there exists, U+2203 ISOtech
|
||||
map["empty"] = '\u2205'; // empty set = null set = diameter, U+2205 ISOamso
|
||||
map["nabla"] = '\u2207'; // nabla = backward difference, U+2207 ISOtech
|
||||
map["isin"] = '\u2208'; // element of, U+2208 ISOtech
|
||||
map["notin"] = '\u2209'; // not an element of, U+2209 ISOtech
|
||||
map["ni"] = '\u220b'; // contains as member, U+220B ISOtech
|
||||
// should there be a more memorable name than 'ni'?
|
||||
map["prod"] = '\u220f'; // n-ary product = product sign, U+220F ISOamsb
|
||||
// prod is NOT the same character as U+03A0 'greek capital letter pi' though
|
||||
// the same glyph might be used for both
|
||||
map["sum"] = '\u2211'; // n-ary sumation, U+2211 ISOamsb
|
||||
// sum is NOT the same character as U+03A3 'greek capital letter sigma'
|
||||
// though the same glyph might be used for both
|
||||
map["minus"] = '\u2212'; // minus sign, U+2212 ISOtech
|
||||
map["lowast"] = '\u2217'; // asterisk operator, U+2217 ISOtech
|
||||
map["radic"] = '\u221a'; // square root = radical sign, U+221A ISOtech
|
||||
map["prop"] = '\u221d'; // proportional to, U+221D ISOtech
|
||||
map["infin"] = '\u221e'; // infinity, U+221E ISOtech
|
||||
map["ang"] = '\u2220'; // angle, U+2220 ISOamso
|
||||
map["and"] = '\u2227'; // logical and = wedge, U+2227 ISOtech
|
||||
map["or"] = '\u2228'; // logical or = vee, U+2228 ISOtech
|
||||
map["cap"] = '\u2229'; // intersection = cap, U+2229 ISOtech
|
||||
map["cup"] = '\u222a'; // union = cup, U+222A ISOtech
|
||||
map["int"] = '\u222b'; // integral, U+222B ISOtech
|
||||
map["there4"] = '\u2234'; // therefore, U+2234 ISOtech
|
||||
map["sim"] = '\u223c'; // tilde operator = varies with = similar to, U+223C ISOtech
|
||||
// tilde operator is NOT the same character as the tilde, U+007E,
|
||||
// although the same glyph might be used to represent both
|
||||
map["cong"] = '\u2245'; // approximately equal to, U+2245 ISOtech
|
||||
map["asymp"] = '\u2248'; // almost equal to = asymptotic to, U+2248 ISOamsr
|
||||
map["ne"] = '\u2260'; // not equal to, U+2260 ISOtech
|
||||
map["equiv"] = '\u2261'; // identical to, U+2261 ISOtech
|
||||
map["le"] = '\u2264'; // less-than or equal to, U+2264 ISOtech
|
||||
map["ge"] = '\u2265'; // greater-than or equal to, U+2265 ISOtech
|
||||
map["sub"] = '\u2282'; // subset of, U+2282 ISOtech
|
||||
map["sup"] = '\u2283'; // superset of, U+2283 ISOtech
|
||||
// note that nsup, 'not a superset of, U+2285' is not covered by the Symbol
|
||||
// font encoding and is not included. Should it be, for symmetry?
|
||||
// It is in ISOamsn
|
||||
map["nsub"] = '\u2284'; // not a subset of, U+2284 ISOamsn
|
||||
map["sube"] = '\u2286'; // subset of or equal to, U+2286 ISOtech
|
||||
map["supe"] = '\u2287'; // superset of or equal to, U+2287 ISOtech
|
||||
map["oplus"] = '\u2295'; // circled plus = direct sum, U+2295 ISOamsb
|
||||
map["otimes"] = '\u2297'; // circled times = vector product, U+2297 ISOamsb
|
||||
map["perp"] = '\u22a5'; // up tack = orthogonal to = perpendicular, U+22A5 ISOtech
|
||||
map["sdot"] = '\u22c5'; // dot operator, U+22C5 ISOamsb
|
||||
// dot operator is NOT the same character as U+00B7 middle dot
|
||||
// Miscellaneous Technical
|
||||
map["lceil"] = '\u2308'; // left ceiling = apl upstile, U+2308 ISOamsc
|
||||
map["rceil"] = '\u2309'; // right ceiling, U+2309 ISOamsc
|
||||
map["lfloor"] = '\u230a'; // left floor = apl downstile, U+230A ISOamsc
|
||||
map["rfloor"] = '\u230b'; // right floor, U+230B ISOamsc
|
||||
map["lang"] = '\u2329'; // left-pointing angle bracket = bra, U+2329 ISOtech
|
||||
// lang is NOT the same character as U+003C 'less than'
|
||||
// or U+2039 'single left-pointing angle quotation mark'
|
||||
map["rang"] = '\u232a'; // right-pointing angle bracket = ket, U+232A ISOtech
|
||||
// rang is NOT the same character as U+003E 'greater than'
|
||||
// or U+203A 'single right-pointing angle quotation mark'
|
||||
// Geometric Shapes
|
||||
map["loz"] = '\u25ca'; // lozenge, U+25CA ISOpub
|
||||
// Miscellaneous Symbols
|
||||
map["spades"] = '\u2660'; // black spade suit, U+2660 ISOpub
|
||||
// black here seems to mean filled as opposed to hollow
|
||||
map["clubs"] = '\u2663'; // black club suit = shamrock, U+2663 ISOpub
|
||||
map["hearts"] = '\u2665'; // black heart suit = valentine, U+2665 ISOpub
|
||||
map["diams"] = '\u2666'; // black diamond suit, U+2666 ISOpub
|
||||
// C0 Controls and Basic Latin
|
||||
map["quot"] = '\u0022'; // quotation mark = APL quote, U+0022 ISOnum
|
||||
map["amp"] = '\u0026'; // ampersand, U+0026 ISOnum
|
||||
map["apos"] = '\'';
|
||||
map["lt"] = '\u003c'; // less-than sign, U+003C ISOnum
|
||||
map["gt"] = '\u003e'; // greater-than sign, U+003E ISOnum
|
||||
// Latin Extended-A
|
||||
map["OElig"] = '\u0152'; // latin capital ligature OE, U+0152 ISOlat2
|
||||
map["oelig"] = '\u0153'; // latin small ligature oe, U+0153 ISOlat2
|
||||
// ligature is a misnomer, this is a separate character in some languages
|
||||
map["Scaron"] = '\u0160'; // latin capital letter S with caron, U+0160 ISOlat2
|
||||
map["scaron"] = '\u0161'; // latin small letter s with caron, U+0161 ISOlat2
|
||||
map["Yuml"] = '\u0178'; // latin capital letter Y with diaeresis, U+0178 ISOlat2
|
||||
// Spacing Modifier Letters
|
||||
map["circ"] = '\u02c6'; // modifier letter circumflex accent, U+02C6 ISOpub
|
||||
map["tilde"] = '\u02dc'; // small tilde, U+02DC ISOdia
|
||||
// General Punctuation
|
||||
map["ensp"] = '\u2002'; // en space, U+2002 ISOpub
|
||||
map["emsp"] = '\u2003'; // em space, U+2003 ISOpub
|
||||
map["thinsp"] = '\u2009'; // thin space, U+2009 ISOpub
|
||||
map["zwnj"] = '\u200c'; // zero width non-joiner, U+200C NEW RFC 2070
|
||||
map["zwj"] = '\u200d'; // zero width joiner, U+200D NEW RFC 2070
|
||||
map["lrm"] = '\u200e'; // left-to-right mark, U+200E NEW RFC 2070
|
||||
map["rlm"] = '\u200f'; // right-to-left mark, U+200F NEW RFC 2070
|
||||
map["ndash"] = '\u2013'; // en dash, U+2013 ISOpub
|
||||
map["mdash"] = '\u2014'; // em dash, U+2014 ISOpub
|
||||
map["lsquo"] = '\u2018'; // left single quotation mark, U+2018 ISOnum
|
||||
map["rsquo"] = '\u2019'; // right single quotation mark, U+2019 ISOnum
|
||||
map["sbquo"] = '\u201a'; // single low-9 quotation mark, U+201A NEW
|
||||
map["ldquo"] = '\u201c'; // left double quotation mark, U+201C ISOnum
|
||||
map["rdquo"] = '\u201d'; // right double quotation mark, U+201D ISOnum
|
||||
map["bdquo"] = '\u201e'; // double low-9 quotation mark, U+201E NEW
|
||||
map["dagger"] = '\u2020'; // dagger, U+2020 ISOpub
|
||||
map["Dagger"] = '\u2021'; // double dagger, U+2021 ISOpub
|
||||
map["permil"] = '\u2030'; // per mille sign, U+2030 ISOtech
|
||||
map["lsaquo"] = '\u2039'; // single left-pointing angle quotation mark, U+2039 ISO proposed
|
||||
// lsaquo is proposed but not yet ISO standardized
|
||||
map["rsaquo"] = '\u203a'; // single right-pointing angle quotation mark, U+203A ISO proposed
|
||||
// rsaquo is proposed but not yet ISO standardized
|
||||
map["euro"] = '\u20ac'; // euro sign, U+20AC NEW
|
||||
}
|
||||
|
||||
|
||||
/**
* Translates an entity to a unicode character.
* <p>
* Three forms are recognised: hexadecimal character references ("#x41"),
* decimal character references ("#65") and named entities that are looked up
* in the entity map of this class ("amp").
*
* @param name the name of the entity, without the leading '&' and the trailing ';'
* @return the corresponding unicode character, or '\0' when the name is null,
*         is not a known entity, is not a valid number, or denotes a value
*         that does not fit in a single <CODE>char</CODE> (below 0 or above 0xFFFF)
*/
public static char DecodeEntity(String name) {
    if (name == null || name.Length == 0)
        return '\0';
    if (name[0] == '#') {
        // "#x..." is a hexadecimal reference, any other "#..." a decimal one.
        bool isHex = name.Length > 1 && name[1] == 'x';
        try {
            int code = isHex
                ? int.Parse(name.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture)
                : int.Parse(name.Substring(1), NumberStyles.Integer, CultureInfo.InvariantCulture);
            // A char is one UTF-16 code unit. Casting a value outside that range
            // would silently wrap around to an unrelated character, so report
            // "unknown" instead and let the caller keep the original text.
            if (code < char.MinValue || code > char.MaxValue)
                return '\0';
            return (char)code;
        }
        catch (FormatException) {
            // empty or non-numeric reference
            return '\0';
        }
        catch (OverflowException) {
            // number too large for an int
            return '\0';
        }
    }
    object c = map[name];
    return c == null ? '\0' : (char)c;
}
|
||||
|
||||
/**
* Translates a String with entities (&...;) to a String without entities,
* replacing each entity with the character returned by DecodeEntity.
* <p>
* Text is copied unchanged when it cannot be decoded: an entity for which
* DecodeEntity returns '\0', a '&' that is followed by another '&' before the
* next ';', and a '&' that has no ';' after it.
*
* @param s the text to decode; may be null
* @return the decoded text; <CODE>s</CODE> itself when it is null or contains no '&'
*/
public static String DecodeString(String s) {
    if (s == null) return null;
    int pos_amp = s.IndexOf('&');
    if (pos_amp == -1) return s;

    StringBuilder buf = new StringBuilder(s.Length);
    buf.Append(s, 0, pos_amp);
    while (true) {
        int pos_sc = s.IndexOf(';', pos_amp);
        if (pos_sc == -1) {
            // unterminated entity: the rest of the text is literal
            buf.Append(s, pos_amp, s.Length - pos_amp);
            return buf.ToString();
        }
        // Only the last '&' before the ';' can start the entity; every
        // earlier one is plain text and is copied as is.
        int pos_a = s.IndexOf('&', pos_amp + 1);
        while (pos_a != -1 && pos_a < pos_sc) {
            buf.Append(s, pos_amp, pos_a - pos_amp);
            pos_amp = pos_a;
            pos_a = s.IndexOf('&', pos_amp + 1);
        }
        char replace = DecodeEntity(s.Substring(pos_amp + 1, pos_sc - (pos_amp + 1)));
        if (replace == '\0') {
            // unknown entity: keep "&name;" untouched
            buf.Append(s, pos_amp, pos_sc + 1 - pos_amp);
        }
        else {
            buf.Append(replace);
        }
        // After the loop above pos_a is either -1 or the first '&' located
        // after the ';', which is exactly where the next round has to start.
        if (pos_a == -1) {
            buf.Append(s, pos_sc + 1, s.Length - (pos_sc + 1));
            return buf.ToString();
        }
        buf.Append(s, pos_sc + 1, pos_a - (pos_sc + 1));
        pos_amp = pos_a;
    }
}
|
||||
}
|
||||
}
|
@@ -0,0 +1,81 @@
|
||||
using System;
|
||||
using System.Collections;
|
||||
/*
|
||||
* Copyright 2003 Paulo Soares
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
||||
* (the "License"); you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the License.
|
||||
*
|
||||
* The Original Code is 'iText, a free JAVA-PDF library'.
|
||||
*
|
||||
* The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
|
||||
* the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
|
||||
* All Rights Reserved.
|
||||
* Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
|
||||
* are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): all the names of the contributors are added in the source code
|
||||
* where applicable.
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of the
|
||||
* LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
|
||||
* provisions of LGPL are applicable instead of those above. If you wish to
|
||||
* allow use of your version of this file only under the terms of the LGPL
|
||||
* License and not to allow others to use your version of this file under
|
||||
* the MPL, indicate your decision by deleting the provisions above and
|
||||
* replace them with the notice and other provisions required by the LGPL.
|
||||
* If you do not delete the provisions above, a recipient may use your version
|
||||
* of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the MPL as stated above or under the terms of the GNU
|
||||
* Library General Public License as published by the Free Software Foundation;
|
||||
* either version 2 of the License, or any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
|
||||
* details.
|
||||
*
|
||||
* If you didn't download this code from the following link, you should check if
|
||||
* you aren't using an obsolete version:
|
||||
* http://www.lowagie.com/iText/
|
||||
*/
|
||||
|
||||
namespace iTextSharp.text.xml.simpleparser {
    /**
    * Callback contract for the events fired by <CODE>SimpleXMLParser</CODE>.
    * The members are listed in the order in which a document is normally walked:
    * document start, element start, text, element end, document end.
    * @author Paulo Soares (psoares@consiste.pt)
    */
    public interface ISimpleXMLDocHandler {
        /**
        * Called when the document starts to be parsed.
        */
        void StartDocument();

        /**
        * Called when a start tag is found.
        * @param tag the tag name
        * @param h the tag's attributes
        */
        void StartElement(string tag, Hashtable h);

        /**
        * Called when a text element is found.
        * @param str the text element, probably a fragment.
        */
        void Text(string str);

        /**
        * Called when an end tag is found.
        * @param tag the tag name
        */
        void EndElement(string tag);

        /**
        * Called after the document is parsed.
        */
        void EndDocument();
    }
}
|
@@ -0,0 +1,61 @@
|
||||
using System;
|
||||
/*
|
||||
* Copyright 2003 Paulo Soares
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
||||
* (the "License"); you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the License.
|
||||
*
|
||||
* The Original Code is 'iText, a free JAVA-PDF library'.
|
||||
*
|
||||
* The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
|
||||
* the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
|
||||
* All Rights Reserved.
|
||||
* Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
|
||||
* are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): all the names of the contributors are added in the source code
|
||||
* where applicable.
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of the
|
||||
* LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
|
||||
* provisions of LGPL are applicable instead of those above. If you wish to
|
||||
* allow use of your version of this file only under the terms of the LGPL
|
||||
* License and not to allow others to use your version of this file under
|
||||
* the MPL, indicate your decision by deleting the provisions above and
|
||||
* replace them with the notice and other provisions required by the LGPL.
|
||||
* If you do not delete the provisions above, a recipient may use your version
|
||||
* of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the MPL as stated above or under the terms of the GNU
|
||||
* Library General Public License as published by the Free Software Foundation;
|
||||
* either version 2 of the License, or any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
|
||||
* details.
|
||||
*
|
||||
* If you didn't download this code from the following link, you should check if
|
||||
* you aren't using an obsolete version:
|
||||
* http://www.lowagie.com/iText/
|
||||
*/
|
||||
|
||||
namespace iTextSharp.text.xml.simpleparser {
    /**
    * The handler for the comment events fired by <CODE>SimpleXMLParser</CODE>.
    * @author Paulo Soares (psoares@consiste.pt)
    */
    public interface ISimpleXMLDocHandlerComment {
        /**
        * Called when a comment is found.
        * @param text the comment text
        */
        void Comment(string text);
    }
}
|
551
iTechSharp/iTextSharp/text/xml/simpleparser/IanaEncodings.cs
Normal file
551
iTechSharp/iTextSharp/text/xml/simpleparser/IanaEncodings.cs
Normal file
@@ -0,0 +1,551 @@
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Text;
|
||||
/*
|
||||
* $Id: IanaEncodings.cs,v 1.4 2008/05/13 11:26:14 psoares33 Exp $
|
||||
*
|
||||
*
|
||||
* Copyright 2003-2007 Paulo Soares and Bruno Lowagie.
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
||||
* (the "License"); you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the License.
|
||||
*
|
||||
* The Original Code is 'iText, a free JAVA-PDF library'.
|
||||
*
|
||||
* The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
|
||||
* the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
|
||||
* All Rights Reserved.
|
||||
* Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
|
||||
* are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): all the names of the contributors are added in the source code
|
||||
* where applicable.
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of the
|
||||
* LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
|
||||
* provisions of LGPL are applicable instead of those above. If you wish to
|
||||
* allow use of your version of this file only under the terms of the LGPL
|
||||
* License and not to allow others to use your version of this file under
|
||||
* the MPL, indicate your decision by deleting the provisions above and
|
||||
* replace them with the notice and other provisions required by the LGPL.
|
||||
* If you do not delete the provisions above, a recipient may use your version
|
||||
* of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the MPL as stated above or under the terms of the GNU
|
||||
* Library General Public License as published by the Free Software Foundation;
|
||||
* either version 2 of the License, or any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
|
||||
* details.
|
||||
*
|
||||
* If you didn't download this code from the following link, you should check if
|
||||
* you aren't using an obsolete version:
|
||||
* http://www.lowagie.com/iText/
|
||||
*
|
||||
* The values used in this class are based on class org.apache.xerces.util.EncodingMap
|
||||
* http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/util/EncodingMap.java?view=markup
|
||||
* This class was originally published under the following license:
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
namespace iTextSharp.text.xml.simpleparser {
|
||||
|
||||
/**
|
||||
* Translates a IANA encoding name to a Java encoding.
|
||||
*/
|
||||
|
||||
public class IanaEncodings {
|
||||
|
||||
/** The object that maps IANA to Java encodings. */
|
||||
private static readonly Hashtable map = new Hashtable();
|
||||
|
||||
static IanaEncodings() {
|
||||
// add IANA to .NET encoding mappings.
|
||||
map["CP037"] = 37;
|
||||
map["CSIBM037"] = 37;
|
||||
map["EBCDIC-CP-CA"] = 37;
|
||||
map["EBCDIC-CP-NL"] = 37;
|
||||
map["EBCDIC-CP-US"] = 37;
|
||||
map["EBCDIC-CP-WT"] = 37;
|
||||
map["IBM037"] = 37;
|
||||
map["CP437"] = 437;
|
||||
map["CSPC8CODEPAGE437"] = 437;
|
||||
map["IBM437"] = 437;
|
||||
map["CP500"] = 500;
|
||||
map["CSIBM500"] = 500;
|
||||
map["EBCDIC-CP-BE"] = 500;
|
||||
map["EBCDIC-CP-CH"] = 500;
|
||||
map["IBM500"] = 500;
|
||||
map["ASMO-708"] = 708;
|
||||
map["DOS-720"] = 720;
|
||||
map["IBM737"] = 737;
|
||||
map["IBM775"] = 775;
|
||||
map["CP850"] = 850;
|
||||
map["IBM850"] = 850;
|
||||
map["CP852"] = 852;
|
||||
map["IBM852"] = 852;
|
||||
map["CP855"] = 855;
|
||||
map["IBM855"] = 855;
|
||||
map["CP857"] = 857;
|
||||
map["IBM857"] = 857;
|
||||
map["CCSID00858"] = 858;
|
||||
map["CP00858"] = 858;
|
||||
map["CP858"] = 858;
|
||||
map["IBM00858"] = 858;
|
||||
map["PC-MULTILINGUAL-850+EURO"] = 858;
|
||||
map["CP860"] = 860;
|
||||
map["IBM860"] = 860;
|
||||
map["CP861"] = 861;
|
||||
map["IBM861"] = 861;
|
||||
map["CP862"] = 862;
|
||||
map["DOS-862"] = 862;
|
||||
map["IBM862"] = 862;
|
||||
map["CP863"] = 863;
|
||||
map["IBM863"] = 863;
|
||||
map["CP864"] = 864;
|
||||
map["IBM864"] = 864;
|
||||
map["CP865"] = 865;
|
||||
map["IBM865"] = 865;
|
||||
map["CP866"] = 866;
|
||||
map["IBM866"] = 866;
|
||||
map["CP869"] = 869;
|
||||
map["IBM869"] = 869;
|
||||
map["CP870"] = 870;
|
||||
map["CSIBM870"] = 870;
|
||||
map["EBCDIC-CP-ROECE"] = 870;
|
||||
map["EBCDIC-CP-YU"] = 870;
|
||||
map["IBM870"] = 870;
|
||||
map["DOS-874"] = 874;
|
||||
map["ISO-8859-11"] = 874;
|
||||
map["MS874"] = 874;
|
||||
map["TIS620"] = 874;
|
||||
map["TIS-620"] = 874;
|
||||
map["WINDOWS-874"] = 874;
|
||||
map["CP875"] = 875;
|
||||
map["CSSHIFTJIS"] = 932;
|
||||
map["CSWINDOWS31J"] = 932;
|
||||
map["MS932"] = 932;
|
||||
map["MS_KANJI"] = 932;
|
||||
map["SHIFT-JIS"] = 932;
|
||||
map["SHIFT_JIS"] = 932;
|
||||
map["SJIS"] = 932;
|
||||
map["X-MS-CP932"] = 932;
|
||||
map["X-SJIS"] = 932;
|
||||
map["CHINESE"] = 936;
|
||||
map["CN-GB"] = 936;
|
||||
map["CSGB2312"] = 936;
|
||||
map["CSGB231280"] = 936;
|
||||
map["CSISO58GB231280"] = 936;
|
||||
map["GB2312"] = 936;
|
||||
map["GB2312-80"] = 936;
|
||||
map["GB231280"] = 936;
|
||||
map["GB_2312-80"] = 936;
|
||||
map["GBK"] = 936;
|
||||
map["ISO-IR-58"] = 936;
|
||||
map["MS936"] = 936;
|
||||
map["CSKSC56011987"] = 949;
|
||||
map["ISO-IR-149"] = 949;
|
||||
map["KOREAN"] = 949;
|
||||
map["KS-C-5601"] = 949;
|
||||
map["KS-C5601"] = 949;
|
||||
map["KS_C_5601"] = 949;
|
||||
map["KS_C_5601-1987"] = 949;
|
||||
map["KS_C_5601-1989"] = 949;
|
||||
map["KS_C_5601_1987"] = 949;
|
||||
map["KSC5601"] = 949;
|
||||
map["KSC_5601"] = 949;
|
||||
map["MS949"] = 949;
|
||||
map["BIG5"] = 950;
|
||||
map["BIG5-HKSCS"] = 950;
|
||||
map["CN-BIG5"] = 950;
|
||||
map["CSBIG5"] = 950;
|
||||
map["MS950"] = 950;
|
||||
map["X-X-BIG5"] = 950;
|
||||
map["CP1026"] = 1026;
|
||||
map["CSIBM1026"] = 1026;
|
||||
map["IBM1026"] = 1026;
|
||||
map["IBM01047"] = 1047;
|
||||
map["CCSID01140"] = 1140;
|
||||
map["CP01140"] = 1140;
|
||||
map["EBCDIC-US-37+EURO"] = 1140;
|
||||
map["IBM01140"] = 1140;
|
||||
map["CCSID01141"] = 1141;
|
||||
map["CP01141"] = 1141;
|
||||
map["EBCDIC-DE-273+EURO"] = 1141;
|
||||
map["IBM01141"] = 1141;
|
||||
map["CCSID01142"] = 1142;
|
||||
map["CP01142"] = 1142;
|
||||
map["EBCDIC-DK-277+EURO"] = 1142;
|
||||
map["EBCDIC-NO-277+EURO"] = 1142;
|
||||
map["IBM01142"] = 1142;
|
||||
map["CCSID01143"] = 1143;
|
||||
map["CP01143"] = 1143;
|
||||
map["EBCDIC-FI-278+EURO"] = 1143;
|
||||
map["EBCDIC-SE-278+EURO"] = 1143;
|
||||
map["IBM01143"] = 1143;
|
||||
map["CCSID01144"] = 1144;
|
||||
map["CP01144"] = 1144;
|
||||
map["EBCDIC-IT-280+EURO"] = 1144;
|
||||
map["IBM01144"] = 1144;
|
||||
map["CCSID01145"] = 1145;
|
||||
map["CP01145"] = 1145;
|
||||
map["EBCDIC-ES-284+EURO"] = 1145;
|
||||
map["IBM01145"] = 1145;
|
||||
map["CCSID01146"] = 1146;
|
||||
map["CP01146"] = 1146;
|
||||
map["EBCDIC-GB-285+EURO"] = 1146;
|
||||
map["IBM01146"] = 1146;
|
||||
map["CCSID01147"] = 1147;
|
||||
map["CP01147"] = 1147;
|
||||
map["EBCDIC-FR-297+EURO"] = 1147;
|
||||
map["IBM01147"] = 1147;
|
||||
map["CCSID01148"] = 1148;
|
||||
map["CP01148"] = 1148;
|
||||
map["EBCDIC-INTERNATIONAL-500+EURO"] = 1148;
|
||||
map["IBM01148"] = 1148;
|
||||
map["CCSID01149"] = 1149;
|
||||
map["CP01149"] = 1149;
|
||||
map["EBCDIC-IS-871+EURO"] = 1149;
|
||||
map["IBM01149"] = 1149;
|
||||
map["ISO-10646-UCS-2"] = 1200;
|
||||
map["UCS-2"] = 1200;
|
||||
map["UNICODE"] = 1200;
|
||||
map["UTF-16"] = 1200;
|
||||
map["UTF-16LE"] = 1200;
|
||||
map["UNICODELITTLEUNMARKED"] = 1200;
|
||||
map["UNICODELITTLE"] = 1200;
|
||||
map["UNICODEFFFE"] = 1201;
|
||||
map["UTF-16BE"] = 1201;
|
||||
map["UNICODEBIGUNMARKED"] = 1201;
|
||||
map["UNICODEBIG"] = 1201;
|
||||
map["CP1250"] = 1250;
|
||||
map["WINDOWS-1250"] = 1250;
|
||||
map["X-CP1250"] = 1250;
|
||||
map["CP1251"] = 1251;
|
||||
map["WINDOWS-1251"] = 1251;
|
||||
map["X-CP1251"] = 1251;
|
||||
map["CP1252"] = 1252;
|
||||
map["WINDOWS-1252"] = 1252;
|
||||
map["X-ANSI"] = 1252;
|
||||
map["CP1253"] = 1253;
|
||||
map["WINDOWS-1253"] = 1253;
|
||||
map["CP1254"] = 1254;
|
||||
map["WINDOWS-1254"] = 1254;
|
||||
map["CP1255"] = 1255;
|
||||
map["WINDOWS-1255"] = 1255;
|
||||
map["CP1256"] = 1256;
|
||||
map["WINDOWS-1256"] = 1256;
|
||||
map["CP1257"] = 1257;
|
||||
map["WINDOWS-1257"] = 1257;
|
||||
map["CP1258"] = 1258;
|
||||
map["WINDOWS-1258"] = 1258;
|
||||
map["JOHAB"] = 1361;
|
||||
map["MACINTOSH"] = 10000;
|
||||
map["MACROMAN"] = 10000;
|
||||
map["X-MAC-JAPANESE"] = 10001;
|
||||
map["X-MAC-CHINESETRAD"] = 10002;
|
||||
map["X-MAC-KOREAN"] = 10003;
|
||||
map["MACARABIC"] = 10004;
|
||||
map["X-MAC-ARABIC"] = 10004;
|
||||
map["MACHEBREW"] = 10005;
|
||||
map["X-MAC-HEBREW"] = 10005;
|
||||
map["MACGREEK"] = 10006;
|
||||
map["X-MAC-GREEK"] = 10006;
|
||||
map["MACCYRILLIC"] = 10007;
|
||||
map["X-MAC-CYRILLIC"] = 10007;
|
||||
map["X-MAC-CHINESESIMP"] = 10008;
|
||||
map["MACROMANIA"] = 10010;
|
||||
map["MACROMANIAN"] = 10010;
|
||||
map["X-MAC-ROMANIAN"] = 10010;
|
||||
map["MACUKRAINE"] = 10017;
|
||||
map["MACUKRAINIAN"] = 10017;
|
||||
map["X-MAC-UKRAINIAN"] = 10017;
|
||||
map["MACTHAI"] = 10021;
|
||||
map["X-MAC-THAI"] = 10021;
|
||||
map["MACCENTRALEUROPE"] = 10029;
|
||||
map["X-MAC-CE"] = 10029;
|
||||
map["MACICELANDIC"] = 10079;
|
||||
map["MACICELAND"] = 10079;
|
||||
map["X-MAC-ICELANDIC"] = 10079;
|
||||
map["MACTURKISH"] = 10081;
|
||||
map["X-MAC-TURKISH"] = 10081;
|
||||
map["MACCROATIAN"] = 10082;
|
||||
map["X-MAC-CROATIAN"] = 10082;
|
||||
map["X-CHINESE-CNS"] = 20000;
|
||||
map["X-CP20001"] = 20001;
|
||||
map["X-CHINESE-ETEN"] = 20002;
|
||||
map["X-CP20003"] = 20003;
|
||||
map["X-CP20004"] = 20004;
|
||||
map["X-CP20005"] = 20005;
|
||||
map["IRV"] = 20105;
|
||||
map["X-IA5"] = 20105;
|
||||
map["DIN_66003"] = 20106;
|
||||
map["GERMAN"] = 20106;
|
||||
map["X-IA5-GERMAN"] = 20106;
|
||||
map["SEN_850200_B"] = 20107;
|
||||
map["SWEDISH"] = 20107;
|
||||
map["X-IA5-SWEDISH"] = 20107;
|
||||
map["NORWEGIAN"] = 20108;
|
||||
map["NS_4551-1"] = 20108;
|
||||
map["X-IA5-NORWEGIAN"] = 20108;
|
||||
map["ANSI_X3.4-1968"] = 20127;
|
||||
map["ANSI_X3.4-1986"] = 20127;
|
||||
map["ASCII"] = 20127;
|
||||
map["CP367"] = 20127;
|
||||
map["CSASCII"] = 20127;
|
||||
map["IBM367"] = 20127;
|
||||
map["ISO-IR-6"] = 20127;
|
||||
map["ISO646-US"] = 20127;
|
||||
map["ISO_646.IRV:1991"] = 20127;
|
||||
map["US"] = 20127;
|
||||
map["US-ASCII"] = 20127;
|
||||
map["X-CP20261"] = 20261;
|
||||
map["X-CP20269"] = 20269;
|
||||
map["CP273"] = 20273;
|
||||
map["CSIBM273"] = 20273;
|
||||
map["IBM273"] = 20273;
|
||||
map["CSIBM277"] = 20277;
|
||||
map["EBCDIC-CP-DK"] = 20277;
|
||||
map["EBCDIC-CP-NO"] = 20277;
|
||||
map["IBM277"] = 20277;
|
||||
map["CP278"] = 20278;
|
||||
map["CSIBM278"] = 20278;
|
||||
map["EBCDIC-CP-FI"] = 20278;
|
||||
map["EBCDIC-CP-SE"] = 20278;
|
||||
map["IBM278"] = 20278;
|
||||
map["CP280"] = 20280;
|
||||
map["CSIBM280"] = 20280;
|
||||
map["EBCDIC-CP-IT"] = 20280;
|
||||
map["IBM280"] = 20280;
|
||||
map["CP284"] = 20284;
|
||||
map["CSIBM284"] = 20284;
|
||||
map["EBCDIC-CP-ES"] = 20284;
|
||||
map["IBM284"] = 20284;
|
||||
map["CP285"] = 20285;
|
||||
map["CSIBM285"] = 20285;
|
||||
map["EBCDIC-CP-GB"] = 20285;
|
||||
map["IBM285"] = 20285;
|
||||
map["CP290"] = 20290;
|
||||
map["CSIBM290"] = 20290;
|
||||
map["EBCDIC-JP-KANA"] = 20290;
|
||||
map["IBM290"] = 20290;
|
||||
map["CP297"] = 20297;
|
||||
map["CSIBM297"] = 20297;
|
||||
map["EBCDIC-CP-FR"] = 20297;
|
||||
map["IBM297"] = 20297;
|
||||
map["CP420"] = 20420;
|
||||
map["CSIBM420"] = 20420;
|
||||
map["EBCDIC-CP-AR1"] = 20420;
|
||||
map["IBM420"] = 20420;
|
||||
map["CP423"] = 20423;
|
||||
map["CSIBM423"] = 20423;
|
||||
map["EBCDIC-CP-GR"] = 20423;
|
||||
map["IBM423"] = 20423;
|
||||
map["CP424"] = 20424;
|
||||
map["CSIBM424"] = 20424;
|
||||
map["EBCDIC-CP-HE"] = 20424;
|
||||
map["IBM424"] = 20424;
|
||||
map["X-EBCDIC-KOREANEXTENDED"] = 20833;
|
||||
map["CSIBMTHAI"] = 20838;
|
||||
map["IBM-THAI"] = 20838;
|
||||
map["CSKOI8R"] = 20866;
|
||||
map["KOI"] = 20866;
|
||||
map["KOI8"] = 20866;
|
||||
map["KOI8-R"] = 20866;
|
||||
map["KOI8R"] = 20866;
|
||||
map["CP871"] = 20871;
|
||||
map["CSIBM871"] = 20871;
|
||||
map["EBCDIC-CP-IS"] = 20871;
|
||||
map["IBM871"] = 20871;
|
||||
map["CP880"] = 20880;
|
||||
map["CSIBM880"] = 20880;
|
||||
map["EBCDIC-CYRILLIC"] = 20880;
|
||||
map["IBM880"] = 20880;
|
||||
map["CP905"] = 20905;
|
||||
map["CSIBM905"] = 20905;
|
||||
map["EBCDIC-CP-TR"] = 20905;
|
||||
map["IBM905"] = 20905;
|
||||
map["CCSID00924"] = 20924;
|
||||
map["CP00924"] = 20924;
|
||||
map["EBCDIC-LATIN9--EURO"] = 20924;
|
||||
map["IBM00924"] = 20924;
|
||||
map["X-CP20936"] = 20936;
|
||||
map["X-CP20949"] = 20949;
|
||||
map["CP1025"] = 21025;
|
||||
map["X-CP21027"] = 21027;
|
||||
map["KOI8-RU"] = 21866;
|
||||
map["KOI8-U"] = 21866;
|
||||
map["CP819"] = 28591;
|
||||
map["CSISOLATIN1"] = 28591;
|
||||
map["IBM819"] = 28591;
|
||||
map["ISO-8859-1"] = 28591;
|
||||
map["ISO-IR-100"] = 28591;
|
||||
map["ISO8859-1"] = 28591;
|
||||
map["ISO_8859-1"] = 28591;
|
||||
map["ISO_8859-1:1987"] = 28591;
|
||||
map["L1"] = 28591;
|
||||
map["LATIN1"] = 28591;
|
||||
map["CSISOLATIN2"] = 28592;
|
||||
map["ISO-8859-2"] = 28592;
|
||||
map["ISO-IR-101"] = 28592;
|
||||
map["ISO8859-2"] = 28592;
|
||||
map["ISO_8859-2"] = 28592;
|
||||
map["ISO_8859-2:1987"] = 28592;
|
||||
map["L2"] = 28592;
|
||||
map["LATIN2"] = 28592;
|
||||
map["CSISOLATIN3"] = 28593;
|
||||
map["ISO-8859-3"] = 28593;
|
||||
map["ISO-IR-109"] = 28593;
|
||||
map["ISO_8859-3"] = 28593;
|
||||
map["ISO_8859-3:1988"] = 28593;
|
||||
map["L3"] = 28593;
|
||||
map["LATIN3"] = 28593;
|
||||
map["CSISOLATIN4"] = 28594;
|
||||
map["ISO-8859-4"] = 28594;
|
||||
map["ISO-IR-110"] = 28594;
|
||||
map["ISO_8859-4"] = 28594;
|
||||
map["ISO_8859-4:1988"] = 28594;
|
||||
map["L4"] = 28594;
|
||||
map["LATIN4"] = 28594;
|
||||
map["CSISOLATINCYRILLIC"] = 28595;
|
||||
map["CYRILLIC"] = 28595;
|
||||
map["ISO-8859-5"] = 28595;
|
||||
map["ISO-IR-144"] = 28595;
|
||||
map["ISO_8859-5"] = 28595;
|
||||
map["ISO_8859-5:1988"] = 28595;
|
||||
map["ARABIC"] = 28596;
|
||||
map["CSISOLATINARABIC"] = 28596;
|
||||
map["ECMA-114"] = 28596;
|
||||
map["ISO-8859-6"] = 28596;
|
||||
map["ISO-IR-127"] = 28596;
|
||||
map["ISO_8859-6"] = 28596;
|
||||
map["ISO_8859-6:1987"] = 28596;
|
||||
map["CSISOLATINGREEK"] = 28597;
|
||||
map["ECMA-118"] = 28597;
|
||||
map["ELOT_928"] = 28597;
|
||||
map["GREEK"] = 28597;
|
||||
map["GREEK8"] = 28597;
|
||||
map["ISO-8859-7"] = 28597;
|
||||
map["ISO-IR-126"] = 28597;
|
||||
map["ISO_8859-7"] = 28597;
|
||||
map["ISO_8859-7:1987"] = 28597;
|
||||
map["CSISOLATINHEBREW"] = 28598;
|
||||
map["HEBREW"] = 28598;
|
||||
map["ISO-8859-8"] = 28598;
|
||||
map["ISO-IR-138"] = 28598;
|
||||
map["ISO_8859-8"] = 28598;
|
||||
map["ISO_8859-8:1988"] = 28598;
|
||||
map["LOGICAL"] = 28598;
|
||||
map["VISUAL"] = 28598;
|
||||
map["CSISOLATIN5"] = 28599;
|
||||
map["ISO-8859-9"] = 28599;
|
||||
map["ISO-IR-148"] = 28599;
|
||||
map["ISO_8859-9"] = 28599;
|
||||
map["ISO_8859-9:1989"] = 28599;
|
||||
map["L5"] = 28599;
|
||||
map["LATIN5"] = 28599;
|
||||
map["ISO-8859-13"] = 28603;
|
||||
map["CSISOLATIN9"] = 28605;
|
||||
map["ISO-8859-15"] = 28605;
|
||||
map["ISO_8859-15"] = 28605;
|
||||
map["L9"] = 28605;
|
||||
map["LATIN9"] = 28605;
|
||||
map["X-EUROPA"] = 29001;
|
||||
map["ISO-8859-8-I"] = 38598;
|
||||
map["ISO-2022-JP"] = 50220;
|
||||
map["CSISO2022JP"] = 50221;
|
||||
map["CSISO2022KR"] = 50225;
|
||||
map["ISO-2022-KR"] = 50225;
|
||||
map["ISO-2022-KR-7"] = 50225;
|
||||
map["ISO-2022-KR-7BIT"] = 50225;
|
||||
map["CP50227"] = 50227;
|
||||
map["X-CP50227"] = 50227;
|
||||
map["CP930"] = 50930;
|
||||
map["X-EBCDIC-JAPANESEANDUSCANADA"] = 50931;
|
||||
map["CP933"] = 50933;
|
||||
map["CP935"] = 50935;
|
||||
map["CP937"] = 50937;
|
||||
map["CP939"] = 50939;
|
||||
map["CSEUCPKDFMTJAPANESE"] = 51932;
|
||||
map["EUC-JP"] = 51932;
|
||||
map["EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE"] = 51932;
|
||||
map["ISO-2022-JPEUC"] = 51932;
|
||||
map["X-EUC"] = 51932;
|
||||
map["X-EUC-JP"] = 51932;
|
||||
map["EUC-CN"] = 51936;
|
||||
map["X-EUC-CN"] = 51936;
|
||||
map["CSEUCKR"] = 51949;
|
||||
map["EUC-KR"] = 51949;
|
||||
map["ISO-2022-KR-8"] = 51949;
|
||||
map["ISO-2022-KR-8BIT"] = 51949;
|
||||
map["HZ-GB-2312"] = 52936;
|
||||
map["GB18030"] = 54936;
|
||||
map["X-ISCII-DE"] = 57002;
|
||||
map["X-ISCII-BE"] = 57003;
|
||||
map["X-ISCII-TA"] = 57004;
|
||||
map["X-ISCII-TE"] = 57005;
|
||||
map["X-ISCII-AS"] = 57006;
|
||||
map["X-ISCII-OR"] = 57007;
|
||||
map["X-ISCII-KA"] = 57008;
|
||||
map["X-ISCII-MA"] = 57009;
|
||||
map["X-ISCII-GU"] = 57010;
|
||||
map["X-ISCII-PA"] = 57011;
|
||||
map["CSUNICODE11UTF7"] = 65000;
|
||||
map["UNICODE-1-1-UTF-7"] = 65000;
|
||||
map["UNICODE-2-0-UTF-7"] = 65000;
|
||||
map["UTF-7"] = 65000;
|
||||
map["X-UNICODE-1-1-UTF-7"] = 65000;
|
||||
map["X-UNICODE-2-0-UTF-7"] = 65000;
|
||||
map["UNICODE-1-1-UTF-8"] = 65001;
|
||||
map["UNICODE-2-0-UTF-8"] = 65001;
|
||||
map["UTF-8"] = 65001;
|
||||
map["X-UNICODE-1-1-UTF-8"] = 65001;
|
||||
map["X-UNICODE-2-0-UTF-8"] = 65001;
|
||||
}
|
||||
|
||||
/**
 * Looks up the number stored in the map for an encoding name.
 * The name is upper-cased with the invariant culture before the lookup.
 * @param name the encoding name
 * @return the number stored for the name, or 0 when the lookup yields nothing
 */
public static int GetEncodingNumber(string name) {
    string key = name.ToUpper(System.Globalization.CultureInfo.InvariantCulture);
    object number = map[key];
    return number == null ? 0 : (int)number;
}
|
||||
|
||||
/**
 * Resolves an encoding name to an Encoding instance.
 * Four UNICODE* names are mapped directly to a UnicodeEncoding whose
 * constructor arguments are (bigEndian, byteOrderMark); any other name is
 * looked up (upper-cased, invariant culture) in the map and resolved through
 * its number; names missing from the map are handed to Encoding.GetEncoding
 * unchanged.
 * @param name the encoding name
 * @return the matching Encoding
 */
public static Encoding GetEncodingEncoding(string name) {
    string upper = name.ToUpper(System.Globalization.CultureInfo.InvariantCulture);
    switch (upper) {
        case "UNICODEBIGUNMARKED":
            return new UnicodeEncoding(true, false);
        case "UNICODEBIG":
            return new UnicodeEncoding(true, true);
        case "UNICODELITTLEUNMARKED":
            return new UnicodeEncoding(false, false);
        case "UNICODELITTLE":
            return new UnicodeEncoding(false, true);
    }
    if (!map.ContainsKey(upper))
        return Encoding.GetEncoding(name);
    return Encoding.GetEncoding((int)map[upper]);
}
|
||||
}
|
||||
}
|
740
iTechSharp/iTextSharp/text/xml/simpleparser/SimpleXMLParser.cs
Normal file
740
iTechSharp/iTextSharp/text/xml/simpleparser/SimpleXMLParser.cs
Normal file
@@ -0,0 +1,740 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
using System.Collections;
|
||||
using System.Globalization;
|
||||
/*
|
||||
* Copyright 2003 Paulo Soares
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
||||
* (the "License"); you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the License.
|
||||
*
|
||||
* The Original Code is 'iText, a free JAVA-PDF library'.
|
||||
*
|
||||
* The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
|
||||
* the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
|
||||
* All Rights Reserved.
|
||||
* Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
|
||||
* are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): all the names of the contributors are added in the source code
|
||||
* where applicable.
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of the
|
||||
* LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
|
||||
* provisions of LGPL are applicable instead of those above. If you wish to
|
||||
* allow use of your version of this file only under the terms of the LGPL
|
||||
* License and not to allow others to use your version of this file under
|
||||
* the MPL, indicate your decision by deleting the provisions above and
|
||||
* replace them with the notice and other provisions required by the LGPL.
|
||||
* If you do not delete the provisions above, a recipient may use your version
|
||||
* of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the MPL as stated above or under the terms of the GNU
|
||||
* Library General Public License as published by the Free Software Foundation;
|
||||
* either version 2 of the License, or any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
|
||||
* details.
|
||||
*
|
||||
* If you didn't download this code from the following link, you should check if
|
||||
* you aren't using an obsolete version:
|
||||
* http://www.lowagie.com/iText/
|
||||
*
|
||||
* The code to recognize the encoding in this class and in the convenience class IanaEncodings was taken from Apache Xerces published under the following license:
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Part of this code is based on the Quick-and-Dirty XML parser by Steven Brandt.
|
||||
* The code for the Quick-and-Dirty parser was published in JavaWorld (java tip 128).
|
||||
* Steven Brandt and JavaWorld gave permission to use the code for free.
|
||||
* (Bruno Lowagie and Paulo Soares chose to use it under the MPL/LGPL in
|
||||
* conformance with the rest of the code).
|
||||
* The original code can be found on this url: <A HREF="http://www.javaworld.com/javatips/jw-javatip128_p.html">http://www.javaworld.com/javatips/jw-javatip128_p.html</A>.
|
||||
* It was substantially refactored by Bruno Lowagie.
|
||||
*
|
||||
* The method 'private static String getEncodingName(byte[] b4)' was found
|
||||
* in org.apache.xerces.impl.XMLEntityManager, originally published by the
|
||||
* Apache Software Foundation under the Apache Software License; now being
|
||||
* used in iText under the MPL.
|
||||
*/
|
||||
|
||||
namespace iTextSharp.text.xml.simpleparser {
|
||||
/**
|
||||
* A simple XML and HTML parser. This parser is, like the SAX parser,
|
||||
* an event based parser, but with much less functionality.
|
||||
* <p>
|
||||
* The parser can:
|
||||
* <p>
|
||||
* <ul>
|
||||
* <li>It recognizes the encoding used
|
||||
* <li>It recognizes all the elements' start tags and end tags
|
||||
* <li>It lists attributes, where attribute values can be enclosed in single or double quotes
|
||||
* <li>It recognizes the <code>&lt;![CDATA[ ... ]]&gt;</code> construct
|
||||
* <li>It recognizes the standard entities: &amp;, &lt;, &gt;, &quot;, and &apos;, as well as numeric entities
|
||||
* <li>It maps lines ending in <code>\r\n</code> and <code>\r</code> to <code>\n</code> on input, in accordance with the XML Specification, Section 2.11
|
||||
* </ul>
|
||||
* <p>
|
||||
* The code is based on <A HREF="http://www.javaworld.com/javaworld/javatips/javatip128/">
|
||||
* http://www.javaworld.com/javaworld/javatips/javatip128/</A> with some extra
|
||||
* code from XERCES to recognize the encoding.
|
||||
*/
|
||||
public sealed class SimpleXMLParser {
|
||||
/** The states of the parser's state machine. */
private const int UNKNOWN = 0;          // before any actual content
private const int TEXT = 1;             // any content may follow
private const int TAG_ENCOUNTERED = 2;  // a '<' has just been seen
private const int EXAMIN_TAG = 3;       // inside something like <foo ... >
private const int TAG_EXAMINED = 4;     // the tag name is known
private const int IN_CLOSETAG = 5;      // inside a closing tag, e.g. </foo>
private const int SINGLE_TAG = 6;       // seen the '/' of <foo ... /, expecting '>'
private const int CDATA = 7;            // inside a CDATA section
private const int COMMENT = 8;          // inside <!-- ... -->
private const int PI = 9;               // inside <? ... ?> or <!DOCTYPE ... >
private const int ENTITY = 10;          // between '&' and ';'
private const int QUOTE = 11;           // inside an attribute value
private const int ATTRIBUTE_KEY = 12;   // collecting an attribute name
private const int ATTRIBUTE_EQUAL = 13; // attribute name read, looking for '='
private const int ATTRIBUTE_VALUE = 14; // '=' read, looking for the value

/** Stack of saved states. */
internal Stack stack;
/** The character being examined. */
internal int character = 0;
/** A character queued for re-examination, or -1 when there is none. */
internal int previousCharacter = -1;
/** Number of the line being read. */
internal int lines = 1;
/** Column of the current character. */
internal int columns = 0;
/** True when the last character was treated as a newline coming from a carriage return. */
internal bool eol = false;
/** The current state. */
internal int state;
/** True when the input is parsed as HTML. */
internal bool html;
/** Buffer for the text met between tags (also reused for names and values). */
internal StringBuilder text = new StringBuilder();
/** Buffer for an entity name (what stands between & and ;). */
internal StringBuilder entity = new StringBuilder();
/** Name of the current tag. */
internal string tag = null;
/** Attributes of the current tag. */
internal Hashtable attributes = null;
/** Handler that receives the document content. */
internal ISimpleXMLDocHandler doc;
/** Handler that receives the comments. */
internal ISimpleXMLDocHandlerComment comment;
/** Count of the tags that are currently open. */
internal int nested = 0;
/** The character that opened the attribute value being read. */
internal int quoteCharacter = '"';
/** Name of the attribute being read. */
internal string attributekey = null;
/** Value of the attribute being read. */
internal string attributevalue = null;
|
||||
|
||||
/**
 * Builds a parser bound to the given handlers.
 * Go(TextReader) is meant to be called right after construction.
 * @param doc the handler that receives the document content
 * @param comment the handler that receives the comments
 * @param html true to parse the input as HTML; the parser then starts in
 * the TEXT state instead of the UNKNOWN state
 */
private SimpleXMLParser(ISimpleXMLDocHandler doc, ISimpleXMLDocHandlerComment comment, bool html) {
    stack = new Stack();
    this.html = html;
    this.comment = comment;
    this.doc = doc;
    if (html)
        state = TEXT;
    else
        state = UNKNOWN;
}
|
||||
|
||||
/**
 * Does the actual parsing: reads the input one character at a time and
 * drives the state machine, forwarding start/end tags, text and comments
 * to the handlers. Perform this immediately after creating the parser object.
 * <p>
 * Line endings are normalized on input: <code>\r\n</code> and <code>\r</code>
 * both reach the state machine as a single <code>\n</code>.
 * <p>
 * In HTML mode the end of the input ends the document. In XML mode parsing
 * stops when the outermost element is closed; reaching the end of the input
 * before that throws an IOException.
 * NOTE(review): when the outermost XML element is closed by an end tag
 * (IN_CLOSETAG) the method returns without calling EndDocument(), whereas an
 * empty root element (SINGLE_TAG) does call it. This asymmetry is kept because
 * existing handlers may rely on it - confirm before changing.
 * @param reader the source of the characters
 * @throws IOException on malformed input
 */
private void Go(TextReader reader) {
    doc.StartDocument();
    while (true) {
        // read a new character, or re-examine the one pushed back by the ENTITY state
        bool freshlyRead = previousCharacter == -1;
        if (freshlyRead) {
            character = reader.Read();
        }
        else {
            character = previousCharacter;
            previousCharacter = -1;
        }

        // the end of the file was reached
        if (character == -1) {
            if (html) {
                if (state == TEXT)
                    Flush();
                doc.EndDocument();
            } else {
                ThrowException("Missing end tag");
            }
            return;
        }

        // dealing with \n and \r.
        // Only freshly read characters are accounted for: a pushed-back character
        // already went through this step, and processing it again would count its
        // line/column twice and would discard a newline that was derived from \r.
        if (freshlyRead) {
            if (character == '\n' && eol) {
                // second half of a \r\n pair; the \r was already delivered as \n
                eol = false;
                continue;
            }
            eol = false;
            if (character == '\n') {
                lines++;
                columns = 0;
            } else if (character == '\r') {
                eol = true;
                character = '\n';
                lines++;
                columns = 0;
            } else {
                columns++;
            }
        }

        switch (state) {
            // we are in an unknown state before there's actual content
            case UNKNOWN:
                if (character == '<') {
                    SaveState(TEXT);
                    state = TAG_ENCOUNTERED;
                }
                break;

            // we can encounter any content
            case TEXT:
                if (character == '<') {
                    Flush();
                    SaveState(state);
                    state = TAG_ENCOUNTERED;
                } else if (character == '&') {
                    SaveState(state);
                    entity.Length = 0;
                    state = ENTITY;
                } else
                    text.Append((char)character);
                break;

            // we have just seen a < and are wondering what we are looking at
            // <foo>, </foo>, <!-- ... --->, etc.
            case TAG_ENCOUNTERED:
                InitTag();
                if (character == '/') {
                    state = IN_CLOSETAG;
                } else if (character == '?') {
                    RestoreState();
                    state = PI;
                } else {
                    text.Append((char)character);
                    state = EXAMIN_TAG;
                }
                break;

            // we are processing something like this <foo ... >.
            // It could still be a <!-- ... --> or something.
            case EXAMIN_TAG:
                if (character == '>') {
                    DoTag();
                    ProcessTag(true);
                    InitTag();
                    state = RestoreState();
                } else if (character == '/') {
                    state = SINGLE_TAG;
                } else if (character == '-' && text.ToString().Equals("!-")) {
                    Flush();
                    state = COMMENT;
                } else if (character == '[' && text.ToString().Equals("![CDATA")) {
                    Flush();
                    state = CDATA;
                } else if (character == 'E' && text.ToString().Equals("!DOCTYP")) {
                    Flush();
                    state = PI;
                } else if (char.IsWhiteSpace((char)character)) {
                    DoTag();
                    state = TAG_EXAMINED;
                } else {
                    text.Append((char)character);
                }
                break;

            // we know the name of the tag now.
            case TAG_EXAMINED:
                if (character == '>') {
                    ProcessTag(true);
                    InitTag();
                    state = RestoreState();
                } else if (character == '/') {
                    state = SINGLE_TAG;
                } else if (char.IsWhiteSpace((char)character)) {
                    // empty
                } else {
                    text.Append((char)character);
                    state = ATTRIBUTE_KEY;
                }
                break;

            // we are processing a closing tag: e.g. </foo>
            case IN_CLOSETAG:
                if (character == '>') {
                    DoTag();
                    ProcessTag(false);
                    if (!html && nested==0) return;
                    state = RestoreState();
                } else {
                    if (!char.IsWhiteSpace((char)character))
                        text.Append((char)character);
                }
                break;

            // we have just seen something like this: <foo a="b"/
            // and are looking for the final >.
            case SINGLE_TAG:
                if (character != '>')
                    ThrowException("Expected > for tag: <"+tag+"/>");
                DoTag();
                ProcessTag(true);
                ProcessTag(false);
                InitTag();
                if (!html && nested==0) {
                    doc.EndDocument();
                    return;
                }
                state = RestoreState();
                break;

            // we are processing CDATA
            case CDATA:
                // ordinal comparison: the terminator is matched character by
                // character, so exactly two characters are removed below
                if (character == '>'
                    && text.ToString().EndsWith("]]", StringComparison.Ordinal)) {
                    text.Length = text.Length - 2;
                    Flush();
                    state = RestoreState();
                } else
                    text.Append((char)character);
                break;

            // we are processing a comment. We are inside
            // the <!-- .... --> looking for the -->.
            case COMMENT:
                if (character == '>'
                    && text.ToString().EndsWith("--", StringComparison.Ordinal)) {
                    text.Length = text.Length - 2;
                    Flush();
                    state = RestoreState();
                } else
                    text.Append((char)character);
                break;

            // We are inside one of these <? ... ?> or one of these <!DOCTYPE ... >
            case PI:
                if (character == '>') {
                    state = RestoreState();
                    if (state == TEXT) state = UNKNOWN;
                }
                break;

            // we are processing an entity, e.g. &lt;, &raquo;, etc.
            case ENTITY:
                if (character == ';') {
                    state = RestoreState();
                    String cent = entity.ToString();
                    entity.Length = 0;
                    char ce = EntitiesToUnicode.DecodeEntity(cent);
                    // an entity that is not decoded is passed through literally
                    if (ce == '\0')
                        text.Append('&').Append(cent).Append(';');
                    else
                        text.Append(ce);
                } else if ((character != '#' && (character < '0' || character > '9') && (character < 'a' || character > 'z')
                    && (character < 'A' || character > 'Z')) || entity.Length >= 7) {
                    // not an entity after all: emit what was collected as plain
                    // text and let the restored state look at this character again
                    state = RestoreState();
                    previousCharacter = character;
                    text.Append('&').Append(entity.ToString());
                    entity.Length = 0;
                }
                else {
                    entity.Append((char)character);
                }
                break;

            // We are processing the quoted right-hand side of an element's attribute.
            // In HTML mode a quoteCharacter of ' ' marks an unquoted value.
            case QUOTE:
                if (html && quoteCharacter == ' ' && character == '>') {
                    Flush();
                    ProcessTag(true);
                    InitTag();
                    state = RestoreState();
                }
                else if (html && quoteCharacter == ' ' && char.IsWhiteSpace((char)character)) {
                    Flush();
                    state = TAG_EXAMINED;
                }
                else if (html && quoteCharacter == ' ') {
                    text.Append((char)character);
                }
                else if (character == quoteCharacter) {
                    Flush();
                    state = TAG_EXAMINED;
                } else if (" \r\n\u0009".IndexOf((char)character)>=0) {
                    text.Append(' ');
                } else if (character == '&') {
                    SaveState(state);
                    state = ENTITY;
                    entity.Length = 0;
                } else {
                    text.Append((char)character);
                }
                break;

            // we are collecting the name of an attribute
            case ATTRIBUTE_KEY:
                if (char.IsWhiteSpace((char)character)) {
                    Flush();
                    state = ATTRIBUTE_EQUAL;
                } else if (character == '=') {
                    Flush();
                    state = ATTRIBUTE_VALUE;
                } else if (html && character == '>') {
                    text.Length = 0;
                    ProcessTag(true);
                    InitTag();
                    state = RestoreState();
                } else {
                    text.Append((char)character);
                }
                break;

            // the attribute name was read; we are looking for the =
            case ATTRIBUTE_EQUAL:
                if (character == '=') {
                    state = ATTRIBUTE_VALUE;
                } else if (char.IsWhiteSpace((char)character)) {
                    // empty
                } else if (html && character == '>') {
                    text.Length = 0;
                    ProcessTag(true);
                    InitTag();
                    state = RestoreState();
                } else if (html && character == '/') {
                    Flush();
                    state = SINGLE_TAG;
                } else if (html) {
                    Flush();
                    text.Append((char)character);
                    state = ATTRIBUTE_KEY;
                } else {
                    ThrowException("Error in attribute processing.");
                }
                break;

            // the = was read; we are looking for the attribute value
            case ATTRIBUTE_VALUE:
                if (character == '"' || character == '\'') {
                    quoteCharacter = character;
                    state = QUOTE;
                } else if (char.IsWhiteSpace((char)character)) {
                    // empty
                } else if (html && character == '>') {
                    Flush();
                    ProcessTag(true);
                    InitTag();
                    state = RestoreState();
                } else if (html) {
                    text.Append((char)character);
                    quoteCharacter = ' ';
                    state = QUOTE;
                } else {
                    ThrowException("Error in attribute processing");
                }
                break;
        }
    }
}
|
||||
|
||||
/**
 * Pops the most recently saved state off the stack.
 * @return the saved state, or UNKNOWN when the stack is empty
 */
private int RestoreState() {
    return stack.Count == 0 ? UNKNOWN : (int)stack.Pop();
}
|
||||
/**
 * Pushes a state onto the stack so that RestoreState() can return to it.
 * @param s the state to remember
 */
private void SaveState(int s) {
    object saved = s;
    stack.Push(saved);
}
|
||||
/**
 * Hands over the content of the text buffer according to the current state
 * and then empties the buffer:
 * in TEXT and CDATA a non-empty buffer goes to the document handler as text;
 * in COMMENT it goes to the comment handler, when there is one;
 * in ATTRIBUTE_KEY it becomes the attribute name (lower-cased in HTML mode);
 * in QUOTE and ATTRIBUTE_VALUE it becomes the value stored under the
 * current attribute name. In any other state the buffer is just discarded.
 */
private void Flush() {
    if (state == TEXT || state == CDATA) {
        if (text.Length > 0)
            doc.Text(text.ToString());
    }
    else if (state == COMMENT) {
        if (comment != null)
            comment.Comment(text.ToString());
    }
    else if (state == ATTRIBUTE_KEY) {
        string key = text.ToString();
        attributekey = html ? key.ToLower(CultureInfo.InvariantCulture) : key;
    }
    else if (state == QUOTE || state == ATTRIBUTE_VALUE) {
        attributevalue = text.ToString();
        attributes[attributekey] = attributevalue;
    }
    text.Length = 0;
}
|
||||
/**
 * Resets the tag data: gives the parser a new, empty attribute table
 * and forgets the tag name.
 */
private void InitTag() {
    attributes = new Hashtable();
    tag = null;
}
|
||||
/**
 * Fixes the name of the tag. When no name is set yet, the content of the
 * text buffer becomes the name; in HTML mode the name is lower-cased.
 * The text buffer is emptied afterwards.
 */
private void DoTag() {
    string name = tag;
    if (name == null)
        name = text.ToString();
    tag = html ? name.ToLower(CultureInfo.InvariantCulture) : name;
    text.Length = 0;
}
|
||||
/**
 * Reports the current tag to the document handler and keeps the
 * nesting counter up to date.
 * @param start true for a tag that has just been opened (the counter goes up
 * and the handler gets the tag with its attributes); false for a tag that is
 * being closed (the counter goes down)
 */
private void ProcessTag(bool start) {
    if (!start) {
        nested--;
        doc.EndElement(tag);
        return;
    }
    nested++;
    doc.StartElement(tag,attributes);
}
|
||||
/**
 * Throws an IOException whose message is the given text followed by the
 * current line and column.
 * @param s the description of the problem
 */
private void ThrowException(String s) {
    string message = s + " near line " + lines + ", column " + columns;
    throw new IOException(message);
}
|
||||
|
||||
/**
 * Parses a document, firing the events to the handlers.
 * @param doc the document handler
 * @param comment the comment handler
 * @param r the document. The encoding is already resolved. The reader is not closed
 * @param html true to parse the document as HTML
 * @throws IOException on error
 */
public static void Parse(ISimpleXMLDocHandler doc, ISimpleXMLDocHandlerComment comment, TextReader r, bool html) {
    new SimpleXMLParser(doc, comment, html).Go(r);
}
|
||||
|
||||
/**
 * Parses the XML document firing the events to the handler.
 * The encoding is deduced from the first four bytes and, for the UTF-8 and
 * CP037 guesses, from the <code>encoding</code> pseudo-attribute found in the
 * bytes that follow, up to the first '&gt;'.
 * Note that the four sniffed bytes and everything up to that first '&gt;' are
 * consumed here and never reach the parser.
 * @param doc the document handler
 * @param inp the document. The encoding is deduced from the stream. The stream is not closed
 * @throws IOException on error, or when the stream holds fewer than four bytes
 */
public static void Parse(ISimpleXMLDocHandler doc, Stream inp) {
    byte[] b4 = new byte[4];
    // A single Read may legitimately return fewer bytes than requested
    // (network or buffered streams), so keep reading until the four bytes
    // are there or the stream ends.
    int count = 0;
    while (count < b4.Length) {
        int read = inp.Read(b4, count, b4.Length - count);
        if (read <= 0)
            break;
        count += read;
    }
    if (count != 4)
        throw new IOException("Insufficient length.");
    String encoding = GetEncodingName(b4);
    String decl = null;
    if (encoding.Equals("UTF-8")) {
        StringBuilder sb = new StringBuilder();
        int c;
        while ((c = inp.ReadByte()) != -1) {
            if (c == '>')
                break;
            sb.Append((char)c);
        }
        decl = sb.ToString();
    }
    else if (encoding.Equals("CP037")) {
        MemoryStream bi = new MemoryStream();
        int c;
        while ((c = inp.ReadByte()) != -1) {
            if (c == 0x6e) // that's '>' in ebcdic
                break;
            bi.WriteByte((byte)c);
        }
        decl = Encoding.GetEncoding(37).GetString(bi.ToArray());//cp037 ebcdic
    }
    if (decl != null) {
        // an explicitly declared encoding wins over the guess
        decl = GetDeclaredEncoding(decl);
        if (decl != null)
            encoding = decl;
    }
    Parse(doc, new StreamReader(inp, IanaEncodings.GetEncodingEncoding(encoding)));
}
|
||||
|
||||
/**
 * Extracts the value of the <code>encoding</code> pseudo-attribute from an
 * XML declaration. The value is whatever stands between the first quote
 * character (single or double) that follows the word "encoding" and the next
 * occurrence of that same quote character.
 * @param decl the text of the declaration; may be null
 * @return the declared encoding, or null when decl is null, when it has no
 * "encoding" or when the value is not properly quoted
 */
private static String GetDeclaredEncoding(String decl) {
    if (decl == null)
        return null;
    // ordinal search: this is a fixed keyword, not linguistic text
    int idx = decl.IndexOf("encoding", StringComparison.Ordinal);
    if (idx < 0)
        return null;
    int idx1 = decl.IndexOf('"', idx);
    int idx2 = decl.IndexOf('\'', idx);
    // the two positions can only be equal when neither quote was found
    if (idx1 == idx2)
        return null;
    // the quote that comes first opens the value
    char quote;
    int open;
    if (idx1 < 0 || (idx2 >= 0 && idx2 < idx1)) {
        quote = '\'';
        open = idx2;
    }
    else {
        quote = '"';
        open = idx1;
    }
    int close = decl.IndexOf(quote, open + 1);
    if (close < 0)
        return null;
    return decl.Substring(open + 1, close - (open + 1));
}
|
||||
|
||||
/**
 * Parses the document as XML (not HTML) and without a comment handler,
 * firing the events to the handler.
 * @param doc the document handler
 * @param r the document. The reader is not closed
 */
public static void Parse(ISimpleXMLDocHandler doc, TextReader r) {
    ISimpleXMLDocHandlerComment noCommentHandler = null;
    Parse(doc, noCommentHandler, r, false);
}
|
||||
|
||||
/**
 * Escapes a string with the appropriate XML codes: the five characters
 * &lt; &gt; &amp; " and ' are replaced by their predefined entities.
 * @param s the string to be escaped
 * @param onlyASCII codes above 127 will always be escaped with &#nn; if <CODE>true</CODE>
 * @return the escaped string
 */
public static String EscapeXML(String s, bool onlyASCII) {
    StringBuilder sb = new StringBuilder(s.Length);
    foreach (char c in s) {
        switch (c) {
            case '<':
                sb.Append("&lt;");
                break;
            case '>':
                sb.Append("&gt;");
                break;
            case '&':
                sb.Append("&amp;");
                break;
            case '"':
                sb.Append("&quot;");
                break;
            case '\'':
                sb.Append("&apos;");
                break;
            default:
                if (onlyASCII && c > 127)
                    // decimal character reference, e.g. &#233;
                    sb.Append("&#").Append(((int)c).ToString(CultureInfo.InvariantCulture)).Append(';');
                else
                    sb.Append(c);
                break;
        }
    }
    return sb.ToString();
}
|
||||
|
||||
/**
 * Guesses the IANA name of the encoding from the first four bytes of the
 * input, with the endian-ness of that encoding where appropriate.
 * (method found in org.apache.xerces.impl.XMLEntityManager, originally published
 * by the Apache Software Foundation under the Apache Software License; now being
 * used in iText under the MPL)
 * @param b4 The first four bytes of the input.
 * @return an IANA-encoding string; "UTF-8" when nothing else is recognized
 */
private static String GetEncodingName(byte[] b4) {
    int first = b4[0];
    int second = b4[1];

    // UTF-16 byte order marks
    if (first == 0xFE && second == 0xFF)
        return "UTF-16BE";
    if (first == 0xFF && second == 0xFE)
        return "UTF-16LE";

    // UTF-8 byte order mark
    int third = b4[2];
    if (first == 0xEF && second == 0xBB && third == 0xBF)
        return "UTF-8";

    // no BOM: compare all four bytes at once, first byte in the top 8 bits
    int fourth = b4[3];
    int signature = (first << 24) | (second << 16) | (third << 8) | fourth;
    switch (signature) {
        case 0x0000003C: // UCS-4, big endian (1234)
        case 0x3C000000: // UCS-4, little endian (4321)
        case 0x00003C00: // UCS-4, unusual octet order (2143) - REVISIT: What should this be?
        case 0x003C0000: // UCS-4, unusual octet order (3412) - REVISIT: What should this be?
            return "ISO-10646-UCS-4";
        case 0x003C003F: // UTF-16, big-endian, no BOM (or could turn out to be UCS-2)
            return "UTF-16BE";
        case 0x3C003F00: // UTF-16, little-endian, no BOM (or could turn out to be UCS-2)
            return "UTF-16LE";
        case 0x4C6FA794: // EBCDIC; a la xerces1, return CP037 instead of EBCDIC here
            return "CP037";
        default:         // default encoding
            return "UTF-8";
    }
}
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user