372 lines
13 KiB
C#
372 lines
13 KiB
C#
// ========================================================================
|
|
// Copyright 2006 - Volian Enterprises, Inc. All rights reserved.
|
|
// Volian Enterprises - Proprietary Information - DO NOT COPY OR DISTRIBUTE
|
|
// ------------------------------------------------------------------------
|
|
// $Workfile: $ $Revision: $
|
|
// $Author: $ $Date: $
|
|
//
|
|
// $History: $
|
|
// ========================================================================
|
|
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.Text;
|
|
using System.Text.RegularExpressions;
|
|
|
|
namespace DataLoader
|
|
{
|
|
public static class TextConvert
|
|
{
|
|
// Type initializer: builds the lookup data used by the conversion methods.
// The call order is significant: both builders assign the shared dicChar
// field, so the table left in place afterwards is the one created by
// BuildDictionaryText (which also creates Reg2).
static TextConvert()
{
    BuildDictionarySeq();
    BuildDictionaryText();
}
|
|
|
|
// Character-code translation table. It is assigned by BuildDictionarySeq and
// BuildDictionaryText and read by ReplaceChars.
private static Dictionary<int, int> dicChar;

// Rebuilds dicChar as a map from Unicode code point to a single-byte value:
// codes 0..127 map to themselves, and each code point in the table below maps
// to 128 plus its position in the table.
// NOTE(review): the layout looks like IBM code page 437 (the encoding that
// ConvertSeq requests) - confirm. Also note that the static constructor calls
// BuildDictionaryText right after this method, and that method replaces
// dicChar with a different table.
public static void BuildDictionarySeq()
{
    // Code points in byte order; the first entry corresponds to byte 128.
    int[] upperHalf = new int[]
    {
        199, 252, 233, 226, 228, 224, 229, 231,             // 128-135
        234, 235, 232, 239, 238, 236, 196, 197,             // 136-143
        201, 230, 198, 244, 246, 242, 251, 249,             // 144-151
        255, 214, 220, 162, 163, 165, 8359, 402,            // 152-159
        225, 237, 243, 250, 241, 209, 170, 186,             // 160-167
        191, 8976, 172, 189, 188, 161, 171, 187,            // 168-175
        9617, 9618, 9619, 9474, 9508, 9569, 9570, 9558,     // 176-183
        9557, 9571, 9553, 9559, 9565, 9564, 9563, 9488,     // 184-191
        9492, 9524, 9516, 9500, 9472, 9532, 9566, 9567,     // 192-199
        9562, 9556, 9577, 9574, 9568, 9552, 9580, 9575,     // 200-207
        9576, 9572, 9573, 9561, 9560, 9554, 9555, 9579,     // 208-215
        9578, 9496, 9484, 9608, 9604, 9612, 9616, 9600,     // 216-223
        945, 223, 915, 960, 931, 963, 181, 964,             // 224-231
        934, 920, 937, 948, 8734, 966, 949, 8745,           // 232-239
        8801, 177, 8805, 8804, 8992, 8993, 247, 8776,       // 240-247
        176, 8729, 183, 8730, 8319, 178, 9632, 160          // 248-255
    };

    dicChar = new Dictionary<int, int>();

    // The low half is an identity mapping.
    for (int code = 0; code < 128; code++)
    {
        dicChar[code] = code;
    }

    // The high half comes from the ordered table.
    for (int offset = 0; offset < upperHalf.Length; offset++)
    {
        dicChar[upperHalf[offset]] = 128 + offset;
    }
}
|
|
// Re-encodes s1 into code page 437 and returns a string whose characters
// carry the resulting byte values (each char is in the range 0..255).
// Throws ArgumentNullException when s1 is null (raised by Encoding.GetBytes).
public static string ConvertSeq(string s1)
{
    Encoding Eibm437 = Encoding.GetEncoding(437);
    Encoding Eunicode = Encoding.Unicode;

    // UTF-16 bytes -> code page 437 bytes. The decoder the original created
    // here was never used and has been dropped.
    byte[] oemBytes = Encoding.Convert(Eunicode, Eibm437, Eunicode.GetBytes(s1));

    // Code page 437 is a single-byte encoding, so one output char per byte.
    char[] widened = new char[oemBytes.Length];
    for (int i = 0; i < oemBytes.Length; i++)
    {
        widened[i] = (char)oemBytes[i];
    }
    return new string(widened);
}
|
|
|
|
// Matches any single character that is a key of the table built by
// BuildDictionaryText. Assigned by that method.
public static Regex Reg2;

// Replaces dicChar with the small text remapping table below and rebuilds
// Reg2 as a character class containing exactly the table's source characters.
public static void BuildDictionaryText()
{
    // { source code point, replacement code point }
    // An entry 236 -> 38914 was commented out in the original table.
    int[,] remap =
    {
        { 966, 216 },
        { 201, 274 },
        { 127, 916 },
        { 964, 947 },
        { 920, 952 },
        { 915, 961 },
        { 191, 964 },
        { 8801, 8773 },
        { 8734, 8857 },
        { 7, 9679 },
        { 8976, 9830 },
        { 9632, 9604 }
    };

    dicChar = new Dictionary<int, int>();
    StringBuilder charClass = new StringBuilder("[");
    for (int row = 0; row < remap.GetLength(0); row++)
    {
        dicChar[remap[row, 0]] = remap[row, 1];
        charClass.Append((char)remap[row, 0]);
    }
    charClass.Append("]");

    Reg2 = new Regex(charClass.ToString());
}
|
|
// Match evaluator: returns the matched text with every character that has an
// entry in dicChar swapped for its mapped character. Characters without an
// entry are returned unchanged.
public static string ReplaceChars(Match m)
{
    char[] buffer = m.Value.ToCharArray();
    for (int pos = 0; pos < buffer.Length; pos++)
    {
        int mapped;
        if (dicChar.TryGetValue((int)buffer[pos], out mapped))
        {
            buffer[pos] = (char)mapped;
        }
    }
    return new string(buffer);
}
|
|
// Converts s1 with ConvertText(string), optionally turning every caret into
// the RTF escape for code point 916 first.
//   s1      - text to convert.
//   DoCaret - when true, each '^' is replaced before the main conversion.
public static string ConvertText(string s1, bool DoCaret)
{
    string s2 = s1;

    // The escape carries the same trailing '?' as the \uN? sequences written
    // by ReplaceUnicode. Without it the control word has no delimiter and no
    // fallback character: "^5" would turn into \u9165, and an RTF reader
    // would consume whatever character follows as the fallback.
    if (DoCaret) s2 = s2.Replace("^", @"\u916?");

    return ConvertText(s2);
}
|
|
// Convenience overload: same as ReplaceUnicode(s2, false), i.e. carets are
// left untouched.
public static string ReplaceUnicode(string s2)
{
    const bool convertCaret = false;
    return ReplaceUnicode(s2, convertCaret);
}
|
|
// Rewrites special characters in s2 as RTF escapes (\'hh or \uN?), then runs
// the Fortran-exponent conversion and finally turns every '-' into the
// \u8209? escape.
//   s2      - text to convert.
//   DoCaret - when true, each '^' is replaced with the escape for code point 916.
// Several source characters are deliberately mapped to a DIFFERENT symbol than
// the one they encode; the per-line comments give the intended symbol.
public static string ReplaceUnicode(string s2, bool DoCaret)
{
    // (The original also copied s2 into an unused char array; removed.)
    s2 = s2.Replace("`", @"\'b0");          // convert backquote to degree - left over from DOS days.
    s2 = s2.Replace("\xa0", @"\u160?");     // hardspace
    s2 = s2.Replace("\xb0", @"\'b0");       // degree
    s2 = s2.Replace("\x7f", @"\u916?");     // delta
    s2 = s2.Replace("\x2265", @"\u8805?");  // greater than or equal
    s2 = s2.Replace("\x2264", @"\u8804?");  // less than or equal
    s2 = s2.Replace("\xB1", @"\'b1");       // plus minus
    s2 = s2.Replace("\x3A3", @"\u931?");    // sigma
    s2 = s2.Replace("\x3C4", @"\u947?");    // gamma
    s2 = s2.Replace("\xBD", @"\'bd");       // half
    s2 = s2.Replace("\x25A0", @"\u9604?");  // accum 2584
    s2 = s2.Replace("\x7", @"\u9679?");     // bullet 25CF
    s2 = s2.Replace("\x2248", @"\u8776?");  // approx eq
    s2 = s2.Replace("\x2261", @"\u8773?");  // similar eq 2245
    s2 = s2.Replace("\xF7", @"\'f7");       // division
    s2 = s2.Replace("\x221A", @"\u8730?");  // square root
    s2 = s2.Replace("\x393", @"\u961?");    // rho 3C1
    s2 = s2.Replace("\x3C0", @"\u960?");    // pi
    s2 = s2.Replace("\xb5", @"\u956?");     // micro 3BC (try e6, if not work try 109)
    s2 = s2.Replace("\x3B4", @"\u948?");    // lower case delta
    s2 = s2.Replace("\x3C3", @"\u963?");    // lower case sigma
    s2 = s2.Replace("\xBC", @"\'bc");       // quarter
    s2 = s2.Replace("\x3C6", @"\'d8");      // dist zero, D8
    s2 = s2.Replace("\xC9", @"\u274?");     // energy, 112
    s2 = s2.Replace("\xEC", @"\'ec");       // grave
    s2 = s2.Replace("\x2502", @"\u9474?");  // bar
    s2 = s2.Replace("\x3B5", @"\u949?");    // epsilon
    s2 = s2.Replace("\x398", @"\u952?");    // theta, 3B8
    s2 = s2.Replace("\x221E", @"\u8857?");  // dot in oval, 2299
    s2 = s2.Replace("\xBF", @"\u964?");     // tau, 3C4
    s2 = s2.Replace("\x2310", @"\u9830?");  // diamond, 2666
    s2 = s2.Replace("\x2192", @"\u8594?");  // right arrow
    s2 = s2.Replace("\x2190", @"\u8592?");  // left arrow
    s2 = s2.Replace("\x2191", @"\u8593?");  // up arrow
    s2 = s2.Replace("\x2193", @"\u8595?");  // down arrow
    s2 = s2.Replace("\x2207", @"\u8711?");  // nabla

    // Caret -> code point 916. The trailing '?' matches every other \uN?
    // escape above; without it a following digit would be absorbed into the
    // control word ("^5" -> \u9165) and any other following character would
    // be consumed by an RTF reader as the fallback character.
    if (DoCaret) s2 = s2.Replace("^", @"\u916?");

    // ConvertDOSSuperAndSubScripts is intentionally not applied here (the
    // call was disabled in the original).

    s2 = ConvertFortranFormatToScienctificNotation(s2);

    // Convert dash to a non-breaking dash. This is a unicode character.
    // This character will be used in veproms rather than a dash.
    // This must stay after the exponent conversion, which looks for '-' signs.
    s2 = s2.Replace("-", @"\u8209?");

    return s2;
}
|
|
// Scans str for Fortran-style exponent numbers (mantissa containing a '.',
// then 'E', then an exponent) and rewrites each one as
//     <mantissa>x10\super <exponent>\nosupersub
// All other text, including plain integers and decimals, is copied through.
//
// Rules kept from the original implementation:
//   * A number whose leading character is directly preceded by a single quote
//     is never converted (unless that leading character is the last character
//     of the string or is followed by a space or newline).
//   * If the character right after the exponent is neither a letter, a digit
//     nor white space, conversion is abandoned: that number and everything
//     after it are returned unconverted.
//
// Fixes relative to the original implementation:
//   * an exponent that ends the string (or is followed by a final '.') no
//     longer throws an out-of-range exception;
//   * abandoning conversion no longer discards the text that precedes the
//     number;
//   * the character following a plain decimal ("2.5 gpm") is no longer
//     emitted twice;
//   * a decimal preceded by a single quote is no longer dropped, and the
//     quote in front of a quoted integer is no longer emitted twice.
//
// NOTE(review): the B2003-053 remark in the original talks about REMOVING the
// single quote, but the quote has always been copied to the output with the
// preceding text; it is still kept here. Confirm the intended behavior.
// NOTE(review): the original tested the mantissa with char.Equals("1"), which
// is never true, so "x1" has always been emitted; and its trailing-zero trim
// keeps one character more than the comment suggests ("1.0" -> "1.",
// "1.500" -> "1.50"). Both quirks are preserved so that converted output does
// not change; a mantissa of exactly 1 was presumably meant to render as 10^n.
private static string ConvertFortranFormatToScienctificNotation(string str)
{
    char[] numberStart = "+-0123456789.".ToCharArray();
    string outstr = "";
    int orglen = str.Length;
    int cnt = 0;    // current scan position in str
    int start = 0;  // cnt - 1 when the number is quote-escaped, otherwise cnt

    while (cnt < orglen)
    {
        // Position up to the next number, sign, or period.
        int ptr = str.IndexOfAny(numberStart, cnt);
        if (ptr == -1)
        {
            outstr += str.Substring(cnt);
            break; // nothing else to process
        }
        if (ptr > cnt)
        {
            outstr += str.Substring(cnt, ptr - cnt);
            cnt = ptr;
        }

        if (cnt > start && str[cnt - 1] == '\'')
        {
            // B2003-053: the quote only counts as an escape when the number's
            // first character is not the last character of the string and is
            // not followed by a space or newline.
            int len = orglen - cnt;
            if (len <= 1 || str[cnt + 1] == ' ' || str[cnt + 1] == '\n')
                start = cnt;
            else
                start = cnt - 1;
        }
        else
        {
            start = cnt;
        }
        int tstr = cnt; // first character of the number, sign included

        // Skip a leading sign, then the integer digits.
        if (str[cnt] == '+' || str[cnt] == '-')
            cnt++;
        cnt = NextNonNumber(str, cnt);

        if (cnt < orglen - 1 && str[cnt] == '.')
        {
            cnt = NextNonNumber(str, cnt + 1); // fraction digits

            if (str[start] == '\'')
            {
                // Quote-escaped: copy the number through unconverted.
                start++;
                outstr += str.Substring(tstr, cnt - tstr);
            }
            else if (cnt < orglen - 1 && str[cnt] == 'E' && cnt > tstr)
            {
                int mark = outstr.Length; // output length before this number
                outstr += str.Substring(tstr, cnt - tstr); // mantissa, without the 'E'
                cnt++;

                // Trim trailing 0's (see the NOTE(review) above about the
                // character that is left behind).
                int rptr = outstr.Length - 1;
                while (outstr[rptr] == '0') rptr--;
                if (outstr[rptr] != '.') rptr++;
                if (rptr < outstr.Length - 1)
                    outstr = outstr.Substring(0, rptr + 1);

                outstr += "x1";
                outstr += "0\\super ";

                // Exponent: optional sign, digits, optional ".digits".
                int tstr2 = cnt;
                if (str[cnt] == '+' || str[cnt] == '-') cnt++;
                cnt = NextNonNumber(str, cnt);
                if (cnt + 1 < orglen && str[cnt] == '.' && char.IsDigit(str, cnt + 1))
                    cnt = NextNonNumber(str, cnt + 1);

                outstr += str.Substring(tstr2, cnt - tstr2);
                outstr += "\\nosupersub ";

                // Abandon conversion: keep what was produced before this
                // number and return the rest of the input as-is.
                if (cnt < orglen && !char.IsLetterOrDigit(str, cnt) && !char.IsWhiteSpace(str, cnt))
                    return outstr.Substring(0, mark) + str.Substring(tstr);
            }
            else
            {
                // Plain decimal: copy it plus the character that ended it,
                // and step past that character.
                outstr += str.Substring(tstr, cnt - tstr + ((cnt < orglen) ? 1 : 0));
                cnt++;
            }
        }
        else
        {
            // Integer, lone sign, or trailing period: copy it plus the
            // character that ended it, and step past that character.
            outstr += str.Substring(tstr, cnt - tstr + ((cnt < orglen) ? 1 : 0));
            cnt++;
        }
    }
    return outstr;
}

// Returns the index of the first character at or after cnt that is not a
// digit, or str.Length when only digits remain.
private static int NextNonNumber(string str, int cnt)
{
    int pos = cnt;
    while (pos < str.Length && char.IsDigit(str, pos)) pos++;
    return pos;
}
|
|
|
|
// Replaces the DOS-era toggle markers in instr with RTF control words:
//   '#' -> "\super "  (or "\nosupersub " when a super/subscript is active)
//   '~' -> "\sub "    (or "\nosupersub " when a super/subscript is active)
// All other text is copied through. Returns "" when instr is null.
//
// Fix: the original re-sliced the ORIGINAL string with an offset that was
// relative to the REMAINING text, so any input with a second marker (for
// example "a#b#c") kept re-reading the same text and never terminated. The
// scan now walks instr with a single absolute position.
//
// NOTE(review): as in the original, a '#' seen while a subscript is active
// emits "\nosupersub " but marks superscript as active (and likewise for '~'
// during a superscript). Preserved; confirm this is intended.
public static string ConvertDOSSuperAndSubScripts(string instr)
{
    if (instr == null) return "";

    char[] markers = "#~".ToCharArray();
    StringBuilder result = new StringBuilder();
    bool issupper = false, issub = false;
    int pos = 0;

    while (pos < instr.Length)
    {
        int hit = instr.IndexOfAny(markers, pos);
        if (hit < 0) break;

        // Copy the text between the previous marker and this one.
        result.Append(instr, pos, hit - pos);

        if (instr[hit] == '#')
        {
            result.Append((issub || issupper) ? "\\nosupersub " : "\\super ");
            issupper = !issupper;
            issub = false;
        }
        else
        {
            result.Append((issupper || issub) ? "\\nosupersub " : "\\sub ");
            issub = !issub;
            issupper = false;
        }

        pos = hit + 1;
    }

    // Whatever follows the last marker.
    if (pos < instr.Length)
        result.Append(instr, pos, instr.Length - pos);

    return result.ToString();
}
|
|
// Converts legacy text to RTF-flavoured text, in this order:
//   1. ReplaceUnicode (special characters, exponent notation, dashes);
//   2. attribute on/off marker characters -> RTF control words;
//   3. "apply to next word" control characters -> RTF control word pairs;
//   4. remaining characters matched by Reg2 -> their dicChar replacements;
//   5. curly braces escaped and newlines turned into \par.
public static string ConvertText(string s1)
{
    string text = ReplaceUnicode(s1);

    // Attribute on/off markers. The hex values in the comments are the byte
    // values used by the original comments (AE/AF, C6/C7, ...).
    string[,] toggles =
    {
        { "\xAB", "\\ul " },            // underline on  (AE)
        { "\xBB", "\\ul0 " },           // underline off (AF)
        { "\x255E", "\\super " },       // superscript on  (C6)
        { "\x255F", "\\nosupersub " },  // superscript off (C7)
        { "\x2552", "\\b " },           // bold on  (D5)
        { "\x2553", "\\b0 " },          // bold off (D6)
        { "\x2564", "\\sub " },         // subscript on  (D1)
        { "\xAA", "\\nosupersub " },    // subscript off (A6)
        { "\x2593", "\\i " },           // italics on  (B2)
        { "\x258C", "\\i0 " },          // italics off (DD)
        { "\x2559", "\\ul\\b " },       // underline + bold on
        { "\x2558", "\\b0\\ul0 " }      // underline + bold off
    };
    for (int row = 0; row < toggles.GetLength(0); row++)
    {
        text = text.Replace(toggles[row, 0], toggles[row, 1]);
    }

    // "Next word" controls: the attribute runs from the control character to
    // the next occurrence of the same character, the next space, or the end
    // of the text.
    string[,] nextWord =
    {
        { @"\x17([^\x17 ]*?)(?:[\x17]|(?= )|\Z)(.*?)", @"\ul $1\ul0 $2" },             // underline next word is 0x17
        { @"\x18([^\x18 ]*?)(?:[\x18]|(?= )|\Z)(.*?)", @"\super $1\nosupersub $2" },   // superscript next is 0x18
        { @"\x19([^\x19 ]*?)(?:[\x19]|(?= )|\Z)(.*?)", @"\sub $1\nosupersub $2" },     // subscript next is 0x19
        { @"\x13([^\x13 ]*?)(?:[\x13]|(?= )|\Z)(.*?)", @"\b $1\b0 $2" }                // bold next is 0x13
    };
    for (int row = 0; row < nextWord.GetLength(0); row++)
    {
        text = Regex.Replace(text, nextWord[row, 0], nextWord[row, 1]);
    }

    // NOTE(review): every character Reg2 is built from is already rewritten
    // by ReplaceUnicode above, so this pass looks like a no-op on this path -
    // confirm before removing.
    text = Reg2.Replace(text, new MatchEvaluator(ReplaceChars));

    // Now prepend an escape character, '\', to any curly brace. The curly
    // brace is used in rtf land.
    text = text.Replace(@"{", @"\{");
    text = text.Replace(@"}", @"\}");

    // line break in tables
    text = text.Replace("\n", @"\par ");
    return text;
}
|
|
}
|
|
}
|