522 lines
		
	
	
		
			19 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
			
		
		
	
	
			522 lines
		
	
	
		
			19 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
| // ========================================================================
 | |
| // Copyright 2006 - Volian Enterprises, Inc. All rights reserved.          
 | |
| // Volian Enterprises - Proprietary Information - DO NOT COPY OR DISTRIBUTE
 | |
| // ------------------------------------------------------------------------
 | |
| // $Workfile: $     $Revision: $                                           
 | |
| // $Author: $   $Date: $                                                   
 | |
| //                                                                         
 | |
| // $History: $                                                             
 | |
| // ========================================================================
 | |
| 
 | |
| using System;
 | |
| using System.Collections.Generic;
 | |
| using System.Text;
 | |
| using System.Text.RegularExpressions;
 | |
| using Volian.Base.Library;
 | |
| 
 | |
namespace DataLoader
{
	/// <summary>
	/// Static helpers that turn legacy DOS-era text (special characters and inline
	/// attribute markers) into text carrying RTF tokens such as <c>\u8209?</c>,
	/// <c>\ul</c>, <c>\b</c>, <c>\up2</c> and <c>\dn2</c>.
	/// </summary>
	public static class TextConvert
	{
		static TextConvert()
		{
			BuildDictionarySeq();
			BuildDictionaryText();
		}

		#region Character maps

		// Unicode code points for byte values 128..255, listed in byte order.
		// NOTE(review): the values line up with IBM code page 437 (the encoding
		// ConvertSeq uses) - confirm before relying on that.
		private static readonly int[] HighHalfCodePoints = new int[]
		{
			199, 252, 233, 226, 228, 224, 229, 231,			// 128-135
			234, 235, 232, 239, 238, 236, 196, 197,			// 136-143
			201, 230, 198, 244, 246, 242, 251, 249,			// 144-151
			255, 214, 220, 162, 163, 165, 8359, 402,		// 152-159
			225, 237, 243, 250, 241, 209, 170, 186,			// 160-167
			191, 8976, 172, 189, 188, 161, 171, 187,		// 168-175
			9617, 9618, 9619, 9474, 9508, 9569, 9570, 9558,	// 176-183
			9557, 9571, 9553, 9559, 9565, 9564, 9563, 9488,	// 184-191
			9492, 9524, 9516, 9500, 9472, 9532, 9566, 9567,	// 192-199
			9562, 9556, 9577, 9574, 9568, 9552, 9580, 9575,	// 200-207
			9576, 9572, 9573, 9561, 9560, 9554, 9555, 9579,	// 208-215
			9578, 9496, 9484, 9608, 9604, 9612, 9616, 9600,	// 216-223
			945, 223, 915, 960, 931, 963, 181, 964,			// 224-231
			934, 920, 937, 948, 8734, 966, 949, 8745,		// 232-239
			8801, 177, 8805, 8804, 8992, 8993, 247, 8776,	// 240-247
			176, 8729, 183, 8730, 8319, 178, 9632, 160		// 248-255
		};

		// Code point -> byte value map filled by BuildDictionarySeq.
		private static Dictionary<int, int> dicSeqChar;

		// Character remap consulted by ReplaceChars; filled by BuildDictionaryText.
		private static Dictionary<int, int> dicChar;

		/// <summary>
		/// Builds the code point to byte value map: 0..127 map to themselves and each
		/// entry of the high-half table maps to 128 plus its position.
		/// </summary>
		/// <remarks>
		/// The map is kept in its own field. It previously shared the field used by
		/// <see cref="ReplaceChars"/>, so calling this method after type initialization
		/// silently replaced the text remap.
		/// </remarks>
		public static void BuildDictionarySeq()
		{
			Dictionary<int, int> map = new Dictionary<int, int>(256);
			for (int i = 0; i < 128; i++)
			{
				map[i] = i;
			}
			for (int i = 0; i < HighHalfCodePoints.Length; i++)
			{
				map[HighHalfCodePoints[i]] = 128 + i;
			}
			dicSeqChar = map;
		}

		/// <summary>
		/// Encodes <paramref name="s1"/> with code page 437 and returns a string whose
		/// characters are the resulting byte values (one char per byte).
		/// </summary>
		/// <param name="s1">Text to convert; must not be null.</param>
		/// <returns>A string containing only characters in the range 0..255.</returns>
		public static string ConvertSeq(string s1)
		{
			byte[] oemBytes = Encoding.GetEncoding(437).GetBytes(s1);
			char[] widened = new char[oemBytes.Length];
			for (int i = 0; i < oemBytes.Length; i++)
			{
				widened[i] = (char)oemBytes[i];
			}
			return new string(widened);
		}

		/// <summary>
		/// Character class matching every key of the text remap; rebuilt by
		/// <see cref="BuildDictionaryText"/>.
		/// </summary>
		public static Regex Reg2;

		/// <summary>
		/// Builds the text remap used by <see cref="ReplaceChars"/> and the matching
		/// <see cref="Reg2"/> character class.
		/// </summary>
		public static void BuildDictionaryText()
		{
			Dictionary<int, int> map = new Dictionary<int, int>();
			map[966] = 216;		// phi -> D8
			map[201] = 274;		// C9 -> 274 (0x112)
			map[127] = 916;		// 7F -> delta
			map[964] = 947;		// 3C4 -> 3B3
			map[920] = 952;		// 398 -> 3B8
			map[915] = 961;		// 393 -> 3C1
			map[191] = 964;		// BF -> 3C4
			map[8801] = 8773;	// 2261 -> 2245
			map[8734] = 8857;	// 221E -> 2299
			map[7] = 9679;		// 07 -> 25CF
			map[8976] = 9830;	// 2310 -> 2666
			map[9632] = 9604;	// 25A0 -> 2584
			// 236 -> 38914 was disabled in the original table and stays disabled.

			// None of the keys is special inside a regex character class; a future key
			// such as ']', '^', '-' or '\' would need escaping here.
			StringBuilder charClass = new StringBuilder("[");
			foreach (int codePoint in map.Keys)
			{
				charClass.Append((char)codePoint);
			}
			charClass.Append(']');

			dicChar = map;
			Reg2 = new Regex(charClass.ToString());
		}

		/// <summary>
		/// Match evaluator that replaces each character of the match that has an entry
		/// in the text remap; other characters are left unchanged.
		/// </summary>
		/// <param name="m">The regex match to rewrite.</param>
		/// <returns>The matched text with mapped characters substituted.</returns>
		public static string ReplaceChars(Match m)
		{
			char[] chars = m.Value.ToCharArray();
			for (int i = 0; i < chars.Length; i++)
			{
				int mapped;
				if (dicChar.TryGetValue((int)chars[i], out mapped))
				{
					chars[i] = (char)mapped;
				}
			}
			return new string(chars);
		}

		#endregion

		#region Unicode to RTF escapes

		// Single character -> RTF escape table used by ReplaceUnicode.
		private static readonly Dictionary<char, string> RtfEscapes = BuildRtfEscapes();

		// Dash that follows "<token><space>": the space is dropped and the dash
		// becomes a non-breaking dash.
		private static readonly Regex TokenSpaceDashRegex = new Regex(@"(\\[^ \\?]*) \-");

		// E-style (Fortran) number: sign, integer part, fraction without its trailing
		// zeros, exponent.
		private static readonly Regex FortranNumberRegex = new Regex("([+-]?)([0-9]+)[.]([0-9]*?)0*E([+-]?[0-9]+)");

		private static Dictionary<char, string> BuildRtfEscapes()
		{
			Dictionary<char, string> map = new Dictionary<char, string>();
			map['`'] = @"\'b0";				// convert backquote to degree - left over from DOS days.
			map['\u00A0'] = @"\u160?";		// hardspace
			map['\u00B0'] = @"\'b0";		// degree
			map['\u007F'] = @"\u916?";		// delta
			map['\u2265'] = @"\u8805?";		// greater than or equal
			map['\u2264'] = @"\u8804?";		// less than or equal
			map['\u00B1'] = @"\'b1";		// plus minus
			map['\u03A3'] = @"\u931?";		// sigma
			map['\u03C4'] = @"\u947?";		// gamma
			map['\u00BD'] = @"\'bd";		// half
			map['\u25A0'] = @"\u9604?";		// accum 2584
			map['\u0007'] = @"\u9679?";		// bullet 25CF
			map['\u2248'] = @"\u8776?";		// approx eq
			map['\u2261'] = @"\u8773?";		// similar eq 2245
			map['\u00F7'] = @"\'f7";		// division
			map['\u221A'] = @"\u8730?";		// square root
			map['\u0393'] = @"\u961?";		// rho 3C1
			map['\u03C0'] = @"\u960?";		// pi
			map['\u00B5'] = @"\u956?";		// micro 3BC  (try e6, if not work try 109)
			map['\u03B4'] = @"\u948?";		// lower case delta
			map['\u03C3'] = @"\u963?";		// lower case sigma
			map['\u00BC'] = @"\'bc";		// quarter
			map['\u256A'] = @"\'d8";		// dist zero, D8
			map['\u03C6'] = @"\'d8";		// dist zero, D8
			map['\u00C9'] = @"\u274?";		// energy, 112
			map['\u00EC'] = @"\'ec";		// grave
			map['\u2502'] = @"\u9474?";		// bar
			map['\u03B5'] = @"\u949?";		// epsilon
			map['\u0398'] = @"\u952?";		// theta, 3B8
			map['\u221E'] = @"\u8857?";		// dot in oval, 2299
			map['\u00BF'] = @"\u964?";		// tau, 3C4
			map['\u2310'] = @"\u9830?";		// diamond, 2666
			map['\u2192'] = @"\u8594?";
			map['\u2190'] = @"\u8592?";
			map['\u2191'] = @"\u8593?";
			map['\u2193'] = @"\u8595?";
			map['\u2207'] = @"\u8711?";
			map['\u2591'] = @"\'b0";		// Degree Symbol
			map['\u00FF'] = @"\u8593?";		// Up Arrow
			map['\u00D6'] = @"\u8595?";		// Down Arrow
			return map;
		}

		// Replaces every character that has an entry in RtfEscapes, in a single pass.
		// Equivalent to replacing the keys one after another because each key is one
		// character and no escape text contains a key.
		private static string ApplyRtfEscapes(string text)
		{
			StringBuilder sb = null;	// allocated on the first hit only
			for (int i = 0; i < text.Length; i++)
			{
				string escape;
				if (RtfEscapes.TryGetValue(text[i], out escape))
				{
					if (sb == null)
					{
						sb = new StringBuilder(text.Length + 16);
						sb.Append(text, 0, i);
					}
					sb.Append(escape);
				}
				else if (sb != null)
				{
					sb.Append(text[i]);
				}
			}
			return sb == null ? text : sb.ToString();
		}

		/// <summary>
		/// Same as <see cref="ReplaceUnicode(string, bool)"/> with carets left alone.
		/// </summary>
		public static string ReplaceUnicode(string s2)
		{
			return ReplaceUnicode(s2, false);
		}

		/// <summary>
		/// Replaces special characters with RTF escapes, rewrites E-style numbers as
		/// "x10" with a raised exponent, and normalizes a few RTF token sequences.
		/// </summary>
		/// <param name="s2">Text to convert; must not be null.</param>
		/// <param name="DoCaret">When true, every '^' becomes <c>\u916?</c>.</param>
		/// <returns>The converted text.</returns>
		public static string ReplaceUnicode(string s2, bool DoCaret)
		{
			s2 = ApplyRtfEscapes(s2);

			if (DoCaret) s2 = s2.Replace("^", @"\u916?");

			// Record the text as a glitch whenever the number conversion changed it.
			string sBefore = s2;
			s2 = ConvertFortranFormatToScienctificNotation(s2);
			if (sBefore != s2)
				MyGlitches.Add("ConvertFortranFormatToScienctificNotation", sBefore, s2);

			if (VlnSettings.DebugMode)
			{
				if (s2.Contains(@"\super "))
					Console.WriteLine("RTF Super token");
			}

			// Convert dash to a non-breaking dash.  This is a unicode character.
			// This character will be used in veproms rather than a dash.
			// If the dash is preceded by a token, remove the space following the token.
			s2 = TokenSpaceDashRegex.Replace(s2, @"$1\u8209?");
			// Remove spaces between comment end and next token
			s2 = s2.Replace(@"\v0 \", @"\v0\");
			// Change token order to match RTB output
			s2 = s2.Replace(@"\v0\b0", @"\b0\v0");
			s2 = s2.Replace(@"\b0\ulnone", @"\ulnone\b0");
			s2 = s2.Replace(@"\par ", "\r\n");
			return s2;
		}

		private static DataLoaderGlitches _MyGlitches;

		/// <summary>
		/// Glitch collection that conversions report to; created on first read when
		/// none has been assigned.
		/// </summary>
		public static DataLoaderGlitches MyGlitches
		{
			get
			{
				if (_MyGlitches == null)
					_MyGlitches = new DataLoaderGlitches();
				return _MyGlitches;
			}
			set { _MyGlitches = value; }
		}

		/// <summary>
		/// Converts E-style numbers (for example "2.50E-3") into "2.5x10" followed by
		/// the exponent wrapped in <c>\up2 ... \up0 </c>.
		/// </summary>
		/// <param name="str">Text to scan.</param>
		/// <returns>The text with every E-style number rewritten.</returns>
		public static string ConvertFortranFormatToScienctificNotation(string str)
		{
			return FortranNumberRegex.Replace(str, new MatchEvaluator(FixFortranNumber));
		}

		/// <summary>
		/// Converts DOS superscript (#...#) and subscript (~...~) markers into
		/// <c>\up2</c> / <c>\dn2</c> runs terminated by <c>\up0 </c>.
		/// </summary>
		/// <param name="instr">Text to convert.</param>
		/// <returns>The converted text, or an empty string when conversion fails.</returns>
		public static string ConvertDOSSuperAndSubScripts(string instr)
		{
			try
			{
				string retval = Regex.Replace(instr, "[#](.*?)[#]", "\\up2 $1\\up0 ");// DOS Superscript
				retval = Regex.Replace(retval, "[~](.*?)[~]", "\\dn2 $1\\up0 ");// DOS Subscript
				return retval;
			}
			catch (Exception ex)
			{
				// Best effort: report the failure (now with its cause) and return empty text.
				Console.WriteLine("Error in ConvertDOSSuperAndSubScripts");
				Console.WriteLine("{0} - {1}", ex.GetType().Name, ex.Message);
			}
			return "";
		}

		// Formats one match of FortranNumberRegex.
		// Groups: 1 = sign, 2 = integer part, 3 = fraction (trailing zeros removed), 4 = exponent.
		private static string FixFortranNumber(Match match)
		{
			string whole = match.Groups[2].Value;
			string fraction = match.Groups[3].Value;
			StringBuilder sb = new StringBuilder(match.Groups[1].Value);
			if (fraction.Length != 0)
			{
				// A number with a decimal point
				sb.Append(whole + "." + fraction + "x10");
			}
			else if (whole != "1")
			{
				// Other than "1", multiply it times 10 raised to a power
				sb.Append(whole + "x10");
			}
			else
			{
				// The number is simply 1 so it can be ignored and 10 can be raised to a power
				sb.Append("10");
			}
			// Add the exponent as superscript
			return sb.ToString() + "\\up2 " + match.Groups[4].Value + "\\up0 ";
		}

		#endregion

		#region Text conversion

		// Paired underline on/off markers.
		private static readonly Regex UnderlinePairRegex = new Regex(@"\xAB([^\xBB]*?)\xBB");
		// "Next word" markers: 0x18 superscript, 0x19 subscript, 0x13 bold.
		private static readonly Regex SuperNextRegex = new Regex(@"\x18([A-Za-z0-9\-]+)(?:[\x18]|(?= )|\Z|(?=[^A-Za-z0-9]))(.*?)");
		private static readonly Regex SubNextRegex = new Regex(@"\x19([A-Za-z0-9\-]+)(?:[\x19]|(?= )|\Z|(?=[^A-Za-z0-9]))(.*?)");
		private static readonly Regex BoldNextRegex = new Regex(@"\x13([A-Za-z0-9\-]+)(?:[\x13]|(?= )|\Z|(?=[^A-Za-z0-9]))(.*?)");
		// "Underline next word" marker (0x17), at the start of the text and elsewhere.
		private static readonly Regex UnderlineNextAtStartRegex = new Regex(@"^\x17(([A-Za-z0-9]|\\u[0-9]+\?)+)");
		private static readonly Regex UnderlineNextRegex = new Regex(@"\x17(([A-Za-z0-9]|\\u[0-9]+\?)+)");

		// Attribute on/off markers and their RTF tokens, applied in this order:
		// super on/off (C6,C7), bold on/off (D5,D6), subscript on/off (D1,A6),
		// italics on/off (B2,DD), underline+bold on/off.
		private static readonly string[,] AttributeTokens = new string[,]
		{
			{ "\u255E", "\\up2 " },
			{ "\u255F", "\\up0 " },
			{ "\u2552", "\\b " },
			{ "\u2553", "\\b0 " },
			{ "\u2564", "\\dn2 " },
			{ "\u00AA", "\\up0 " },
			{ "\u2593", "\\i " },
			{ "\u258C", "\\i0 " },
			{ "\u2559", "\\ul\\b " },
			{ "\u2558", "\\b0\\ulnone " }
		};

		/// <summary>
		/// Converts text as <see cref="ConvertText(string)"/> does, optionally
		/// replacing every '^' with <c>\u916?</c> first.
		/// </summary>
		/// <param name="s1">Text to convert; must not be null.</param>
		/// <param name="DoCaret">When true, carets are replaced before conversion.</param>
		public static string ConvertText(string s1, bool DoCaret)
		{
			string s2 = s1;
			if (DoCaret) s2 = s2.Replace("^", @"\u916?");
			return ConvertText(s2);
		}

		/// <summary>
		/// Full conversion: applies <see cref="ReplaceUnicode(string)"/>, turns the
		/// attribute markers into RTF tokens, converts dashes to non-breaking dashes,
		/// escapes curly braces and turns line feeds into <c>\par </c>.
		/// </summary>
		/// <param name="s1">Text to convert; must not be null.</param>
		/// <returns>The converted text.</returns>
		public static string ConvertText(string s1)
		{
			string s2 = ReplaceUnicode(s1);

			// \xAB -> \ul and \xBB -> \ulnone - look for pairs:
			s2 = UnderlinePairRegex.Replace(s2, @"\ul $1\ulnone ");
			// if there is an underline on without underline off or vice versa, just remove it.
			s2 = s2.Replace("\u00AB", "").Replace("\u00BB", "");

			int tokenCount = AttributeTokens.GetLength(0);
			for (int i = 0; i < tokenCount; i++)
			{
				s2 = s2.Replace(AttributeTokens[i, 0], AttributeTokens[i, 1]);
			}

			// underline next word is 0x17
			// superscript next is 0x18
			// subscript next is 0x19
			// bold next is 0x13
			s2 = SuperNextRegex.Replace(s2, @"\up2 $1\up0 $2");
			s2 = SubNextRegex.Replace(s2, @"\dn2 $1\up0 $2");
			s2 = BoldNextRegex.Replace(s2, @"\b $1\b0 $2");
			s2 = s2.Replace("\u0011", "");   // this was an 'end' string for the above, 16bit just removed this char
			s2 = s2.Replace("-", @"\u8209?");	// do this here so that super/sub & bold next work.
			// if the underline is at beginning of text, don't replace the underline token with a space:
			s2 = UnderlineNextAtStartRegex.Replace(s2, @"\ul $1\ulnone ");
			s2 = UnderlineNextRegex.Replace(s2, @" \ul $1\ulnone ");

			s2 = Reg2.Replace(s2, new MatchEvaluator(ReplaceChars));

			// Now prepend an escape character, '\', to any curly brace.  The curly brace
			// is used in rtf land.
			s2 = s2.Replace(@"{", @"\{");
			s2 = s2.Replace(@"}", @"\}");
			s2 = s2.Replace("\n", @"\par ");			// line break in tables
			s2 = s2.Replace(@"\up0 \up2 ", @"\up2 "); //jsj - 18MAR2010 - rbt.Save() seems to do this automatically
			s2 = s2.Replace(@"\up0 \dn2 ", @"\dn2 "); //jsj - 18MAR2010 - rbt.Save() seems to do this automatically
			return s2;
		}

		#endregion

		#region Diagnostics

		// Occurrence count per differing character, keyed by the "before" code point.
		private static Dictionary<int, int> _SpecialChars = new Dictionary<int, int>();

		// Writes str to the console, showing every character outside 32..126 (other
		// than '\n') as <<hhhh>>.
		private static void ShowRawString(string str, string title)
		{
			Console.WriteLine("Raw Start --{0}:\n", title);
			foreach (char c in str)
			{
				int ic = (int)c;
				if (c != '\n' && (ic > 126 || ic < 32))
					Console.Write("<<{0:x4}>>", ic);
				else
					Console.Write(c);
			}
			Console.WriteLine("\n-- Raw End:{0}", title);
		}

		// Runs ConvertSeq over the text and reports how the result differs from it.
		private static void CompareBeforeAndAfter(string txtBefore)
		{
			string txtAfter = ConvertSeq(txtBefore);
			if (txtAfter != txtBefore)
			{
				ListDifference(txtBefore, txtAfter);
			}
		}

		// Compares the two strings position by position. Each differing "before"
		// character is counted; its first occurrence is also printed with the text
		// before and after it.
		private static void ListDifference(string txtBefore, string txtAfter)
		{
			int nBefore = txtBefore.Length;
			int nAfter = txtAfter.Length;
			int n = Math.Min(nBefore, nAfter);
			for (int i = 0; i < n; i++)
			{
				int chrBefore = (int)txtBefore[i];
				int chrAfter = (int)txtAfter[i];
				if (chrBefore == chrAfter) continue;

				int seen;
				if (_SpecialChars.TryGetValue(chrBefore, out seen))
				{
					_SpecialChars[chrBefore] = seen + 1;
					continue;
				}

				_SpecialChars.Add(chrBefore, 1);
				Console.WriteLine("Character Difference 0x{0:X4} 0x{1:X4} @ {2}", chrBefore, chrAfter, i);
				// Everything before the differing character (previously one character short).
				string prefix = txtBefore.Substring(0, i);
				string suffix = txtBefore.Substring(i + 1);
				Console.WriteLine("Found in '{0}' 0x{1:X} '{2}'", prefix, chrBefore, suffix);
			}
			if (nBefore != nAfter)
			{
				// The second placeholder previously repeated the "before" text.
				Console.WriteLine("Length Difference\r\nBefore '{0}'\r\nAfter  '{1}'", txtBefore, txtAfter);
			}
		}

		/// <summary>
		/// Writes every recorded special character and its occurrence count to the console.
		/// </summary>
		public static void ListSpecialCharacters()
		{
			Console.WriteLine("Special Characters");
			foreach (KeyValuePair<int, int> entry in _SpecialChars)
			{
				Console.WriteLine("0x{0:X4} - {1} occurances", entry.Key, entry.Value);
			}
		}

		/// <summary>
		/// Discards all recorded special-character counts.
		/// </summary>
		public static void ResetSpecialCharacters()
		{
			_SpecialChars = new Dictionary<int, int>();
		}

		#endregion
	}
}
 |