New LBDocumentClass Properties:

string Ascii - ASCII version of the entire document text - used for searching
bool HasSymbolCharacters - Checks whether the text contains symbol characters
Method:
ReplaceSymbolCharacters - Replaces any symbol characters with standard characters.
This commit is contained in:
Rich 2010-04-08 17:42:14 +00:00
parent c5eb494668
commit 88fcff5852

View File

@ -273,6 +273,154 @@ namespace LBWordLibrary
return retval;
}
}
/// <summary>
/// Gets the document text with any symbol characters (0xF0??) replaced by
/// their low-byte equivalents - used for searching.
/// </summary>
public string Ascii
{
    get
    {
        // Jump to the 100% position, then pull Start back to 0 so the range
        // begins at the top of the document.
        LBRange documentRange = Range();
        LBRange spanningRange = documentRange.GoTo(LBWdGoToItem.wdGoToPercent, LBWdGoToDirection.wdGoToLast, 100);
        spanningRange.Start = 0;
        string rawText = GetRangeText(spanningRange);
        return ReplaceSymbolCharacters(rawText);
    }
}
/// <summary>
/// Gets a value indicating whether the document text contains at least one
/// symbol character (U+F000 through U+F0FF).
/// </summary>
public bool HasSymbolCharacters
{
    get
    {
        // Jump to the 100% position, then pull Start back to 0 so the range
        // begins at the top of the document.
        LBRange documentRange = Range();
        LBRange spanningRange = documentRange.GoTo(LBWdGoToItem.wdGoToPercent, LBWdGoToDirection.wdGoToLast, 100);
        spanningRange.Start = 0;
        return _RegFindSymbol.IsMatch(GetRangeText(spanningRange));
    }
}
/// <summary>
/// Matches one or more consecutive symbol characters (U+F000 through U+F0FF).
/// The pattern never changes, so a single read-only instance is shared by
/// every document instead of building a new Regex per object.
/// </summary>
private static readonly Regex _RegFindSymbol = new Regex("[\\uF000-\\uF0FF]+");
/// <summary>
/// FixSymbolCharacters - Replaces every run of symbol characters found in the document text
/// </summary>
public void FixSymbolCharacters()
{
    // Build the working range: go to the 100% position, remember its End, then start from 0
    LBRange workRange = Range();
    workRange = workRange.GoTo(LBWdGoToItem.wdGoToPercent, LBWdGoToDirection.wdGoToLast, 100);
    int documentEnd = workRange.End;
    workRange.Start = 0;
    MatchCollection symbolRuns = _RegFindSymbol.Matches(GetRangeText(workRange));
    // Difference between a match's string index and its range position, carried from one match to the next
    int drift = 0;
    for (int i = 0; i < symbolRuns.Count; i++)
    {
        Match symbolRun = symbolRuns[i];
        int expectedStart = symbolRun.Index + drift;
        workRange.Start = expectedStart;
        workRange.End = expectedStart + symbolRun.Length;
        // FindRangeOffset may move the range onto the real location of the run
        drift = FindRangeOffset(workRange, symbolRun, drift, documentEnd);
        ReplaceSymbolCharacters(workRange);
    }
}
/// <summary>
/// Get the Range Text with error handling. myRange.Text sometimes will get a null reference exception.
/// </summary>
/// <param name="myRange">Range whose text is wanted</param>
/// <returns>The range text, or an empty string when the text is null or could not be read</returns>
internal static string GetRangeText(LBRange myRange)
{
    try
    {
        // Callers use the result directly (StartsWith, Length, Regex.IsMatch),
        // so a null Text is normalised to the same empty string used on failure.
        return myRange.Text ?? "";
    }
    catch (Exception ex)
    {
        // Deliberate best effort: report the problem and carry on with no text.
        Console.WriteLine("{0} - {1}", ex.GetType().Name, ex.Message);
        return "";
    }
}
/// <summary>
/// Looks for the problem string and adjusts the range as necessary
/// </summary>
/// <param name="myRange">Range already positioned where the match is expected; moved onto the match if it is elsewhere</param>
/// <param name="problem">Regex match (index, length and value) found in the document text</param>
/// <param name="offset">Offset added to the match index to get the expected range start</param>
/// <param name="end">Range position used as the end of the text that is searched</param>
/// <returns>The range start minus the match index, i.e. the offset to use for the next match</returns>
private int FindRangeOffset(LBRange myRange, Match problem, int offset, int end)
{
    // try to find the string
    string text = GetRangeText(myRange);
    if (text != problem.Value)
    {
        // Get the entire text starting at the offset of the first match
        myRange.Start = problem.Index + offset;
        myRange.End = end;
        text = GetRangeText(myRange);
        // Ordinal comparison: the value is a run of U+F0xx code units found by an ordinal
        // regex, so culture-sensitive matching could disagree about where it is.
        while (!text.StartsWith(problem.Value, StringComparison.Ordinal))
        {
            int newStart = text.IndexOf(problem.Value, StringComparison.Ordinal);// Find the string if it is not at the beginning
            if (newStart < 0)
            {
                // Not found from here on: step back one position at a time (as before),
                // but give up at the start of the document instead of looping forever.
                if (myRange.Start <= 0)
                {
                    break;
                }
                myRange.Start -= 1;
            }
            else
            {
                // NOTE(review): this compares the absolute range start with a relative index - confirm the intent
                myRange.Start += myRange.Start == newStart ? newStart + 1 : newStart; // adjust the starting location
            }
            text = GetRangeText(myRange);// get the text to check
        }
        myRange.End = myRange.Start + problem.Length; // assume that the end should be the start plus the length
        text = GetRangeText(myRange);
        while (text.Length < problem.Length) // If the result is too short increase the length
        {
            int previousEnd = myRange.End;
            myRange.End = previousEnd + (problem.Length - text.Length);
            // Assumes End reads back unchanged when the range cannot grow any further
            // (e.g. at the end of the document) - stop rather than spin.
            if (myRange.End <= previousEnd)
            {
                break;
            }
            text = GetRangeText(myRange);
        }
    }
    return myRange.Start - problem.Index;
}
/// <summary>
/// ReplaceSymbolCharacters - Rewrites the text of the specified range with its symbol characters replaced
/// </summary>
/// <param name="myRange">Range whose text is rewritten in place</param>
private static void ReplaceSymbolCharacters(LBRange myRange)
{
    try
    {
        string originalText = GetRangeText(myRange);
        string expectedText = ReplaceSymbolCharacters(originalText);
        myRange.Text = expectedText;
        string actualText = GetRangeText(myRange);
        if (actualText == expectedText)
        {
            return; // the range now holds the replaced text
        }

        // The text read back differs: widen the range by one character on each side and try once more.
        Console.WriteLine("'TryEntireRange Failed',{0},{1},'{2}','{3}','{4}'", myRange.Start, myRange.End, originalText, expectedText, actualText);
        int originalEnd = myRange.End;
        myRange.Start -= 1;
        myRange.End = originalEnd + 1;
        string widenedText = GetRangeText(myRange);
        myRange.Text = ReplaceSymbolCharacters(widenedText);
        Console.WriteLine("'TryEntireRange Failed',{0},{1},'{2}'", myRange.Start, myRange.End, GetRangeText(myRange));
    }
    catch (Exception ex)
    {
        Console.WriteLine("'TryEntireRange Exception',{0},{1},'{2}'", myRange.Start, myRange.End, ex.Message);
    }
}
/// <summary>
/// ReplaceSymbolCharacters maps every symbol character (0xF0??) in the string to the
/// character given by its low byte and leaves all other characters untouched
/// </summary>
/// <param name="str">Text to convert</param>
/// <returns>A string of the same length with the symbol characters replaced</returns>
private static string ReplaceSymbolCharacters(string str)
{
    char[] converted = str.ToCharArray();
    for (int i = 0; i < converted.Length; i++)
    {
        char current = converted[i];
        // High byte 0xF0 marks a symbol character; keep only the low byte
        if (current >= '\uF000' && current <= '\uF0FF')
        {
            converted[i] = (char)(current & 0xFF);
        }
    }
    return new string(converted);
}
/// <summary>
/// Close the document
/// </summary>
/// <param name="SaveChanges">Save Changes</param>
public void Close(bool SaveChanges)
{
InvokeMethod("Close", SaveChanges, Missing.Value, Missing.Value);
@ -334,9 +482,10 @@ namespace LBWordLibrary
{
myRange.Start = start;
myRange.End = start + 1;
if (Regex.IsMatch(myRange.Text, "[A-Z]")) return true;
if (Regex.IsMatch(myRange.Text, "[a-z]")) return false;
start = start - 1;
string previous = LBDocumentClass.GetRangeText(myRange);
if (Regex.IsMatch(previous, "[A-Z]")) return true;
if (Regex.IsMatch(previous, "[a-z]")) return false;
start = start - 1;
}
return false;
}