Initial Commit

This commit is contained in:
2023-06-21 12:46:23 -04:00
commit c70248a520
1352 changed files with 336780 additions and 0 deletions

View File

@@ -0,0 +1,119 @@
using System;
using System.Collections;
/*
* $Id: ByteVector.cs,v 1.2 2005/06/18 08:17:05 psoares33 Exp $
* Copyright (C) 2001 The Apache Software Foundation. All rights reserved.
* For details on use and redistribution please refer to the
* LICENSE file included with these sources.
*/
namespace iTextSharp.text.pdf.hyphenation {
/**
* This class implements a simple byte vector with access to the
* underlying array.
*
* @author Carlos Villegas <cav@uniscope.co.jp>
*/
public class ByteVector {
    /**
    * Default capacity increment size
    */
    private const int DEFAULT_BLOCK_SIZE = 2048;
    /**
    * Capacity increment used by this instance; always greater than zero
    */
    private int BLOCK_SIZE;
    /**
    * The encapsulated array
    */
    private byte[] arr;
    /**
    * Points to next free item
    */
    private int n;

    /**
    * Creates an empty vector using the default block size.
    */
    public ByteVector() : this(DEFAULT_BLOCK_SIZE) {}

    /**
    * Creates an empty vector.
    * @param capacity initial capacity and growth increment; a value
    * that is not positive selects the default block size
    */
    public ByteVector(int capacity) {
        if (capacity > 0)
            BLOCK_SIZE = capacity;
        else
            BLOCK_SIZE = DEFAULT_BLOCK_SIZE;
        arr = new byte[BLOCK_SIZE];
        n = 0;
    }

    /**
    * Wraps an existing array (no copy is made). The vector starts
    * with a Length of 0, so the first Alloc returns index 0.
    * @param a the array to wrap
    */
    public ByteVector(byte[] a) {
        BLOCK_SIZE = DEFAULT_BLOCK_SIZE;
        arr = a;
        n = 0;
    }

    /**
    * Wraps an existing array (no copy is made) with a custom growth
    * increment. The vector starts with a Length of 0.
    * @param a the array to wrap
    * @param capacity growth increment; a value that is not positive
    * selects the default block size
    */
    public ByteVector(byte[] a, int capacity) {
        if (capacity > 0)
            BLOCK_SIZE = capacity;
        else
            BLOCK_SIZE = DEFAULT_BLOCK_SIZE;
        arr = a;
        n = 0;
    }

    /**
    * The underlying array. It is replaced whenever Alloc or
    * TrimToSize reallocates, so callers must re-read it after those calls.
    */
    public byte[] Arr {
        get {
            return arr;
        }
    }

    /**
    * return number of items in array
    */
    public int Length {
        get {
            return n;
        }
    }

    /**
    * returns current capacity of array
    */
    public int Capacity {
        get {
            return arr.Length;
        }
    }

    /**
    * Direct access to an element of the underlying array.
    */
    public byte this[int index] {
        get {
            return arr[index];
        }
        set {
            arr[index] = value;
        }
    }

    /**
    * This is to implement memory allocation in the array. Like Malloc().
    * @param size number of items to reserve
    * @return the index of the first reserved item
    */
    public int Alloc(int size) {
        int index = n;
        int len = arr.Length;
        if (n + size >= len) {
            // Grow in whole blocks until the request fits. Adding a single
            // block is not enough when size is larger than BLOCK_SIZE.
            int newLen = len + BLOCK_SIZE;
            while (n + size >= newLen) {
                newLen += BLOCK_SIZE;
            }
            byte[] aux = new byte[newLen];
            Array.Copy(arr, 0, aux, 0, len);
            arr = aux;
        }
        n += size;
        return index;
    }

    /**
    * Shrinks the underlying array so that its capacity equals Length.
    */
    public void TrimToSize() {
        if (n < arr.Length) {
            byte[] aux = new byte[n];
            Array.Copy(arr, 0, aux, 0, n);
            arr = aux;
        }
    }
}
}

View File

@@ -0,0 +1,128 @@
using System;
/*
* $Id: CharVector.cs,v 1.2 2005/06/18 08:17:05 psoares33 Exp $
* Copyright (C) 2001 The Apache Software Foundation. All rights reserved.
* For details on use and redistribution please refer to the
* LICENSE file included with these sources.
*/
namespace iTextSharp.text.pdf.hyphenation {
/**
* This class implements a simple char vector with access to the
* underlying array.
*
* @author Carlos Villegas <cav@uniscope.co.jp>
*/
public class CharVector : ICloneable {
    /**
    * Default capacity increment size
    */
    private const int DEFAULT_BLOCK_SIZE = 2048;
    /**
    * Capacity increment used by this instance; always greater than zero
    */
    private int BLOCK_SIZE;
    /**
    * The encapsulated array
    */
    private char[] array;
    /**
    * Points to next free item
    */
    private int n;

    /**
    * Creates an empty vector using the default block size.
    */
    public CharVector() : this(DEFAULT_BLOCK_SIZE) {}

    /**
    * Creates an empty vector.
    * @param capacity initial capacity and growth increment; a value
    * that is not positive selects the default block size
    */
    public CharVector(int capacity) {
        if (capacity > 0)
            BLOCK_SIZE = capacity;
        else
            BLOCK_SIZE = DEFAULT_BLOCK_SIZE;
        array = new char[BLOCK_SIZE];
        n = 0;
    }

    /**
    * Wraps an existing array (no copy is made). The whole array is
    * considered in use: Length equals a.Length.
    * @param a the array to wrap
    */
    public CharVector(char[] a) {
        BLOCK_SIZE = DEFAULT_BLOCK_SIZE;
        array = a;
        n = a.Length;
    }

    /**
    * Wraps an existing array (no copy is made) with a custom growth
    * increment. The whole array is considered in use.
    * @param a the array to wrap
    * @param capacity growth increment; a value that is not positive
    * selects the default block size
    */
    public CharVector(char[] a, int capacity) {
        if (capacity > 0)
            BLOCK_SIZE = capacity;
        else
            BLOCK_SIZE = DEFAULT_BLOCK_SIZE;
        array = a;
        n = a.Length;
    }

    /**
    * Reset Vector but don't resize or clear elements
    */
    public void Clear() {
        n = 0;
    }

    /**
    * Returns a copy with its own array, the same block size and the
    * same Length.
    */
    public Object Clone() {
        CharVector cv = new CharVector((char[])array.Clone(), BLOCK_SIZE);
        cv.n = this.n;
        return cv;
    }

    /**
    * The underlying array. It is replaced whenever Alloc or
    * TrimToSize reallocates, so callers must re-read it after those calls.
    */
    public char[] Arr {
        get {
            return array;
        }
    }

    /**
    * return number of items in array
    */
    public int Length {
        get {
            return n;
        }
    }

    /**
    * returns current capacity of array
    */
    public int Capacity {
        get {
            return array.Length;
        }
    }

    /**
    * Direct access to an element of the underlying array.
    */
    public char this[int index] {
        get {
            return array[index];
        }
        set {
            array[index] = value;
        }
    }

    /**
    * Reserves size items at the end of the vector, growing the
    * underlying array when needed.
    * @param size number of items to reserve
    * @return the index of the first reserved item
    */
    public int Alloc(int size) {
        int index = n;
        int len = array.Length;
        if (n + size >= len) {
            // Grow in whole blocks until the request fits. Adding a single
            // block is not enough when size is larger than BLOCK_SIZE.
            int newLen = len + BLOCK_SIZE;
            while (n + size >= newLen) {
                newLen += BLOCK_SIZE;
            }
            char[] aux = new char[newLen];
            Array.Copy(array, 0, aux, 0, len);
            array = aux;
        }
        n += size;
        return index;
    }

    /**
    * Shrinks the underlying array so that its capacity equals Length.
    */
    public void TrimToSize() {
        if (n < array.Length) {
            char[] aux = new char[n];
            Array.Copy(array, 0, aux, 0, n);
            array = aux;
        }
    }
}
}

View File

@@ -0,0 +1,67 @@
using System;
using System.Text;
/*
* Copyright 1999-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
namespace iTextSharp.text.pdf.hyphenation {
/**
* This class represents a hyphen. A 'full' hyphen is made of 3 parts:
* the pre-break text, post-break text and no-break. If no line-break
* is generated at this position, the no-break text is used, otherwise,
* pre-break and post-break are used. Typically, pre-break is equal to
* the hyphen character and the others are empty. However, this general
* scheme allows support for cases in some languages where words change
* spelling if they're split across lines, like german's 'backen' which
* hyphenates 'bak-ken'. BTW, this comes from TeX.
*
* @author Carlos Villegas <cav@uniscope.co.jp>
*/
public class Hyphen {
    /** Text emitted before the line break. */
    public String preBreak;
    /** Text emitted when no line break is generated at this position. */
    public String noBreak;
    /** Text emitted after the line break. */
    public String postBreak;

    /**
    * Creates a full hyphen.
    * @param pre the pre-break text
    * @param no the no-break text
    * @param post the post-break text
    */
    internal Hyphen(String pre, String no, String post) {
        preBreak = pre;
        noBreak = no;
        postBreak = post;
    }

    /**
    * Creates a hyphen that only has pre-break text; the no-break and
    * post-break parts are null.
    * @param pre the pre-break text
    */
    internal Hyphen(String pre) : this(pre, null, null) {
    }

    /**
    * @return "-" for the plain hyphen (pre-break "-", no other parts),
    * otherwise "{pre}{post}{no}" with null parts rendered as empty
    */
    public override String ToString() {
        bool onlyPreBreak = noBreak == null && postBreak == null;
        if (onlyPreBreak && preBreak != null && preBreak.Equals("-")) {
            return "-";
        }
        // string concatenation renders null as the empty string
        return "{" + preBreak + "}{" + postBreak + "}{" + noBreak + "}";
    }
}
}

View File

@@ -0,0 +1,79 @@
using System;
using System.Text;
/*
* $Id: Hyphenation.cs,v 1.2 2005/06/18 08:17:05 psoares33 Exp $
* Copyright (C) 2001 The Apache Software Foundation. All rights reserved.
* For details on use and redistribution please refer to the
* LICENSE file included with these sources.
*/
namespace iTextSharp.text.pdf.hyphenation {
/**
* This class represents a hyphenated word.
*
* @author Carlos Villegas <cav@uniscope.co.jp>
*/
public class Hyphenation {
    /**
    * hyphenation points, as character offsets into word
    */
    int[] hyphenPoints;
    /**
    * the word the hyphenation points refer to
    */
    string word;
    /**
    * number of hyphenation points in word
    */
    int len;

    /**
    * @param word the word that was hyphenated
    * @param points offsets into word at which a hyphen may be inserted
    */
    internal Hyphenation(string word, int[] points) {
        this.word = word;
        hyphenPoints = points;
        len = points.Length;
    }

    /**
    * @return the number of hyphenation points in the word
    */
    public int Length {
        get {
            return len;
        }
    }

    /**
    * @param index index of the hyphenation point
    * @return the pre-break text, not including the hyphen character
    */
    public string GetPreHyphenText(int index) {
        return word.Substring(0, hyphenPoints[index]);
    }

    /**
    * @param index index of the hyphenation point
    * @return the post-break text
    */
    public string GetPostHyphenText(int index) {
        return word.Substring(hyphenPoints[index]);
    }

    /**
    * @return the hyphenation points
    */
    public int[] HyphenationPoints {
        get {
            return hyphenPoints;
        }
    }

    /**
    * @return the word with a "-" inserted at every hyphenation point
    */
    public override string ToString() {
        StringBuilder str = new StringBuilder();
        int start = 0;
        for (int i = 0; i < len; i++) {
            // Substring takes (start, length), not (start, end)
            str.Append(word.Substring(start, hyphenPoints[i] - start));
            str.Append('-');
            start = hyphenPoints[i];
        }
        str.Append(word.Substring(start));
        return str.ToString();
    }
}
}

View File

@@ -0,0 +1,27 @@
using System;
/*
* Copyright 1999-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
namespace iTextSharp.text.pdf.hyphenation {
/**
* Exception type of the hyphenation package.
*
* @author Carlos Villegas <cav@uniscope.co.jp>
*/
public class HyphenationException : Exception {
    /**
    * @param msg the exception message
    */
    public HyphenationException(string msg)
        : base(msg)
    {
    }
}
}

View File

@@ -0,0 +1,451 @@
using System;
using System.IO;
using System.Text;
using System.Collections;
/*
* Copyright 1999-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* $Id: HyphenationTree.cs,v 1.2 2005/06/18 08:05:23 psoares33 Exp $ */
namespace iTextSharp.text.pdf.hyphenation {
/**
* This tree structure stores the hyphenation patterns in an efficient
* way for fast lookup. It provides the method to
* hyphenate a word.
*
* @author Carlos Villegas <cav@uniscope.co.jp>
*/
public class HyphenationTree : TernaryTree, IPatternConsumer {
/**
* value space: stores the interletter values
*/
protected ByteVector vspace;
/**
* This map stores hyphenation exceptions
*/
protected Hashtable stoplist;
/**
* This map stores the character classes
*/
protected TernaryTree classmap;
/**
* Temporary map to store interletter values on pattern loading.
*/
private TernaryTree ivalues;
/**
* Creates an empty tree with an empty exception list, an empty
* character class map and a value space whose index 0 is reserved.
*/
public HyphenationTree() {
    vspace = new ByteVector();
    // reserve index 0 so that it never becomes the offset of packed values
    vspace.Alloc(1);
    classmap = new TernaryTree();
    // usually a small table
    stoplist = new Hashtable(23);
}
/**
* Packs interletter values into the value space, two 4-bit values per
* byte with the first value of each pair in the high nibble. Each
* value is stored as digit + 1 so that zero can serve as terminator.
* @param values a string of digits from '0' to '9' representing the
* interletter values.
* @return the index into the vspace array where the packed values
* are stored.
*/
protected int PackValues(String values) {
    int count = values.Length;
    // one byte per pair of values plus a terminator byte; an odd count
    // needs one more byte for the unpaired last value
    int size = (count >> 1) + ((count & 1) == 1 ? 2 : 1);
    int offset = vspace.Alloc(size);
    byte[] store = vspace.Arr; // read after Alloc, which may reallocate
    for (int pos = 0; pos < count; pos++) {
        int slot = (pos >> 1) + offset;
        byte nibble = (byte)((values[pos] - '0' + 1) & 0x0f);
        if ((pos & 1) == 0) {
            store[slot] = (byte)(nibble << 4); // big endian
        } else {
            store[slot] = (byte)(store[slot] | nibble);
        }
    }
    store[size - 1 + offset] = 0; // terminator
    return offset;
}
/**
* Reads packed interletter values back as a string of digit characters.
* Reading stops at a zero byte or at a byte whose low nibble is zero.
* @param k index into the vspace array where the packed values start
* @return the values as a string of digit characters
*/
protected String UnpackValues(int k) {
    StringBuilder digits = new StringBuilder();
    for (byte packed = vspace[k++]; packed != 0; packed = vspace[k++]) {
        digits.Append((char)((packed >> 4) - 1 + '0'));
        int low = packed & 0x0f;
        if (low == 0) {
            break; // odd number of values: the low nibble is unused
        }
        digits.Append((char)(low - 1 + '0'));
    }
    return digits.ToString();
}
/**
* Parses a hyphenation pattern stream with SimplePatternParser, using
* this tree as the pattern consumer, then trims the internal storage.
* @param stream the stream containing the patterns
*/
public void LoadSimplePatterns(Stream stream) {
    // auxiliary map, only needed while patterns are being added
    ivalues = new TernaryTree();
    SimplePatternParser parser = new SimplePatternParser();
    parser.Parse(stream, this);
    // the patterns and values are in the tree now: release unused capacity
    TrimToSize();
    vspace.TrimToSize();
    classmap.TrimToSize();
    ivalues = null;
}
/**
* Looks up a pattern in the tree.
* @param pat the pattern to look up
* @return the unpacked interletter values of the pattern, or an empty
* string when the lookup returns a negative index
*/
public String FindPattern(String pat) {
    int valueIndex = base.Find(pat);
    return valueIndex >= 0 ? UnpackValues(valueIndex) : "";
}
/**
* Compares two null terminated strings, s starting at si and t
* starting at ti.
* @return 0 if both are equal or if t ends at the first position where
* they differ (t is a prefix of s), otherwise the difference between
* the first pair of differing characters
*/
protected int Hstrcmp(char[] s, int si, char[] t, int ti) {
    while (s[si] == t[ti]) {
        if (s[si] == 0) {
            return 0; // both strings ended together
        }
        si++;
        ti++;
    }
    return t[ti] == 0 ? 0 : s[si] - t[ti];
}
/**
* Reads packed interletter values back as numbers. Each nibble holds
* value + 1; reading stops at a zero byte or at a byte whose low
* nibble is zero.
* @param k index into the vspace array where the packed values start
* @return one byte per stored value
*/
protected byte[] GetValues(int k) {
    // first pass: count the stored values
    int count = 0;
    for (int pos = k; ; pos++) {
        byte packed = vspace[pos];
        if (packed == 0) {
            break;
        }
        count++;
        if ((packed & 0x0f) == 0) {
            break;
        }
        count++;
    }
    // second pass: decode them
    byte[] res = new byte[count];
    int filled = 0;
    for (int pos = k; filled < count; pos++) {
        byte packed = vspace[pos];
        res[filled++] = (byte)((packed >> 4) - 1);
        if (filled < count) {
            res[filled++] = (byte)((packed & 0x0f) - 1);
        }
    }
    return res;
}
/**
* <p>Search for all possible partial matches of word starting
* at index and update interletter values. In other words, it
* does something like:</p>
* <code>
* for (i=0; i<patterns.length; i++) {
* if ( word.Substring(index).StartsWith(patterns[i]) )
* Update_interletter_values(patterns[i]);
* }
* </code>
* <p>But it is done in an efficient way since the patterns are
* stored in a ternary tree. In fact, this is the whole purpose
* of having the tree: doing this search without having to test
* every single pattern. The number of patterns for languages
* such as English range from 4000 to 10000. Thus, doing thousands
* of string comparisons for each word to hyphenate would be
* really slow without the tree. The tradeoff is memory, but
* using a ternary tree instead of a trie almost halves the
* memory used by Lout or TeX. It's also faster than using
* a hash table.</p>
* <p>For every matching pattern, each entry of il (starting at index)
* is raised to the pattern's value when that value is larger.</p>
* @param word null terminated word to match
* @param index start index from word
* @param il interletter values array to update
*/
protected void SearchPatterns(char[] word, int index, byte[] il) {
byte[] values;
int i = index;
char p, q;
char sp = word[i];
// root, sc, lo, eq, hi and kv are not declared in this class;
// presumably they are the node arrays inherited from TernaryTree
p = root;
while (p > 0 && p < sc.Length) {
// 0xFFFF marks a compressed branch: the rest of the key is compared
// against the string stored in kv starting at lo[p]
if (sc[p] == 0xFFFF) {
if (Hstrcmp(word, i, kv.Arr, lo[p]) == 0) {
values = GetValues(eq[p]); // data pointer is in eq[]
// keep the maximum of the current and the pattern's values
int j = index;
for (int k = 0; k < values.Length; k++) {
if (j < il.Length && values[k] > il[j]) {
il[j] = values[k];
}
j++;
}
}
return;
}
int d = sp - sc[p];
if (d == 0) {
if (sp == 0) {
// end of word reached
break;
}
// character matched: advance in the word and follow the eq link
sp = word[++i];
p = eq[p];
q = p;
// look for a pattern ending at this position by searching for
// the null char ( splitchar == 0 )
while (q > 0 && q < sc.Length) {
if (sc[q] == 0xFFFF) { // stop at compressed branch
break;
}
if (sc[q] == 0) {
// a pattern ends here: merge its values into il
values = GetValues(eq[q]);
int j = index;
for (int k = 0; k < values.Length; k++) {
if (j < il.Length && values[k] > il[j]) {
il[j] = values[k];
}
j++;
}
break;
} else {
q = lo[q];
/**
* actually the code should be:
* q = sc[q] < 0 ? hi[q] : lo[q];
* but chars are unsigned, so sc[q] is never negative
*/
}
}
} else {
// no match at this node: continue in the lower or higher subtree
p = d < 0 ? lo[p] : hi[p];
}
}
}
/**
* Hyphenate word and return a Hyphenation object. The whole string is
* passed to the char array overload.
* @param word the word to be hyphenated
* @param remainCharCount Minimum number of characters allowed
* before the hyphenation point.
* @param pushCharCount Minimum number of characters allowed after
* the hyphenation point.
* @return a {@link Hyphenation Hyphenation} object representing
* the hyphenated word or null if word is not hyphenated.
*/
public Hyphenation Hyphenate(String word, int remainCharCount,
                             int pushCharCount) {
    char[] chars = word.ToCharArray();
    int length = chars.Length;
    return Hyphenate(chars, 0, length, remainCharCount, pushCharCount);
}
/**
* w = "****nnllllllnnn*****",
* where n is a non-letter, l is a letter,
* all n may be absent, the first n is at offset,
* the first l is at offset + iIgnoreAtBeginning;
* word = ".llllll.'\0'***",
* where all l in w are copied into word.
* In the first part of the routine len = w.length,
* in the second part of the routine len = word.length.
* Three indices are used:
* Index(w), the index in w,
* Index(word), the index in word,
* Letterindex(word), the index in the letter part of word.
* The following relations exist:
* Index(w) = offset + i - 1
* Index(word) = i - iIgnoreAtBeginning
* Letterindex(word) = Index(word) - 1
* (see first loop).
* It follows that:
* Index(w) - Index(word) = offset - 1 + iIgnoreAtBeginning
* Index(w) = Letterindex(word) + offset + iIgnoreAtBeginning
*/
/**
* Hyphenate word and return a Hyphenation object.
* Non-letter characters (characters without a character class) are
* skipped at the beginning and at the end of the word; a non-letter
* followed by a letter makes the word unhyphenatable.
* @param w char array that contains the word
* @param offset Offset to first character in word
* @param len Length of word
* @param remainCharCount Minimum number of characters allowed
* before the hyphenation point.
* @param pushCharCount Minimum number of characters allowed after
* the hyphenation point.
* @return a {@link Hyphenation Hyphenation} object representing
* the hyphenated word or null if word is not hyphenated. The
* hyphenation points are relative to w[offset] and the word of the
* returned object is the complete range w[offset .. offset + len).
*/
public Hyphenation Hyphenate(char[] w, int offset, int len,
                             int remainCharCount, int pushCharCount) {
    int i;
    // keep the caller's length: len is reduced to the letter count below,
    // but the returned points refer to the unreduced range of w
    int origLen = len;
    char[] word = new char[len + 3];
    // normalize word
    char[] c = new char[2];
    int iIgnoreAtBeginning = 0;
    int iLength = len;
    bool bEndOfLetters = false;
    for (i = 1; i <= len; i++) {
        c[0] = w[offset + i - 1];
        int nc = classmap.Find(c, 0);
        if (nc < 0) { // found a non-letter character ...
            if (i == (1 + iIgnoreAtBeginning)) {
                // ... before any letter character
                iIgnoreAtBeginning++;
            } else {
                // ... after a letter character
                bEndOfLetters = true;
            }
            iLength--;
        } else {
            if (bEndOfLetters) {
                // a letter after a trailing non-letter: give up
                return null;
            }
            word[i - iIgnoreAtBeginning] = (char)nc;
        }
    }
    len = iLength;
    if (len < (remainCharCount + pushCharCount)) {
        // word is too short to be hyphenated
        return null;
    }
    int[] result = new int[len + 1];
    int k = 0;
    // check exception list first
    String sw = new String(word, 1, len);
    if (stoplist.ContainsKey(sw)) {
        // assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no = null)
        ArrayList hw = (ArrayList)stoplist[sw];
        int j = 0;
        for (i = 0; i < hw.Count; i++) {
            Object o = hw[i];
            // j = Index(sw) = Letterindex(word)?
            // result[k] = corresponding Index(w)
            if (o is String) {
                j += ((String)o).Length;
                if (j >= remainCharCount && j < (len - pushCharCount)) {
                    result[k++] = j + iIgnoreAtBeginning;
                }
            }
        }
    } else {
        // use algorithm to get hyphenation points
        word[0] = '.'; // word start marker
        word[len + 1] = '.'; // word end marker
        word[len + 2] = (char)0; // null terminated
        byte[] il = new byte[len + 3]; // initialized to zero
        for (i = 0; i < len + 1; i++) {
            SearchPatterns(word, i, il);
        }
        // hyphenation points are located where interletter value is odd
        // i is Letterindex(word),
        // i + 1 is Index(word),
        // result[k] = corresponding Index(w)
        for (i = 0; i < len; i++) {
            if (((il[i + 1] & 1) == 1) && i >= remainCharCount
                    && i <= (len - pushCharCount)) {
                result[k++] = i + iIgnoreAtBeginning;
            }
        }
    }
    if (k == 0) {
        return null;
    }
    // trim result array
    int[] res = new int[k];
    Array.Copy(result, 0, res, 0, k);
    // Build the word from the original length. Using the reduced letter
    // count would cut off the end of the word whenever non-letters were
    // skipped, and the points (which include iIgnoreAtBeginning) could
    // then lie outside the string.
    return new Hyphenation(new String(w, offset, origLen), res);
}
/**
* Add a character class to the tree. It is used by
* {@link SimplePatternParser SimplePatternParser} as callback to
* add character classes. Character classes define the
* valid word characters for hyphenation. If a word contains
* a character not defined in any of the classes, it is not hyphenated.
* It also defines a way to normalize the characters in order
* to compare them with the stored patterns. Usually pattern
* files use only lower case characters, in this case a class
* for letter 'a', for example, should be defined as "aA", the first
* character being the normalization char.
* @param chargroup the characters of the class; an empty string is ignored
*/
public void AddClass(String chargroup) {
    if (chargroup.Length <= 0) {
        return;
    }
    char normalized = chargroup[0];
    // single character key followed by a null terminator
    char[] key = new char[] { (char)0, (char)0 };
    foreach (char member in chargroup) {
        key[0] = member;
        classmap.Insert(key, 0, normalized);
    }
}
/**
* Add an exception to the tree. It is used by
* {@link SimplePatternParser SimplePatternParser} class as callback to
* store the hyphenation exceptions. An exception already stored
* for the same word is replaced.
* @param word normalized word
* @param hyphenatedword a vector of alternating strings and
* {@link Hyphen hyphen} objects.
*/
public void AddException(String word, ArrayList hyphenatedword) {
    this.stoplist[word] = hyphenatedword;
}
/**
* Add a pattern to the tree. Mainly, to be used by
* {@link SimplePatternParser SimplePatternParser} class as callback to
* add a pattern to the tree. Value strings that were already packed
* are looked up in the temporary ivalues map and reused.
* @param pattern the hyphenation pattern
* @param ivalue interletter weight values indicating the
* desirability and priority of hyphenating at a given point
* within the pattern. It should contain only digit characters.
* (i.e. '0' to '9').
*/
public void AddPattern(String pattern, String ivalue) {
    int valueIndex = ivalues.Find(ivalue);
    bool alreadyPacked = valueIndex > 0;
    if (!alreadyPacked) {
        // first occurrence of this value string: pack it and remember where
        valueIndex = PackValues(ivalue);
        ivalues.Insert(ivalue, (char)valueIndex);
    }
    Insert(pattern, (char)valueIndex);
}
/**
* Writes the size of the value space to the console, followed by the
* statistics of the base class.
*/
public override void PrintStats() {
    String line = "Value space size = " + vspace.Length;
    Console.WriteLine(line);
    base.PrintStats();
}
}
}

View File

@@ -0,0 +1,220 @@
using System;
using System.IO;
using System.Text;
using System.Collections;
using System.util;
using iTextSharp.text.pdf;
/*
* Copyright 1999-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
namespace iTextSharp.text.pdf.hyphenation {
/**
* This class is the main entry point to the hyphenation package.
* You can use only the static methods or create an instance.
*
* @author Carlos Villegas <cav@uniscope.co.jp>
*/
public class Hyphenator {
/** TODO: Don't use statics */
private static Hashtable hyphenTrees = Hashtable.Synchronized(new Hashtable());
private HyphenationTree hyphenTree = null;
private int remainCharCount = 2;
private int pushCharCount = 2;
private const String defaultHyphLocation = "iTextSharp.text.pdf.hyphenation.hyph.";
/**
* Creates a hyphenator for a language.
* @param lang the language code
* @param country the country code; null or "none" when not used
* @param leftMin minimum number of characters before a hyphenation point
* @param rightMin minimum number of characters after a hyphenation point
*/
public Hyphenator(String lang, String country, int leftMin,
                  int rightMin) {
    this.remainCharCount = leftMin;
    this.pushCharCount = rightMin;
    this.hyphenTree = GetHyphenationTree(lang, country);
}
/**
* Returns the hyphenation tree for a language. Trees are cached in a
* static table under "lang" or "lang_country"; a tree that could not
* be loaded is not cached.
* @param lang the language code
* @param country the country code; null or "none" when not used
* @return the hyphenation tree, or null if none could be loaded
*/
public static HyphenationTree GetHyphenationTree(String lang,
                                                 String country) {
    // check whether the country code has been used
    bool useCountry = country != null && !country.Equals("none");
    String key = useCountry ? lang + "_" + country : lang;
    // first try to find it in the cache; only non-null trees are ever
    // stored, so a null lookup result means "not cached"
    HyphenationTree cached = (HyphenationTree)hyphenTrees[key];
    if (cached == null) {
        cached = (HyphenationTree)hyphenTrees[lang];
    }
    if (cached != null) {
        return cached;
    }
    HyphenationTree loaded = GetResourceHyphenationTree(key);
    // put it into the pattern cache
    if (loaded != null) {
        hyphenTrees[key] = loaded;
    }
    return loaded;
}
/**
* Loads a hyphenation tree from the resource named
* defaultHyphLocation + key + ".xml". When that resource is missing
* and the key is longer than two characters, the first two characters
* of the key are tried instead.
* @param key the language key, "lang" or "lang_country"
* @return a hyphenation tree, or null when no resource was found or
* any exception was thrown while loading
*/
public static HyphenationTree GetResourceHyphenationTree(String key) {
    try {
        String resourceName = defaultHyphLocation + key + ".xml";
        Stream stream = BaseFont.GetResourceStream(resourceName);
        if (stream == null && key.Length > 2) {
            // fall back to the first two characters of the key
            resourceName = defaultHyphLocation + key.Substring(0, 2) + ".xml";
            stream = BaseFont.GetResourceStream(resourceName);
        }
        if (stream != null) {
            HyphenationTree tree = new HyphenationTree();
            tree.LoadSimplePatterns(stream);
            return tree;
        }
        return null;
    }
    catch {
        // best effort: any failure means "no tree available"
        return null;
    }
}
/**
* @param key
* @return a hyphenation tree
*/
/* public static HyphenationTree GetFileHyphenationTree(String key) {
try {
if (hyphenDir == null)
return null;
Stream stream = null;
string hyphenFile = Path.Combine(hyphenDir, key + ".xml");
if (File.Exists(hyphenFile))
stream = new FileStream(hyphenFile, FileMode.Open, FileAccess.Read, FileShare.Read);
if (stream == null && key.Length > 2) {
hyphenFile = Path.Combine(hyphenDir, key.Substring(0, 2) + ".xml");
if (File.Exists(hyphenFile))
stream = new FileStream(hyphenFile, FileMode.Open, FileAccess.Read, FileShare.Read);
}
if (stream == null)
return null;
HyphenationTree hTree = new HyphenationTree();
hTree.LoadSimplePatterns(stream);
return hTree;
}
catch (Exception e) {
return null;
}
}*/
/**
* Hyphenates a word using the tree of the given language.
* @param lang the language code
* @param country the country code; null or "none" when not used
* @param word the word to hyphenate
* @param leftMin minimum number of characters before a hyphenation point
* @param rightMin minimum number of characters after a hyphenation point
* @return a hyphenation object, or null when no tree is available for
* the language or the tree returns null for the word
*/
public static Hyphenation Hyphenate(String lang, String country,
                                    String word, int leftMin,
                                    int rightMin) {
    HyphenationTree tree = GetHyphenationTree(lang, country);
    // a missing tree is not reported, the word is simply left unhyphenated
    return tree == null ? null : tree.Hyphenate(word, leftMin, rightMin);
}
/**
* Hyphenates part of a char array using the tree of the given language.
* @param lang the language code
* @param country the country code; null or "none" when not used
* @param word char array that contains the word
* @param offset offset of the first character of the word
* @param len length of the word
* @param leftMin minimum number of characters before a hyphenation point
* @param rightMin minimum number of characters after a hyphenation point
* @return a hyphenation object, or null when no tree is available for
* the language or the tree returns null for the word
*/
public static Hyphenation Hyphenate(String lang, String country,
                                    char[] word, int offset, int len,
                                    int leftMin, int rightMin) {
    HyphenationTree tree = GetHyphenationTree(lang, country);
    // a missing tree is not reported, the word is simply left unhyphenated
    return tree == null
        ? null
        : tree.Hyphenate(word, offset, len, leftMin, rightMin);
}
/**
* Sets the minimum number of characters before a hyphenation point
* used by the instance Hyphenate methods.
* @param min the minimum number of characters
*/
public void SetMinRemainCharCount(int min) {
    this.remainCharCount = min;
}
/**
* Sets the minimum number of characters after a hyphenation point
* used by the instance Hyphenate methods.
* @param min the minimum number of characters
*/
public void SetMinPushCharCount(int min) {
    this.pushCharCount = min;
}
/**
* Switches this hyphenator to the tree of another language. The tree
* becomes null when none is available for the language.
* @param lang the language code
* @param country the country code; null or "none" when not used
*/
public void SetLanguage(String lang, String country) {
    this.hyphenTree = GetHyphenationTree(lang, country);
}
/**
* Hyphenates part of a char array with the current tree and the
* current minimum character counts.
* @param word char array that contains the word
* @param offset offset of the first character of the word
* @param len length of the word
* @return a hyphenation object, or null when this hyphenator has no
* tree or the tree returns null for the word
*/
public Hyphenation Hyphenate(char[] word, int offset, int len) {
    return hyphenTree == null
        ? null
        : hyphenTree.Hyphenate(word, offset, len, remainCharCount,
                               pushCharCount);
}
/**
* Hyphenates a word with the current tree and the current minimum
* character counts.
* @param word the word to hyphenate
* @return a hyphenation object, or null when this hyphenator has no
* tree or the tree returns null for the word
*/
public Hyphenation Hyphenate(String word) {
    return hyphenTree == null
        ? null
        : hyphenTree.Hyphenate(word, remainCharCount, pushCharCount);
}
}
}

View File

@@ -0,0 +1,54 @@
using System;
using System.Collections;
/*
* Copyright 1999-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
namespace iTextSharp.text.pdf.hyphenation {
/**
* This interface is used to connect the XML pattern file parser to
* the hyphenation tree.
*
* @author Carlos Villegas <cav@uniscope.co.jp>
*/
public interface IPatternConsumer {
/**
* Add a character class.
* A character class defines characters that are considered
* equivalent for the purpose of hyphenation (e.g. "aA"). It
* usually means to ignore case.
* @param chargroup character group
*/
void AddClass(String chargroup);
/**
* Add a hyphenation exception. An exception replaces the
* result obtained by the algorithm for cases for which this
* fails or the user wants to provide his own hyphenation.
* @param word the word the exception applies to
* @param hyphenatedword a vector of alternating String's and
* {@link Hyphen Hyphen} instances
*/
void AddException(String word, ArrayList hyphenatedword);
/**
* Add hyphenation patterns.
* @param pattern the pattern
* @param values interletter values expressed as a string of
* digit characters.
*/
void AddPattern(String pattern, String values);
}
}

View File

@@ -0,0 +1,247 @@
using System;
using System.IO;
using System.Text;
using System.Collections;
using System.util;
using iTextSharp.text.xml.simpleparser;
/*
* Copyright 2005 by Paulo Soares.
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* (the "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the License.
*
* The Original Code is 'iText, a free JAVA-PDF library'.
*
* The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
* the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
* All Rights Reserved.
* Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
* are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
*
* Contributor(s): all the names of the contributors are added in the source code
* where applicable.
*
* Alternatively, the contents of this file may be used under the terms of the
* LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
* provisions of LGPL are applicable instead of those above. If you wish to
* allow use of your version of this file only under the terms of the LGPL
* License and not to allow others to use your version of this file under
* the MPL, indicate your decision by deleting the provisions above and
* replace them with the notice and other provisions required by the LGPL.
* If you do not delete the provisions above, a recipient may use your version
* of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the MPL as stated above or under the terms of the GNU
* Library General Public License as published by the Free Software Foundation;
* either version 2 of the License, or any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
* details.
*
* If you didn't download this code from the following link, you should check if
* you aren't using an obsolete version:
* http://www.lowagie.com/iText/
*/
namespace iTextSharp.text.pdf.hyphenation {
/** Parses the xml hyphenation pattern.
*
* @author Paulo Soares (psoares@consiste.pt)
*/
public class SimplePatternParser : ISimpleXMLDocHandler {
internal int currElement;
internal IPatternConsumer consumer;
internal StringBuilder token;
internal ArrayList exception;
internal char hyphenChar;
internal const int ELEM_CLASSES = 1;
internal const int ELEM_EXCEPTIONS = 2;
internal const int ELEM_PATTERNS = 3;
internal const int ELEM_HYPHEN = 4;
/** Creates a new instance of SimplePatternParser with '-' as hyphen character. */
public SimplePatternParser() {
    this.hyphenChar = '-'; // default
    this.token = new StringBuilder();
}
/**
* Parses a pattern stream and reports its contents to a consumer.
* The stream is closed when parsing ends, also when parsing fails.
* @param stream the stream to parse
* @param consumer receives the classes, exceptions and patterns
*/
public void Parse(Stream stream, IPatternConsumer consumer) {
    this.consumer = consumer;
    try {
        SimpleXMLParser.Parse(this, stream);
    }
    finally {
        // closing is best effort: errors while closing are ignored
        try {
            stream.Close();
        }
        catch {
        }
    }
}
/**
* Removes the digits from a pattern word.
* @param word a pattern word made of characters and digits
* @return the word without its digit characters
*/
protected static String GetPattern(String word) {
    StringBuilder letters = new StringBuilder();
    foreach (char ch in word) {
        if (char.IsDigit(ch)) {
            continue;
        }
        letters.Append(ch);
    }
    return letters.ToString();
}
/**
* Splits the strings of an exception at the hyphen character.
* Every hyphen character is replaced by a {@link Hyphen Hyphen}
* whose pre-break text is the hyphen character; items that are not
* strings are copied unchanged.
* @param ex a list of strings and Hyphen instances
* @return a new list with the normalized items
*/
protected ArrayList NormalizeException(ArrayList ex) {
    ArrayList normalized = new ArrayList();
    foreach (Object item in ex) {
        String text = item as String;
        if (text == null) {
            normalized.Add(item);
            continue;
        }
        StringBuilder piece = new StringBuilder();
        foreach (char ch in text) {
            if (ch == hyphenChar) {
                // the text before a hyphen is added even when it is empty
                normalized.Add(piece.ToString());
                piece.Length = 0;
                // we use here hyphenChar which is not necessarily
                // the one to be printed
                normalized.Add(new Hyphen(new String(hyphenChar, 1), null, null));
            } else {
                piece.Append(ch);
            }
        }
        if (piece.Length > 0) {
            normalized.Add(piece.ToString());
        }
    }
    return normalized;
}
/**
* Builds the unhyphenated word of an exception: the strings are
* concatenated and each Hyphen contributes its no-break text, if any.
* @param ex a list of strings and {@link Hyphen Hyphen} instances
* @return the word without hyphenation
*/
protected String GetExceptionWord(ArrayList ex) {
    StringBuilder word = new StringBuilder();
    foreach (Object item in ex) {
        String text = item as String;
        if (text != null) {
            word.Append(text);
            continue;
        }
        Hyphen hyphen = (Hyphen)item;
        if (hyphen.noBreak != null) {
            word.Append(hyphen.noBreak);
        }
    }
    return word.ToString();
}
/**
* Extracts the interletter values of a pattern word. The result holds
* one digit for the position before each character plus one for the
* position after the last character; positions without a digit in the
* pattern get '0'.
* @param pat a pattern word made of characters and digits
* @return the interletter values as a string of digit characters
*/
protected static String GetInterletterValues(String pat) {
    StringBuilder values = new StringBuilder();
    String padded = pat + "a"; // add dummy letter to serve as sentinel
    int pos = 0;
    while (pos < padded.Length) {
        char ch = padded[pos];
        if (char.IsDigit(ch)) {
            values.Append(ch);
            pos += 2; // the digit belongs to the character that follows it
        } else {
            values.Append('0');
            pos += 1;
        }
    }
    return values.ToString();
}
/**
* Nothing to do when the document ends.
*/
public void EndDocument() {
}
/**
 * Handles the end of an element. Text still held in the token buffer is
 * handed to the consumer according to the current element, then the
 * current element falls back to ELEM_EXCEPTIONS when a "hyphen" element
 * ends and to 0 otherwise.
 * @param tag the name of the element that ends (not used)
 */
public void EndElement(String tag) {
    if (token.Length != 0) {
        String pending = token.ToString();
        if (currElement == ELEM_CLASSES) {
            consumer.AddClass(pending);
        } else if (currElement == ELEM_EXCEPTIONS) {
            exception.Add(pending);
            exception = NormalizeException(exception);
            String exceptionWord = GetExceptionWord(exception);
            consumer.AddException(exceptionWord, (ArrayList)exception.Clone());
        } else if (currElement == ELEM_PATTERNS) {
            consumer.AddPattern(GetPattern(pending), GetInterletterValues(pending));
        }
        // inside a hyphen element the buffered text is left untouched
        if (currElement != ELEM_HYPHEN) {
            token.Length = 0;
        }
    }
    currElement = (currElement == ELEM_HYPHEN) ? ELEM_EXCEPTIONS : 0;
}
/**
 * Start-of-document callback (presumably invoked by SimpleXMLParser);
 * intentionally does nothing.
 */
public void StartDocument() {
}
/**
 * Handles the start of an element: records which element is being parsed,
 * opens a new exception list for "exceptions", appends a Hyphen built from
 * the "pre", "no" and "post" attributes for "hyphen", and takes a new
 * hyphenation character from a one-character "value" attribute of
 * "hyphen-char". The token buffer is emptied in every case.
 * @param tag the name of the element
 * @param h the attributes of the element
 */
public void StartElement(String tag, Hashtable h) {
    if (tag.Equals("classes")) {
        currElement = ELEM_CLASSES;
    } else if (tag.Equals("patterns")) {
        currElement = ELEM_PATTERNS;
    } else if (tag.Equals("exceptions")) {
        exception = new ArrayList();
        currElement = ELEM_EXCEPTIONS;
    } else if (tag.Equals("hyphen")) {
        // text collected before the hyphen belongs to the exception
        if (token.Length != 0) {
            exception.Add(token.ToString());
        }
        String pre = (String)h["pre"];
        String no = (String)h["no"];
        String post = (String)h["post"];
        exception.Add(new Hyphen(pre, no, post));
        currElement = ELEM_HYPHEN;
    } else if (tag.Equals("hyphen-char")) {
        String declared = (String)h["value"];
        // only a single character is accepted as hyphenation character
        if (declared != null && declared.Length == 1) {
            hyphenChar = declared[0];
        }
    }
    token.Length = 0;
}
/**
 * Handles character data: the text is split into words with a
 * StringTokenizer and every word is passed to the consumer as a class,
 * an exception or a pattern, depending on the current element.
 * @param str the text found inside the current element
 */
public void Text(String str) {
    for (StringTokenizer words = new StringTokenizer(str); words.HasMoreTokens(); ) {
        String piece = words.NextToken();
        if (currElement == ELEM_CLASSES) {
            consumer.AddClass(piece);
        } else if (currElement == ELEM_EXCEPTIONS) {
            exception.Add(piece);
            exception = NormalizeException(exception);
            String exceptionWord = GetExceptionWord(exception);
            consumer.AddException(exceptionWord, (ArrayList)exception.Clone());
            // the consumer got a copy, so the list can be reused for the next word
            exception.Clear();
        } else if (currElement == ELEM_PATTERNS) {
            consumer.AddPattern(GetPattern(piece), GetInterletterValues(piece));
        }
    }
}
}
}

View File

@@ -0,0 +1,631 @@
using System;
using System.Collections;
using System.Text;
/*
* $Id: TernaryTree.cs,v 1.2 2005/06/18 08:17:05 psoares33 Exp $
* Copyright (C) 2001 The Apache Software Foundation. All rights reserved.
* For details on use and redistribution please refer to the
* LICENSE file included with these sources.
*/
namespace iTextSharp.text.pdf.hyphenation {
/**
* <h2>Ternary Search Tree</h2>
*
* <p>A ternary search tree is a hybrid between a binary tree and
* a digital search tree (trie). Keys are limited to strings.
* A data value of type char is stored in each leaf node.
* It can be used as an index (or pointer) to the data.
* Branches that only contain one key are compressed to one node
* by storing a pointer to the trailer substring of the key.
* This class is intended to serve as base class or helper class
* to implement Dictionary collections or the like. Ternary trees
* have some nice properties as the following: the tree can be
* traversed in sorted order, partial matches (wildcard) can be
* implemented, retrieval of all keys within a given distance
* from the target, etc. The storage requirements are higher than
* a binary tree but a lot less than a trie. Performance is
* comparable with a hash table, sometimes it outperforms a hash
* function (most of the time can determine a miss faster than a hash).</p>
*
* <p>The main purpose of this java port is to serve as a base for
* implementing TeX's hyphenation algorithm (see The TeXBook,
* appendix H). Each language requires from 5000 to 15000 hyphenation
* patterns which will be keys in this tree. The strings patterns
* are usually small (from 2 to 5 characters), but each char in the
* tree is stored in a node. Thus memory usage is the main concern.
* We will sacrifice 'elegance' to keep memory requirements to the
* minimum. Using java's char type as pointer (yes, I know pointer
* it is a forbidden word in java) we can keep the size of the node
* to be just 8 bytes (3 pointers and the data char). This gives
* room for about 65000 nodes. In my tests the english patterns
* took 7694 nodes and the german patterns 10055 nodes,
* so I think we are safe.</p>
*
* <p>All said, this is a map with strings as keys and char as value.
* Pretty limited!. It can be extended to a general map by
* using the string representation of an object and using the
* char value as an index to an array that contains the object
* values.</p>
*
* @author cav@uniscope.co.jp
*/
public class TernaryTree : ICloneable {
/**
* We use 4 arrays to represent a node. I guess I should have created
* a proper node class, but somehow Knuth's pascal code made me forget
* we now have a portable language with memory management and
* automatic garbage collection! And now is kind of late, furthermore,
* if it ain't broken, don't fix it.
*/
/**
* Pointer to low branch and to rest of the key when it is
* stored directly in this node, we don't have unions in java!
*/
protected char[] lo;
/**
* Pointer to high branch.
*/
protected char[] hi;
/**
* Pointer to equal branch and to data when this node is a string terminator.
*/
protected char[] eq;
/**
* <P>The character stored in this node: splitchar
* Two special values are reserved:</P>
* <ul><li>0x0000 as string terminator</li>
* <li>0xFFFF to indicate that the branch starting at
* this node is compressed</li></ul>
* <p>This shouldn't be a problem if we give the usual semantics to
* strings since 0xFFFF is guaranteed not to be a Unicode character.</p>
*/
protected char[] sc;
/**
* This vector holds the trailing of the keys when the branch is compressed.
*/
protected CharVector kv;
/**
* Index of the root node; 0 means the tree is empty.
*/
protected char root;
/**
* Index of the next unused node; Init() starts it at 1 because
* index 0 is used as the "no node" value.
*/
protected char freenode;
protected int length; // number of items in tree
protected static int BLOCK_SIZE = 2048; // allocation size for arrays
/**
* Creates an empty tree; all the state is set up by Init().
*/
internal TernaryTree() {
Init();
}
/**
 * Puts the tree in its empty state: no root, node 1 as the first free
 * node, no items, and freshly allocated node arrays and key vector.
 */
protected void Init() {
    length = 0;
    root = (char)0;
    // index 0 means "no node", so the first usable node is 1
    freenode = (char)1;
    int size = BLOCK_SIZE;
    lo = new char[size];
    hi = new char[size];
    eq = new char[size];
    sc = new char[size];
    kv = new CharVector();
}
/**
 * Inserts a key with its value, overwriting the value when the key is
 * already in the tree.
 *
 * Branches are initially compressed, needing
 * one node per key plus the size of the string
 * key. They are decompressed as needed when
 * another key with same prefix
 * is inserted. This saves a lot of space,
 * specially for long keys.
 *
 * @param key the key to insert
 * @param val the value to associate with the key
 */
public void Insert(string key, char val) {
    // maximum number of nodes that may be generated: one per character
    // plus one for the terminator
    int needed = key.Length + 1;
    if (freenode + needed > eq.Length) {
        // grow by a block, but never by less than this key requires
        // (a single block is not enough for very long keys)
        RedimNodeArrays(Math.Max(eq.Length + BLOCK_SIZE, freenode + needed));
    }
    // a new array is zero-filled, so the copy is already null terminated
    char[] strkey = new char[needed];
    key.CopyTo(0, strkey, 0, key.Length);
    root = Insert(root, strkey, 0, val);
}
/**
 * Inserts the null terminated key that begins at the given position of
 * the array, overwriting the value when the key is already in the tree.
 * @param key the array that holds the key
 * @param start the index of the first character of the key
 * @param val the value to associate with the key
 */
public void Insert(char[] key, int start, char val) {
    // Measure the key from 'start', not from the beginning of the array:
    // the array may hold several strings (Compact passes a vector whose
    // first character is 0), and measuring from index 0 would reserve
    // too few nodes.
    int needed = Strlen(key, start) + 1;
    if (freenode + needed > eq.Length) {
        // grow by a block, but never by less than this key requires
        RedimNodeArrays(Math.Max(eq.Length + BLOCK_SIZE, freenode + needed));
    }
    root = Insert(root, key, start, val);
}
/**
* The actual insertion function, recursive version.
* Inserts the null terminated key that begins at key[start] into the
* branch rooted at node p and returns the index of the node that is the
* root of that branch after the insertion (a new node when p is 0).
* The callers are expected to have made room in the node arrays.
*/
private char Insert(char p, char[] key, int start, char val) {
int len = Strlen(key, start);
if (p == 0) {
// this means there is no branch, this node will start a new branch.
// Instead of doing that, we store the key somewhere else and create
// only one node with a pointer to the key
p = freenode++;
eq[p] = val; // holds data
length++;
hi[p] = (char)0;
if (len > 0) {
sc[p] = (char)0xFFFF; // indicates branch is compressed
// NOTE(review): the offset returned by Alloc is narrowed to char, so
// offsets above 0xFFFF would be truncated - confirm kv stays below that
lo[p] = (char)kv.Alloc(len
+ 1); // use 'lo' to hold pointer to key
Strcpy(kv.Arr, lo[p], key, start);
} else {
// nothing left of the key: this node is a plain string terminator
sc[p] = (char)0;
lo[p] = (char)0;
}
return p;
}
if (sc[p] == 0xFFFF) {
// branch is compressed: need to decompress
// this will generate garbage in the external key array
// but we can do some garbage collection later
char pp = freenode++;
lo[pp] = lo[p]; // previous pointer to key
eq[pp] = eq[p]; // previous pointer to data
lo[p] = (char)0;
if (len > 0) {
// p keeps the first character of the stored key, pp takes over the rest
sc[p] = kv[lo[pp]];
eq[p] = pp;
lo[pp]++;
if (kv[lo[pp]] == 0) {
// key completely decompressed leaving garbage in key array
lo[pp] = (char)0;
sc[pp] = (char)0;
hi[pp] = (char)0;
} else
sc[pp] =
(char)0xFFFF; // we only got first char of key, rest is still there
} else {
// In this case we can save a node by swapping the new node
// with the compressed node
sc[pp] = (char)0xFFFF;
hi[p] = pp;
sc[p] = (char)0;
eq[p] = val;
length++;
return p;
}
}
// regular node: compare the current character with the splitchar and
// continue in the low, equal or high branch
char s = key[start];
if (s < sc[p])
lo[p] = Insert(lo[p], key, start, val);
else if (s == sc[p]) {
if (s != 0)
eq[p] = Insert(eq[p], key, start + 1, val);
else {
// key already in tree, overwrite data
eq[p] = val;
}
} else
hi[p] = Insert(hi[p], key, start, val);
return p;
}
/**
 * Compares two null terminated strings stored in char arrays.
 * @param a the array with the first string
 * @param startA the index where the first string begins
 * @param b the array with the second string
 * @param startB the index where the second string begins
 * @return 0 when both strings are equal, otherwise the difference
 * between the first pair of characters that differ
 */
public static int Strcmp(char[] a, int startA, char[] b, int startB) {
    int i = startA;
    int j = startB;
    while (true) {
        char left = a[i];
        char right = b[j];
        if (left != right) {
            return left - right;
        }
        if (left == 0) {
            // both strings ended at the same position
            return 0;
        }
        i++;
        j++;
    }
}
/**
 * Compares a string with a null terminated string stored in a char array.
 * @param str the string
 * @param a the array with the null terminated string
 * @param start the index where the string in the array begins
 * @return 0 when both are equal; the difference between the first pair
 * of characters that differ; or minus the next array character when the
 * array string is longer than str
 */
public static int Strcmp(string str, char[] a, int start) {
    int pos = 0;
    foreach (char ch in str) {
        char other = a[start + pos];
        if (ch != other) {
            return ch - other;
        }
        if (other == 0) {
            // str itself holds a null character at this position
            return 0;
        }
        pos++;
    }
    // str is exhausted: equal only if the array string ends here too
    char rest = a[start + pos];
    return rest != 0 ? -rest : 0;
}
/**
 * Copies a null terminated string, terminator included.
 * @param dst the destination array
 * @param di the index in dst where the copy begins
 * @param src the array with the null terminated source string
 * @param si the index in src where the source string begins
 */
public static void Strcpy(char[] dst, int di, char[] src, int si) {
    for (; src[si] != 0; si++, di++) {
        dst[di] = src[si];
    }
    dst[di] = (char)0;
}
/**
 * Returns the length of the string that begins at the given index and
 * ends at the first null character or at the end of the array.
 * @param a the array with the string
 * @param start the index where the string begins
 * @return the number of characters before the terminator
 */
public static int Strlen(char[] a, int start) {
    int end = start;
    while (end < a.Length && a[end] != 0) {
        end++;
    }
    return end - start;
}
/**
* Returns the length of the string that begins at index 0 of the array;
* see Strlen(char[], int).
*/
public static int Strlen(char[] a) {
return Strlen(a, 0);
}
/**
 * Looks up a key.
 * @param key the key to search for
 * @return the value stored for the key, or -1 when the key is not in the tree
 */
public int Find(string key) {
    // a new array is zero-filled, so the extra slot is the terminator
    char[] terminated = new char[key.Length + 1];
    key.CopyTo(0, terminated, 0, key.Length);
    return Find(terminated, 0);
}
/**
 * Looks up the null terminated key that begins at the given index.
 * @param key the array that holds the key
 * @param start the index of the first character of the key
 * @return the value stored for the key, or -1 when the key is not in the tree
 */
public int Find(char[] key, int start) {
    char node = root;
    int pos = start;
    while (node != 0) {
        char split = sc[node];
        if (split == 0xFFFF) {
            // compressed branch: the rest of the key must match the stored tail
            return Strcmp(key, pos, kv.Arr, lo[node]) == 0 ? eq[node] : -1;
        }
        char ch = key[pos];
        if (ch == split) {
            if (ch == 0) {
                // terminator reached: eq holds the data
                return eq[node];
            }
            pos++;
            node = eq[node];
        } else if (ch < split) {
            node = lo[node];
        } else {
            node = hi[node];
        }
    }
    return -1;
}
/**
* Tells whether the key is in the tree.
* @return true when Find(key) returns a non-negative value
*/
public bool Knows(string key) {
return (Find(key) >= 0);
}
/**
 * Redimensions the four node arrays to the given size, keeping as many
 * of the existing elements as fit.
 */
private void RedimNodeArrays(int newsize) {
    // all four arrays are copied with the count derived from 'lo'
    int keep = newsize < lo.Length ? newsize : lo.Length;
    lo = Resized(lo, newsize, keep);
    hi = Resized(hi, newsize, keep);
    eq = Resized(eq, newsize, keep);
    sc = Resized(sc, newsize, keep);
}
/**
 * Returns a new array of the given size holding the first 'keep'
 * elements of the source array.
 */
private static char[] Resized(char[] source, int newsize, int keep) {
    char[] target = new char[newsize];
    Array.Copy(source, 0, target, 0, keep);
    return target;
}
/**
* The number of items in the tree, as counted by the insertions.
*/
public int Size {
get {
return length;
}
}
/**
 * Creates a copy of this tree that has its own node arrays and its own
 * key vector.
 * @return the new TernaryTree
 */
public Object Clone() {
    TernaryTree copy = new TernaryTree();
    copy.root = root;
    copy.freenode = freenode;
    copy.length = length;
    copy.lo = (char[])lo.Clone();
    copy.hi = (char[])hi.Clone();
    copy.eq = (char[])eq.Clone();
    copy.sc = (char[])sc.Clone();
    copy.kv = (CharVector)kv.Clone();
    return copy;
}
/**
 * Inserts the median of a range of keys first, then does the same with
 * the lower half and with the upper half, in order to get a balanced
 * tree. The array of keys is assumed to be sorted in ascending order.
 * @param k the keys
 * @param v the values, parallel to the keys
 * @param offset the index of the first key of the range
 * @param n the number of keys in the range
 */
protected void InsertBalanced(string[] k, char[] v, int offset, int n) {
    while (n >= 1) {
        int half = n >> 1;
        int mid = offset + half;
        Insert(k[mid], v[mid]);
        // lower half by recursion ...
        InsertBalanced(k, v, offset, half);
        // ... upper half by continuing the loop
        offset = mid + 1;
        n = n - half - 1;
    }
}
/**
 * Balances the tree for best search performance: all keys and values
 * are collected with an Iterator, the tree is emptied and the keys are
 * inserted again, medians first.
 */
public void Balance() {
    int count = length;
    string[] keys = new string[count];
    char[] values = new char[count];
    Iterator it = new Iterator(this);
    for (int slot = 0; it.HasMoreElements(); slot++) {
        // the value must be read before NextElement() moves the iterator
        values[slot] = it.Value;
        keys[slot] = (string)it.NextElement();
    }
    Init();
    InsertBalanced(keys, values, 0, count);
    // With uniform letter distribution sc[root] should be around 'm'
}
/**
 * Balances the tree and reduces its storage to what is in use.
 *
 * Each node stores a character (splitchar) which is part of
 * some key(s). In a compressed branch (one that only contains
 * a single string key) the trailer of the key which is not
 * already in nodes is stored externally in the kv array.
 * As items are inserted, key substrings decrease.
 * Some substrings may completely disappear when the whole
 * branch is totally decompressed.
 * The tree is traversed to find the key substrings actually
 * used. In addition, duplicate substrings are removed using
 * a map (implemented with a TernaryTree!).
 */
public void TrimToSize() {
    // first balance the tree for best performance
    Balance();
    // cut the node arrays down to the nodes in use
    RedimNodeArrays(freenode);
    // rebuild the key vector with only the substrings still referenced
    CharVector compacted = new CharVector();
    compacted.Alloc(1);
    TernaryTree seen = new TernaryTree();
    Compact(compacted, seen, root);
    kv = compacted;
    kv.TrimToSize();
}
/**
 * Copies the key substrings referenced by the branch rooted at p into
 * the new vector and redirects the nodes to the copies. A substring that
 * was already copied (found in the map) is shared instead of copied again.
 * @param kx the new key vector
 * @param map maps the substrings already copied to their offset in kx
 * @param p the root of the branch to process
 */
private void Compact(CharVector kx, TernaryTree map, char p) {
    if (p == 0) {
        return;
    }
    if (sc[p] != 0xFFFF) {
        // regular node: process its branches; eq is data, not a branch,
        // when the node is a string terminator
        Compact(kx, map, lo[p]);
        if (sc[p] != 0) {
            Compact(kx, map, eq[p]);
        }
        Compact(kx, map, hi[p]);
        return;
    }
    // compressed node: lo points to the substring in the old vector
    int offset = map.Find(kv.Arr, lo[p]);
    if (offset < 0) {
        offset = kx.Alloc(Strlen(kv.Arr, lo[p]) + 1);
        Strcpy(kx.Arr, offset, kv.Arr, lo[p]);
        map.Insert(kx.Arr, offset, (char)offset);
    }
    lo[p] = (char)offset;
}
/**
* A new Iterator over the keys of this tree; every access creates
* another iterator.
*/
public Iterator Keys {
get {
return new Iterator(this);
}
}
public class Iterator {
/**
* current node index; -1 when there are no more elements
*/
int cur;
/**
* current key, i.e. the key that the next call to NextElement() returns
*/
string curkey;
/**
* TernaryTree parent, the tree that is being traversed
*/
TernaryTree parent;
/**
 * An entry of the node stack: a node index together with a counter
 * that the traversal increments as it moves on to the next branch of
 * that node.
 */
private class Item : ICloneable {
    internal char parent;
    internal char child;
    public Item() : this((char)0, (char)0) {
    }
    public Item(char p, char c) {
        child = c;
        parent = p;
    }
    public Object Clone() {
        Item copy = new Item();
        copy.parent = parent;
        copy.child = child;
        return copy;
    }
}
/**
* Node stack; holds Item objects for the nodes on the path to the
* current node
*/
Stack ns;
/**
* key stack implemented with a StringBuilder; holds the characters of
* the key collected along the path to the current node
*/
StringBuilder ks;
/**
 * Creates an iterator over the keys of the given tree, positioned at
 * the first key.
 * @param parent the tree to traverse
 */
public Iterator(TernaryTree parent) {
    ks = new StringBuilder();
    ns = new Stack();
    cur = -1;
    this.parent = parent;
    // position the iterator at the first key
    Rewind();
}
/**
 * Restarts the traversal: both stacks are emptied and the iterator is
 * positioned at the first key found from the root.
 */
public void Rewind() {
    ks.Length = 0;
    ns.Clear();
    cur = parent.root;
    Run();
}
/**
* Returns the current key and moves the iterator to the next one.
* @return the key (a string) the iterator was positioned at
*/
public Object NextElement() {
// remember the key before moving on
string res = curkey;
cur = Up();
Run();
return res;
}
/**
 * The value stored for the current key, or the null character when
 * the iterator has no current node.
 */
public char Value {
    get {
        return cur >= 0 ? this.parent.eq[cur] : (char)0;
    }
}
/**
* Tells whether the iterator is positioned at a key, i.e. whether the
* current node index is not -1.
*/
public bool HasMoreElements() {
return (cur != -1);
}
/**
* traverse upwards: finds the node where the traversal has to continue
* after the current node has been dealt with.
* @return the index of that node (0 when the chosen branch is empty),
* or -1 when the node stack is exhausted
*/
private int Up() {
Item i = new Item();
int res = 0;
if (ns.Count == 0)
return -1;
// the current node is a string terminator: continue with its low branch
if (cur != 0 && parent.sc[cur] == 0)
return parent.lo[cur];
bool climb = true;
while (climb) {
i = (Item)ns.Pop();
// move on to the next branch of the popped node
i.child++;
switch (i.child) {
case (char)1:
if (parent.sc[i.parent] != 0) {
// equal branch next: its splitchar becomes part of the key
res = parent.eq[i.parent];
ns.Push(i.Clone());
ks.Append(parent.sc[i.parent]);
} else {
// a terminator keeps data in eq, so go straight to the high branch
i.child++;
ns.Push(i.Clone());
res = parent.hi[i.parent];
}
climb = false;
break;
case (char)2:
// high branch next: the splitchar is no longer part of the key
res = parent.hi[i.parent];
ns.Push(i.Clone());
if (ks.Length > 0)
ks.Length = ks.Length - 1; // pop
climb = false;
break;
default:
// every branch of this node has been visited: keep climbing
if (ns.Count == 0)
return -1;
climb = true;
break;
}
}
return res;
}
/**
* traverse the tree to find next key: starting at the current node,
* moves to the next node that holds data and stores its key in curkey.
* @return 0 when a key was found, -1 when there are no more keys
*/
private int Run() {
if (cur == -1)
return -1;
bool leaf = false;
for (; ; ) {
// first go down on low branch until leaf or compressed branch
while (cur != 0) {
if (parent.sc[cur] == 0xFFFF) {
leaf = true;
break;
}
// remember the node so that Up() can visit its other branches later
ns.Push(new Item((char)cur, '\u0000'));
if (parent.sc[cur] == 0) {
leaf = true;
break;
}
cur = parent.lo[cur];
}
if (leaf)
break;
// nothing found, go up one node and try again
cur = Up();
if (cur == -1) {
return -1;
}
}
// The current node should be a data node and
// the key should be in the key stack (at least partially)
StringBuilder buf = new StringBuilder(ks.ToString());
if (parent.sc[cur] == 0xFFFF) {
// compressed branch: the rest of the key is in the key vector
int p = parent.lo[cur];
while (parent.kv[p] != 0)
buf.Append(parent.kv[p++]);
}
curkey = buf.ToString();
return 0;
}
}
/**
 * Writes the number of keys, the node count and the length of the key
 * vector to the standard error stream.
 */
public virtual void PrintStats() {
    Console.Error.WriteLine("Number of keys = " + length.ToString());
    // freenode is a char: it has to be converted to int first, otherwise
    // ToString() gives the character with that code instead of the count
    Console.Error.WriteLine("Node count = " + ((int)freenode).ToString());
    Console.Error.WriteLine("Key Array length = "
        + kv.Length.ToString());
}
}
}