Initial Commit
This commit is contained in:
119
iTechSharp/iTextSharp/text/pdf/hyphenation/ByteVector.cs
Normal file
119
iTechSharp/iTextSharp/text/pdf/hyphenation/ByteVector.cs
Normal file
@@ -0,0 +1,119 @@
|
||||
using System;
|
||||
using System.Collections;
|
||||
|
||||
/*
|
||||
* $Id: ByteVector.cs,v 1.2 2005/06/18 08:17:05 psoares33 Exp $
|
||||
* Copyright (C) 2001 The Apache Software Foundation. All rights reserved.
|
||||
* For details on use and redistribution please refer to the
|
||||
* LICENSE file included with these sources.
|
||||
*/
|
||||
|
||||
namespace iTextSharp.text.pdf.hyphenation {
|
||||
/**
 * A simple growable byte vector that exposes its underlying array.
 * Space is handed out sequentially through {@link #Alloc}; the array grows
 * in multiples of the block size chosen at construction time.
 *
 * @author Carlos Villegas <cav@uniscope.co.jp>
 */
public class ByteVector {

    /**
     * Capacity increment size used when the caller does not supply a
     * positive one.
     */
    private const int DEFAULT_BLOCK_SIZE = 2048;

    /**
     * Number of elements by which the array grows at a time (always > 0).
     */
    private readonly int blockSize;

    /**
     * The encapsulated array
     */
    private byte[] arr;

    /**
     * Points to next free item
     */
    private int n;

    /**
     * Creates an empty vector with the default block size as capacity.
     */
    public ByteVector() : this(DEFAULT_BLOCK_SIZE) {}

    /**
     * Creates an empty vector.
     * @param capacity initial capacity and growth increment; a value that
     * is not positive selects the default block size
     */
    public ByteVector(int capacity) {
        blockSize = capacity > 0 ? capacity : DEFAULT_BLOCK_SIZE;
        arr = new byte[blockSize];
        n = 0;
    }

    /**
     * Wraps an existing array (no copy is made). The vector starts out
     * logically empty: the next free index is 0, not a.Length.
     * @param a the array to wrap
     */
    public ByteVector(byte[] a) {
        blockSize = DEFAULT_BLOCK_SIZE;
        arr = a;
        n = 0;
    }

    /**
     * Wraps an existing array (no copy is made) with an explicit growth
     * increment. The vector starts out logically empty.
     * @param a the array to wrap
     * @param capacity growth increment; a value that is not positive
     * selects the default block size
     */
    public ByteVector(byte[] a, int capacity) {
        blockSize = capacity > 0 ? capacity : DEFAULT_BLOCK_SIZE;
        arr = a;
        n = 0;
    }

    /**
     * The underlying array. The reference changes whenever the vector
     * grows or is trimmed, so re-read it after {@link #Alloc} or
     * {@link #TrimToSize}.
     */
    public byte[] Arr {
        get {
            return arr;
        }
    }

    /**
     * return number of items in array
     */
    public int Length {
        get {
            return n;
        }
    }

    /**
     * returns current capacity of array
     */
    public int Capacity {
        get {
            return arr.Length;
        }
    }

    /**
     * Direct, unchecked access to the underlying array; the index is
     * validated against the capacity only, not against {@link #Length}.
     */
    public byte this[int index] {
        get {
            return arr[index];
        }

        set {
            arr[index] = value;
        }
    }

    /**
     * This is to implement memory allocation in the array. Like Malloc().
     * Reserves size consecutive items and grows the array when the
     * reservation would reach or pass the current capacity.
     * @param size number of items to reserve
     * @return the index of the first reserved item
     */
    public int Alloc(int size) {
        int index = n;
        int len = arr.Length;
        long required = (long)n + size;
        if (required >= len) {
            // Grow by as many whole blocks as the request needs. Growing by
            // a single block would leave the array too small whenever size
            // exceeds the block size plus the remaining free space.
            long blocks = (required - len) / blockSize + 1;
            byte[] aux = new byte[len + blocks * blockSize];
            Array.Copy(arr, 0, aux, 0, len);
            arr = aux;
        }
        n += size;
        return index;
    }

    /**
     * Shrinks the underlying array so that its capacity equals the number
     * of items in use. Does nothing when the array is already full.
     */
    public void TrimToSize() {
        if (n < arr.Length) {
            byte[] aux = new byte[n];
            Array.Copy(arr, 0, aux, 0, n);
            arr = aux;
        }
    }
}
|
||||
}
|
128
iTechSharp/iTextSharp/text/pdf/hyphenation/CharVector.cs
Normal file
128
iTechSharp/iTextSharp/text/pdf/hyphenation/CharVector.cs
Normal file
@@ -0,0 +1,128 @@
|
||||
using System;
|
||||
|
||||
/*
|
||||
* $Id: CharVector.cs,v 1.2 2005/06/18 08:17:05 psoares33 Exp $
|
||||
* Copyright (C) 2001 The Apache Software Foundation. All rights reserved.
|
||||
* For details on use and redistribution please refer to the
|
||||
* LICENSE file included with these sources.
|
||||
*/
|
||||
|
||||
namespace iTextSharp.text.pdf.hyphenation {
|
||||
/**
 * A simple growable char vector that exposes its underlying array.
 * Space is handed out sequentially through {@link #Alloc}; the array grows
 * in multiples of the block size chosen at construction time.
 *
 * @author Carlos Villegas <cav@uniscope.co.jp>
 */
public class CharVector : ICloneable {

    /**
     * Capacity increment size used when the caller does not supply a
     * positive one.
     */
    private const int DEFAULT_BLOCK_SIZE = 2048;

    /**
     * Number of elements by which the array grows at a time (always > 0).
     */
    private readonly int blockSize;

    /**
     * The encapsulated array
     */
    private char[] array;

    /**
     * Points to next free item
     */
    private int n;

    /**
     * Creates an empty vector with the default block size as capacity.
     */
    public CharVector() : this(DEFAULT_BLOCK_SIZE) {}

    /**
     * Creates an empty vector.
     * @param capacity initial capacity and growth increment; a value that
     * is not positive selects the default block size
     */
    public CharVector(int capacity) {
        blockSize = capacity > 0 ? capacity : DEFAULT_BLOCK_SIZE;
        array = new char[blockSize];
        n = 0;
    }

    /**
     * Wraps an existing array (no copy is made). The whole array counts as
     * used: the next free index is a.Length.
     * @param a the array to wrap
     */
    public CharVector(char[] a) {
        blockSize = DEFAULT_BLOCK_SIZE;
        array = a;
        n = a.Length;
    }

    /**
     * Wraps an existing array (no copy is made) with an explicit growth
     * increment. The whole array counts as used.
     * @param a the array to wrap
     * @param capacity growth increment; a value that is not positive
     * selects the default block size
     */
    public CharVector(char[] a, int capacity) {
        blockSize = capacity > 0 ? capacity : DEFAULT_BLOCK_SIZE;
        array = a;
        n = a.Length;
    }

    /**
     * Reset Vector but don't resize or clear elements
     */
    public void Clear() {
        n = 0;
    }

    /**
     * Creates an independent copy: the underlying array is cloned and the
     * block size and item count are carried over.
     * @return a new CharVector
     */
    public Object Clone() {
        CharVector cv = new CharVector((char[])array.Clone(), blockSize);
        // The wrapping constructor marks the whole array as used; restore
        // the real item count.
        cv.n = this.n;
        return cv;
    }

    /**
     * The underlying array. The reference changes whenever the vector
     * grows or is trimmed, so re-read it after {@link #Alloc} or
     * {@link #TrimToSize}.
     */
    public char[] Arr {
        get {
            return array;
        }
    }

    /**
     * return number of items in array
     */
    public int Length {
        get {
            return n;
        }
    }

    /**
     * returns current capacity of array
     */
    public int Capacity {
        get {
            return array.Length;
        }
    }

    /**
     * Direct, unchecked access to the underlying array; the index is
     * validated against the capacity only, not against {@link #Length}.
     */
    public char this[int index] {
        get {
            return array[index];
        }

        set {
            array[index] = value;
        }
    }

    /**
     * Reserves size consecutive items and grows the array when the
     * reservation would reach or pass the current capacity.
     * @param size number of items to reserve
     * @return the index of the first reserved item
     */
    public int Alloc(int size) {
        int index = n;
        int len = array.Length;
        long required = (long)n + size;
        if (required >= len) {
            // Grow by as many whole blocks as the request needs. Growing by
            // a single block would leave the array too small whenever size
            // exceeds the block size plus the remaining free space.
            long blocks = (required - len) / blockSize + 1;
            char[] aux = new char[len + blocks * blockSize];
            Array.Copy(array, 0, aux, 0, len);
            array = aux;
        }
        n += size;
        return index;
    }

    /**
     * Shrinks the underlying array so that its capacity equals the number
     * of items in use. Does nothing when the array is already full.
     */
    public void TrimToSize() {
        if (n < array.Length) {
            char[] aux = new char[n];
            Array.Copy(array, 0, aux, 0, n);
            array = aux;
        }
    }
}
|
||||
}
|
67
iTechSharp/iTextSharp/text/pdf/hyphenation/Hyphen.cs
Normal file
67
iTechSharp/iTextSharp/text/pdf/hyphenation/Hyphen.cs
Normal file
@@ -0,0 +1,67 @@
|
||||
using System;
|
||||
using System.Text;
|
||||
/*
|
||||
* Copyright 1999-2004 The Apache Software Foundation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
namespace iTextSharp.text.pdf.hyphenation {
|
||||
/**
 * Represents a hyphen. A 'full' hyphen consists of three texts: pre-break,
 * post-break and no-break. When no line break is generated at the hyphen's
 * position the no-break text is used; otherwise the pre-break and
 * post-break texts are used. Typically the pre-break text is the hyphen
 * character and the other two are empty. The general scheme supports
 * languages in which a word changes spelling when split across lines, such
 * as German 'backen', which hyphenates as 'bak-ken'. The idea comes from
 * TeX.
 *
 * @author Carlos Villegas <cav@uniscope.co.jp>
 */
public class Hyphen {

    /** Text placed before the line break. */
    public String preBreak;

    /** Text used when no line break occurs at this position. */
    public String noBreak;

    /** Text placed after the line break. */
    public String postBreak;

    /**
     * Creates a full hyphen.
     * @param pre the pre-break text
     * @param no the no-break text
     * @param post the post-break text
     */
    internal Hyphen(String pre, String no, String post) {
        preBreak = pre;
        noBreak = no;
        postBreak = post;
    }

    /**
     * Creates a hyphen that only has a pre-break text; the no-break and
     * post-break texts are null.
     * @param pre the pre-break text
     */
    internal Hyphen(String pre) : this(pre, null, null) {
    }

    /**
     * @return "-" for a plain hyphen (pre-break "-" and both other texts
     * null), otherwise "{pre}{post}{no}" with null texts rendered empty
     */
    public override String ToString() {
        bool onlyPreBreak = noBreak == null && postBreak == null;
        if (onlyPreBreak && preBreak != null && preBreak.Equals("-")) {
            return "-";
        }
        // Concatenating a null string contributes nothing, so null texts
        // show up as empty brace pairs.
        return "{" + preBreak + "}{" + postBreak + "}{" + noBreak + "}";
    }
}
|
||||
}
|
79
iTechSharp/iTextSharp/text/pdf/hyphenation/Hyphenation.cs
Normal file
79
iTechSharp/iTextSharp/text/pdf/hyphenation/Hyphenation.cs
Normal file
@@ -0,0 +1,79 @@
|
||||
using System;
|
||||
using System.Text;
|
||||
|
||||
/*
|
||||
* $Id: Hyphenation.cs,v 1.2 2005/06/18 08:17:05 psoares33 Exp $
|
||||
* Copyright (C) 2001 The Apache Software Foundation. All rights reserved.
|
||||
* For details on use and redistribution please refer to the
|
||||
* LICENSE file included with these sources.
|
||||
*/
|
||||
|
||||
namespace iTextSharp.text.pdf.hyphenation {
|
||||
/**
 * This class represents a hyphenated word: the word itself together with
 * the character offsets at which it may be broken.
 *
 * @author Carlos Villegas <cav@uniscope.co.jp>
 */
public class Hyphenation {

    /** Offsets into word at which a hyphen may be inserted. */
    private int[] hyphenPoints;

    /** The word the hyphenation points refer to. */
    private string word;

    /**
     * number of hyphenation points in word
     */
    private int len;

    /**
     * Creates a hyphenated word. The points array is stored as is, not
     * copied.
     * @param word the word
     * @param points offsets into word at which it may be broken; ToString
     * expects them in ascending order
     */
    internal Hyphenation(string word, int[] points) {
        this.word = word;
        hyphenPoints = points;
        len = points.Length;
    }

    /**
     * @return the number of hyphenation points in the word
     */
    public int Length {
        get {
            return len;
        }
    }

    /**
     * @param index index of the hyphenation point
     * @return the pre-break text, not including the hyphen character
     */
    public string GetPreHyphenText(int index) {
        return word.Substring(0, hyphenPoints[index]);
    }

    /**
     * @param index index of the hyphenation point
     * @return the post-break text
     */
    public string GetPostHyphenText(int index) {
        return word.Substring(hyphenPoints[index]);
    }

    /**
     * @return the hyphenation points (the internal array, not a copy)
     */
    public int[] HyphenationPoints {
        get {
            return hyphenPoints;
        }
    }

    /**
     * @return the word with a '-' inserted at every hyphenation point
     */
    public override string ToString() {
        StringBuilder str = new StringBuilder();
        int start = 0;
        for (int i = 0; i < len; i++) {
            int point = hyphenPoints[i];
            // The second argument is a character count, not an end index,
            // so the segment length is the distance to the previous point.
            str.Append(word, start, point - start).Append('-');
            start = point;
        }
        str.Append(word, start, word.Length - start);
        return str.ToString();
    }
}
|
||||
}
|
@@ -0,0 +1,27 @@
|
||||
using System;
|
||||
/*
|
||||
* Copyright 1999-2004 The Apache Software Foundation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
namespace iTextSharp.text.pdf.hyphenation {
|
||||
/**
 * Exception type of the hyphenation package. It adds nothing to
 * System.Exception beyond a distinct type that callers can catch.
 *
 * @author Carlos Villegas <cav@uniscope.co.jp>
 */
public class HyphenationException : Exception
{
    /**
     * Creates the exception.
     * @param msg the message describing the failure
     */
    public HyphenationException(string msg)
        : base(msg)
    {
    }
}
|
||||
}
|
451
iTechSharp/iTextSharp/text/pdf/hyphenation/HyphenationTree.cs
Normal file
451
iTechSharp/iTextSharp/text/pdf/hyphenation/HyphenationTree.cs
Normal file
@@ -0,0 +1,451 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
using System.Collections;
|
||||
/*
|
||||
* Copyright 1999-2004 The Apache Software Foundation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/* $Id: HyphenationTree.cs,v 1.2 2005/06/18 08:05:23 psoares33 Exp $ */
|
||||
|
||||
namespace iTextSharp.text.pdf.hyphenation {
|
||||
/**
 * This tree structure stores the hyphenation patterns in an efficient
 * way for fast lookup. It provides the method to hyphenate a word.
 *
 * @author Carlos Villegas <cav@uniscope.co.jp>
 */
public class HyphenationTree : TernaryTree, IPatternConsumer {

    /**
     * value space: stores the interletter values
     */
    protected ByteVector vspace;

    /**
     * This map stores hyphenation exceptions
     */
    protected Hashtable stoplist;

    /**
     * This map stores the character classes
     */
    protected TernaryTree classmap;

    /**
     * Temporary map to store interletter values on pattern loading.
     * Only non-null while LoadSimplePatterns is running.
     */
    private TernaryTree ivalues;

    /**
     * Creates an empty tree without patterns, classes or exceptions.
     */
    public HyphenationTree() {
        stoplist = new Hashtable(23); // usually a small table
        classmap = new TernaryTree();
        vspace = new ByteVector();
        vspace.Alloc(1); // this reserves index 0, which we don't use
    }

    /**
     * Packs the values by storing them in 4 bits, two values into a byte.
     * Values range is from 0 to 9. We use zero as terminator,
     * so we'll add 1 to the value.
     * @param values a string of digits from '0' to '9' representing the
     * interletter values.
     * @return the index into the vspace array where the packed values
     * are stored.
     */
    protected int PackValues(String values) {
        int count = values.Length;
        // One byte per pair of digits, plus a terminator byte; an odd count
        // needs one more byte for the trailing half-filled pair.
        int byteCount = (count >> 1) + 1 + (count & 1);
        int offset = vspace.Alloc(byteCount);
        byte[] va = vspace.Arr; // read after Alloc, which may replace the array
        for (int pos = 0; pos < count; pos++) {
            int slot = offset + (pos >> 1);
            byte nibble = (byte)((values[pos] - '0' + 1) & 0x0f);
            if ((pos & 1) == 0) {
                va[slot] = (byte)(nibble << 4); // big endian
            } else {
                va[slot] = (byte)(va[slot] | nibble);
            }
        }
        va[offset + byteCount - 1] = 0; // terminator
        return offset;
    }

    /**
     * Reverses PackValues.
     * @param k index into the value space where the packed values start
     * @return the values as a string of digit characters
     */
    protected String UnpackValues(int k) {
        StringBuilder digits = new StringBuilder();
        for (byte packed = vspace[k++]; packed != 0; packed = vspace[k++]) {
            digits.Append((char)((packed >> 4) - 1 + '0'));
            int low = packed & 0x0f;
            if (low == 0) {
                // a zero low nibble terminates an odd-length sequence
                break;
            }
            digits.Append((char)(low - 1 + '0'));
        }
        return digits.ToString();
    }

    /**
     * Loads patterns, classes and exceptions by running a
     * SimplePatternParser over the stream with this tree as consumer, then
     * trims the internal structures.
     * @param stream the stream to parse
     */
    public void LoadSimplePatterns(Stream stream) {
        ivalues = new TernaryTree();
        SimplePatternParser parser = new SimplePatternParser();

        parser.Parse(stream, this);

        // patterns/values should be now in the tree
        // let's optimize a bit
        TrimToSize();
        vspace.TrimToSize();
        classmap.TrimToSize();

        // get rid of the auxiliary map
        ivalues = null;
    }

    /**
     * Looks up a pattern.
     * @param pat the pattern
     * @return the unpacked interletter values of the pattern, or an empty
     * string when the lookup yields a negative index
     */
    public String FindPattern(String pat) {
        int k = base.Find(pat);
        return k >= 0 ? UnpackValues(k) : "";
    }

    /**
     * String compare, returns 0 if equal or
     * t is a substring of s. Both arrays are expected to be
     * null terminated from the given start indices.
     */
    protected int Hstrcmp(char[] s, int si, char[] t, int ti) {
        while (s[si] == t[ti]) {
            if (s[si] == 0) {
                return 0;
            }
            si++;
            ti++;
        }
        return t[ti] == 0 ? 0 : s[si] - t[ti];
    }

    /**
     * Unpacks the values stored at the given value space index.
     * @param k index into the value space where the packed values start
     * @return the values as numbers (not digit characters), one per byte
     */
    protected byte[] GetValues(int k) {
        StringBuilder unpacked = new StringBuilder();
        for (byte packed = vspace[k++]; packed != 0; packed = vspace[k++]) {
            unpacked.Append((char)((packed >> 4) - 1));
            int low = packed & 0x0f;
            if (low == 0) {
                // a zero low nibble terminates an odd-length sequence
                break;
            }
            unpacked.Append((char)(low - 1));
        }
        byte[] res = new byte[unpacked.Length];
        for (int i = 0; i < res.Length; i++) {
            res[i] = (byte)unpacked[i];
        }
        return res;
    }

    /**
     * Raises the interletter values starting at index to the given pattern
     * values wherever the pattern value is larger. Positions past the end
     * of il are ignored.
     */
    private static void RaiseInterletterValues(byte[] values, int index, byte[] il) {
        for (int k = 0; k < values.Length; k++) {
            int j = index + k;
            if (j < il.Length && values[k] > il[j]) {
                il[j] = values[k];
            }
        }
    }

    /**
     * <p>Search for all possible partial matches of word starting
     * at index and update interletter values. In other words, it
     * does something like:</p>
     * <code>
     * for (i=0; i<patterns.length; i++) {
     * if ( word.Substring(index).StartsWith(patterns[i]) )
     * Update_interletter_values(patterns[i]);
     * }
     * </code>
     * <p>But it is done in an efficient way since the patterns are
     * stored in a ternary tree. In fact, this is the whole purpose
     * of having the tree: doing this search without having to test
     * every single pattern. The number of patterns for languages
     * such as English range from 4000 to 10000. Thus, doing thousands
     * of string comparisons for each word to hyphenate would be
     * really slow without the tree. The tradeoff is memory, but
     * using a ternary tree instead of a trie almost halves the
     * memory used by Lout or TeX. It's also faster than using
     * a hash table</p>
     * @param word null terminated word to match
     * @param index start index from word
     * @param il interletter values array to update
     */
    protected void SearchPatterns(char[] word, int index, byte[] il) {
        int pos = index;
        char current = word[pos];
        char node = root;

        while (node > 0 && node < sc.Length) {
            if (sc[node] == 0xFFFF) {
                // compressed branch: compare against the stored key tail
                if (Hstrcmp(word, pos, kv.Arr, lo[node]) == 0) {
                    // data pointer is in eq[]
                    RaiseInterletterValues(GetValues(eq[node]), index, il);
                }
                return;
            }

            int diff = current - sc[node];
            if (diff != 0) {
                node = diff < 0 ? lo[node] : hi[node];
                continue;
            }
            if (current == 0) {
                break;
            }
            current = word[++pos];
            node = eq[node];

            // look for a pattern ending at this position by searching for
            // the null char ( splitchar == 0 )
            //
            // actually the step should be:
            //   probe = sc[probe] < 0 ? hi[probe] : lo[probe];
            // but chars are unsigned
            for (char probe = node; probe > 0 && probe < sc.Length; probe = lo[probe]) {
                if (sc[probe] == 0xFFFF) { // stop at compressed branch
                    break;
                }
                if (sc[probe] == 0) {
                    RaiseInterletterValues(GetValues(eq[probe]), index, il);
                    break;
                }
            }
        }
    }

    /**
     * Hyphenate word and return a Hyphenation object.
     * @param word the word to be hyphenated
     * @param remainCharCount Minimum number of characters allowed
     * before the hyphenation point.
     * @param pushCharCount Minimum number of characters allowed after
     * the hyphenation point.
     * @return a {@link Hyphenation Hyphenation} object representing
     * the hyphenated word or null if word is not hyphenated.
     */
    public Hyphenation Hyphenate(String word, int remainCharCount,
                                 int pushCharCount) {
        char[] chars = word.ToCharArray();
        return Hyphenate(chars, 0, chars.Length, remainCharCount, pushCharCount);
    }

    /**
     * w = "****nnllllllnnn*****",
     * where n is a non-letter, l is a letter,
     * all n may be absent, the first n is at offset,
     * the first l is at offset + leadingSkipped;
     * word = ".llllll.'\0'***",
     * where all l in w are copied into word.
     * In the first part of the routine the length is that of w's span (len),
     * in the second part it is the number of letters (letterCount).
     * Three indices are used:
     * Index(w), the index in w,
     * Index(word), the index in word,
     * Letterindex(word), the index in the letter part of word.
     * The following relations exist:
     * Index(w) = offset + i - 1
     * Index(word) = i - leadingSkipped
     * Letterindex(word) = Index(word) - 1
     * (see first loop).
     * It follows that:
     * Index(w) - Index(word) = offset - 1 + leadingSkipped
     * Index(w) = Letterindex(word) + offset + leadingSkipped
     */

    /**
     * Hyphenate word and return an array of hyphenation points.
     * @param w char array that contains the word
     * @param offset Offset to first character in word
     * @param len Length of word
     * @param remainCharCount Minimum number of characters allowed
     * before the hyphenation point.
     * @param pushCharCount Minimum number of characters allowed after
     * the hyphenation point.
     * @return a {@link Hyphenation Hyphenation} object representing
     * the hyphenated word or null if word is not hyphenated.
     */
    public Hyphenation Hyphenate(char[] w, int offset, int len,
                                 int remainCharCount, int pushCharCount) {
        char[] word = new char[len + 3];

        // normalize word
        char[] probe = new char[2];
        int leadingSkipped = 0;
        int letterCount = len;
        bool lettersEnded = false;
        for (int i = 1; i <= len; i++) {
            probe[0] = w[offset + i - 1];
            int normalized = classmap.Find(probe, 0);
            if (normalized >= 0) {
                if (lettersEnded) {
                    // a letter after trailing non-letters: not a single word
                    return null;
                }
                word[i - leadingSkipped] = (char)normalized;
                continue;
            }
            // found a non-letter character ...
            if (i == 1 + leadingSkipped) {
                // ... before any letter character
                leadingSkipped++;
            } else {
                // ... after a letter character
                lettersEnded = true;
            }
            letterCount--;
        }

        if (letterCount < remainCharCount + pushCharCount) {
            // word is too short to be hyphenated
            return null;
        }

        int[] result = new int[letterCount + 1];
        int found = 0;

        // check exception list first
        String sw = new String(word, 1, letterCount);
        if (stoplist.ContainsKey(sw)) {
            // assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no = null)
            ArrayList parts = (ArrayList)stoplist[sw];
            int j = 0;
            foreach (Object part in parts) {
                // j = Index(sw) = Letterindex(word)?
                // result[found] = corresponding Index(w)
                String text = part as String;
                if (text == null) {
                    continue;
                }
                j += text.Length;
                if (j >= remainCharCount && j < (letterCount - pushCharCount)) {
                    result[found++] = j + leadingSkipped;
                }
            }
        } else {
            // use algorithm to get hyphenation points
            word[0] = '.'; // word start marker
            word[letterCount + 1] = '.'; // word end marker
            word[letterCount + 2] = (char)0; // null terminated
            byte[] il = new byte[letterCount + 3]; // initialized to zero
            for (int start = 0; start < letterCount + 1; start++) {
                SearchPatterns(word, start, il);
            }

            // hyphenation points are located where interletter value is odd
            // i is Letterindex(word),
            // i + 1 is Index(word),
            // result[found] = corresponding Index(w)
            for (int i = 0; i < letterCount; i++) {
                bool odd = (il[i + 1] & 1) == 1;
                if (odd && i >= remainCharCount
                        && i <= (letterCount - pushCharCount)) {
                    result[found++] = i + leadingSkipped;
                }
            }
        }

        if (found == 0) {
            return null;
        }

        // trim result array
        int[] res = new int[found];
        Array.Copy(result, 0, res, 0, found);
        // NOTE(review): the string starts at offset but only spans the letter
        // count, while the points include the skipped leading non-letters;
        // confirm this is intended for words with surrounding non-letters.
        return new Hyphenation(new String(w, offset, letterCount), res);
    }

    /**
     * Add a character class to the tree. It is used by
     * {@link SimplePatternParser SimplePatternParser} as callback to
     * add character classes. Character classes define the
     * valid word characters for hyphenation. If a word contains
     * a character not defined in any of the classes, it is not hyphenated.
     * It also defines a way to normalize the characters in order
     * to compare them with the stored patterns. Usually pattern
     * files use only lower case characters, in this case a class
     * for letter 'a', for example, should be defined as "aA", the first
     * character being the normalization char.
     */
    public void AddClass(String chargroup) {
        if (chargroup.Length == 0) {
            return;
        }
        char equivChar = chargroup[0];
        // single-character, null terminated key
        char[] key = new char[] { (char)0, (char)0 };
        foreach (char member in chargroup) {
            key[0] = member;
            classmap.Insert(key, 0, equivChar);
        }
    }

    /**
     * Add an exception to the tree. It is used by
     * {@link SimplePatternParser SimplePatternParser} class as callback to
     * store the hyphenation exceptions.
     * @param word normalized word
     * @param hyphenatedword a vector of alternating strings and
     * {@link Hyphen hyphen} objects.
     */
    public void AddException(String word, ArrayList hyphenatedword) {
        stoplist[word] = hyphenatedword;
    }

    /**
     * Add a pattern to the tree. Mainly, to be used by
     * {@link SimplePatternParser SimplePatternParser} class as callback to
     * add a pattern to the tree.
     * @param pattern the hyphenation pattern
     * @param ivalue interletter weight values indicating the
     * desirability and priority of hyphenating at a given point
     * within the pattern. It should contain only digit characters.
     * (i.e. '0' to '9').
     */
    public void AddPattern(String pattern, String ivalue) {
        // Identical value strings share one packed copy in the value space.
        int valueIndex = ivalues.Find(ivalue);
        if (valueIndex <= 0) {
            valueIndex = PackValues(ivalue);
            ivalues.Insert(ivalue, (char)valueIndex);
        }
        Insert(pattern, (char)valueIndex);
    }

    /**
     * Writes the size of the value space to the console, followed by the
     * statistics of the base class.
     */
    public override void PrintStats() {
        int valueSpaceSize = vspace.Length;
        Console.WriteLine("Value space size = " + valueSpaceSize);
        base.PrintStats();
    }
}
|
||||
}
|
220
iTechSharp/iTextSharp/text/pdf/hyphenation/Hyphenator.cs
Normal file
220
iTechSharp/iTextSharp/text/pdf/hyphenation/Hyphenator.cs
Normal file
@@ -0,0 +1,220 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
using System.Collections;
|
||||
using System.util;
|
||||
using iTextSharp.text.pdf;
|
||||
/*
|
||||
* Copyright 1999-2004 The Apache Software Foundation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
namespace iTextSharp.text.pdf.hyphenation {
|
||||
/**
|
||||
* This class is the main entry point to the hyphenation package.
|
||||
* You can use only the static methods or create an instance.
|
||||
*
|
||||
* @author Carlos Villegas <cav@uniscope.co.jp>
|
||||
*/
|
||||
public class Hyphenator {
|
||||
|
||||
/** TODO: Don't use statics */
|
||||
private static Hashtable hyphenTrees = Hashtable.Synchronized(new Hashtable());
|
||||
|
||||
private HyphenationTree hyphenTree = null;
|
||||
private int remainCharCount = 2;
|
||||
private int pushCharCount = 2;
|
||||
private const String defaultHyphLocation = "iTextSharp.text.pdf.hyphenation.hyph.";
|
||||
|
||||
/**
|
||||
* @param lang
|
||||
* @param country
|
||||
* @param leftMin
|
||||
* @param rightMin
|
||||
*/
|
||||
public Hyphenator(String lang, String country, int leftMin,
|
||||
int rightMin) {
|
||||
hyphenTree = GetHyphenationTree(lang, country);
|
||||
remainCharCount = leftMin;
|
||||
pushCharCount = rightMin;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param lang
|
||||
* @param country
|
||||
* @return the hyphenation tree
|
||||
*/
|
||||
public static HyphenationTree GetHyphenationTree(String lang,
|
||||
String country) {
|
||||
String key = lang;
|
||||
// check whether the country code has been used
|
||||
if (country != null && !country.Equals("none")) {
|
||||
key += "_" + country;
|
||||
}
|
||||
// first try to find it in the cache
|
||||
if (hyphenTrees.ContainsKey(key)) {
|
||||
return (HyphenationTree)hyphenTrees[key];
|
||||
}
|
||||
if (hyphenTrees.ContainsKey(lang)) {
|
||||
return (HyphenationTree)hyphenTrees[lang];
|
||||
}
|
||||
|
||||
HyphenationTree hTree = GetResourceHyphenationTree(key);
|
||||
//if (hTree == null)
|
||||
// hTree = GetFileHyphenationTree(key);
|
||||
// put it into the pattern cache
|
||||
if (hTree != null) {
|
||||
hyphenTrees[key] = hTree;
|
||||
}
|
||||
return hTree;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param key
|
||||
* @return a hyphenation tree
|
||||
*/
|
||||
public static HyphenationTree GetResourceHyphenationTree(String key) {
|
||||
try {
|
||||
Stream stream = BaseFont.GetResourceStream(defaultHyphLocation + key + ".xml");
|
||||
if (stream == null && key.Length > 2)
|
||||
stream = BaseFont.GetResourceStream(defaultHyphLocation + key.Substring(0, 2) + ".xml");
|
||||
if (stream == null)
|
||||
return null;
|
||||
HyphenationTree hTree = new HyphenationTree();
|
||||
hTree.LoadSimplePatterns(stream);
|
||||
return hTree;
|
||||
}
|
||||
catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param key
|
||||
* @return a hyphenation tree
|
||||
*/
|
||||
/* public static HyphenationTree GetFileHyphenationTree(String key) {
|
||||
try {
|
||||
if (hyphenDir == null)
|
||||
return null;
|
||||
Stream stream = null;
|
||||
string hyphenFile = Path.Combine(hyphenDir, key + ".xml");
|
||||
if (File.Exists(hyphenFile))
|
||||
stream = new FileStream(hyphenFile, FileMode.Open, FileAccess.Read, FileShare.Read);
|
||||
if (stream == null && key.Length > 2) {
|
||||
hyphenFile = Path.Combine(hyphenDir, key.Substring(0, 2) + ".xml");
|
||||
if (File.Exists(hyphenFile))
|
||||
stream = new FileStream(hyphenFile, FileMode.Open, FileAccess.Read, FileShare.Read);
|
||||
}
|
||||
if (stream == null)
|
||||
return null;
|
||||
HyphenationTree hTree = new HyphenationTree();
|
||||
hTree.LoadSimplePatterns(stream);
|
||||
return hTree;
|
||||
}
|
||||
catch (Exception e) {
|
||||
return null;
|
||||
}
|
||||
}*/
|
||||
|
||||
/**
|
||||
* @param lang
|
||||
* @param country
|
||||
* @param word
|
||||
* @param leftMin
|
||||
* @param rightMin
|
||||
* @return a hyphenation object
|
||||
*/
|
||||
public static Hyphenation Hyphenate(String lang, String country,
|
||||
String word, int leftMin,
|
||||
int rightMin) {
|
||||
HyphenationTree hTree = GetHyphenationTree(lang, country);
|
||||
if (hTree == null) {
|
||||
//log.Error("Error building hyphenation tree for language "
|
||||
// + lang);
|
||||
return null;
|
||||
}
|
||||
return hTree.Hyphenate(word, leftMin, rightMin);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param lang
|
||||
* @param country
|
||||
* @param word
|
||||
* @param offset
|
||||
* @param len
|
||||
* @param leftMin
|
||||
* @param rightMin
|
||||
* @return a hyphenation object
|
||||
*/
|
||||
public static Hyphenation Hyphenate(String lang, String country,
                                    char[] word, int offset, int len,
                                    int leftMin, int rightMin) {
    // Same contract as the String overload, but the word is passed as a
    // slice (offset, len) of a character array. Returns null when
    // GetHyphenationTree has no tree for the lang/country pair.
    HyphenationTree tree = GetHyphenationTree(lang, country);
    if (tree != null) {
        return tree.Hyphenate(word, offset, len, leftMin, rightMin);
    }
    return null;
}
|
||||
|
||||
/**
|
||||
* @param min
|
||||
*/
|
||||
public void SetMinRemainCharCount(int min) {
    // Remembered for the instance Hyphenate overloads, which forward it to
    // the hyphenation tree as the first of the two minimum-count arguments.
    remainCharCount = min;
}
|
||||
|
||||
/**
|
||||
* @param min
|
||||
*/
|
||||
public void SetMinPushCharCount(int min) {
    // Remembered for the instance Hyphenate overloads, which forward it to
    // the hyphenation tree as the second of the two minimum-count arguments.
    pushCharCount = min;
}
|
||||
|
||||
/**
|
||||
* @param lang
|
||||
* @param country
|
||||
*/
|
||||
public void SetLanguage(String lang, String country) {
    // Replaces the tree used by the instance Hyphenate overloads. The result
    // of GetHyphenationTree is stored as-is, so it may be null; the instance
    // overloads then return null.
    hyphenTree = GetHyphenationTree(lang, country);
}
|
||||
|
||||
/**
|
||||
* @param word
|
||||
* @param offset
|
||||
* @param len
|
||||
* @return a hyphenation object
|
||||
*/
|
||||
public Hyphenation Hyphenate(char[] word, int offset, int len) {
    // Without a tree (see SetLanguage) there is nothing to hyphenate with.
    if (hyphenTree != null) {
        return hyphenTree.Hyphenate(word, offset, len,
                                    remainCharCount, pushCharCount);
    }
    return null;
}
|
||||
|
||||
/**
|
||||
* @param word
|
||||
* @return a hyphenation object
|
||||
*/
|
||||
public Hyphenation Hyphenate(String word) {
    // Uses the tree chosen through SetLanguage together with the configured
    // minimum character counts; returns null when no tree is set.
    return hyphenTree == null
        ? null
        : hyphenTree.Hyphenate(word, remainCharCount, pushCharCount);
}
|
||||
}
|
||||
}
|
@@ -0,0 +1,54 @@
|
||||
using System;
|
||||
using System.Collections;
|
||||
/*
|
||||
* Copyright 1999-2004 The Apache Software Foundation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
namespace iTextSharp.text.pdf.hyphenation {
|
||||
/**
|
||||
* This interface is used to connect the XML pattern file parser to
|
||||
* the hyphenation tree.
|
||||
*
|
||||
* @author Carlos Villegas <cav@uniscope.co.jp>
|
||||
*/
|
||||
public interface IPatternConsumer {

    /**
     * Receives one character class.
     * A character class lists characters that are treated as equivalent
     * for hyphenation purposes (e.g. "aA"); in practice this usually
     * amounts to ignoring case.
     * @param chargroup the characters forming the class
     */
    void AddClass(String chargroup);

    /**
     * Receives one hyphenation exception. An exception overrides the
     * result of the algorithm for words where the algorithm fails or
     * where the user wants to supply a specific hyphenation.
     * @param word the exception word
     * @param hyphenatedword list of alternating String and
     * {@link Hyphen Hyphen} instances describing the hyphenated word
     */
    void AddException(String word, ArrayList hyphenatedword);

    /**
     * Receives one hyphenation pattern.
     * @param pattern the pattern
     * @param values the interletter values, written as a string of
     * digit characters
     */
    void AddPattern(String pattern, String values);
}
|
||||
}
|
@@ -0,0 +1,247 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
using System.Collections;
|
||||
using System.util;
|
||||
using iTextSharp.text.xml.simpleparser;
|
||||
/*
|
||||
* Copyright 2005 by Paulo Soares.
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
||||
* (the "License"); you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the License.
|
||||
*
|
||||
* The Original Code is 'iText, a free JAVA-PDF library'.
|
||||
*
|
||||
* The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
|
||||
* the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
|
||||
* All Rights Reserved.
|
||||
* Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
|
||||
* are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): all the names of the contributors are added in the source code
|
||||
* where applicable.
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of the
|
||||
* LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
|
||||
* provisions of LGPL are applicable instead of those above. If you wish to
|
||||
* allow use of your version of this file only under the terms of the LGPL
|
||||
* License and not to allow others to use your version of this file under
|
||||
* the MPL, indicate your decision by deleting the provisions above and
|
||||
* replace them with the notice and other provisions required by the LGPL.
|
||||
* If you do not delete the provisions above, a recipient may use your version
|
||||
* of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the MPL as stated above or under the terms of the GNU
|
||||
* Library General Public License as published by the Free Software Foundation;
|
||||
* either version 2 of the License, or any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
|
||||
* details.
|
||||
*
|
||||
* If you didn't download this code from the following link, you should check if
|
||||
* you aren't using an obsolete version:
|
||||
* http://www.lowagie.com/iText/
|
||||
*/
|
||||
|
||||
namespace iTextSharp.text.pdf.hyphenation {
|
||||
/** Parses the xml hyphenation pattern.
|
||||
*
|
||||
* @author Paulo Soares (psoares@consiste.pt)
|
||||
*/
|
||||
public class SimplePatternParser : ISimpleXMLDocHandler {
|
||||
internal int currElement;
|
||||
internal IPatternConsumer consumer;
|
||||
internal StringBuilder token;
|
||||
internal ArrayList exception;
|
||||
internal char hyphenChar;
|
||||
|
||||
internal const int ELEM_CLASSES = 1;
|
||||
internal const int ELEM_EXCEPTIONS = 2;
|
||||
internal const int ELEM_PATTERNS = 3;
|
||||
internal const int ELEM_HYPHEN = 4;
|
||||
|
||||
/** Creates a new instance of SimplePatternParser */
|
||||
public SimplePatternParser() {
    // '-' is only the default; a "hyphen-char" element seen by
    // StartElement can replace it.
    hyphenChar = '-';
    token = new StringBuilder();
}
|
||||
|
||||
/**
 * Parses the XML pattern data in the stream and reports what it finds
 * to the consumer. The stream is always closed afterwards.
 * @param stream the XML input
 * @param consumer receives classes, exceptions and patterns
 */
public void Parse(Stream stream, IPatternConsumer consumer) {
    this.consumer = consumer;
    try {
        SimpleXMLParser.Parse(this, stream);
    }
    finally {
        // Best effort: a failure while closing is deliberately ignored so
        // it cannot mask an exception thrown by the parser.
        try {
            stream.Close();
        }
        catch {
        }
    }
}
|
||||
|
||||
/**
 * Strips the digits from a pattern word.
 * @param word the pattern as written in the file, e.g. "a1b2c"
 * @return the word with every digit character removed
 */
protected static String GetPattern(String word) {
    StringBuilder letters = new StringBuilder();
    foreach (char ch in word) {
        if (char.IsDigit(ch)) {
            continue;
        }
        letters.Append(ch);
    }
    return letters.ToString();
}
|
||||
|
||||
/**
 * Splits the string entries of an exception list at every occurrence of
 * the hyphen character, inserting a Hyphen object for each occurrence.
 * Entries that are not strings are copied unchanged.
 * @param ex list of String and Hyphen entries
 * @return a new list; the input list is not modified
 */
protected ArrayList NormalizeException(ArrayList ex) {
    ArrayList normalized = new ArrayList();
    foreach (Object entry in ex) {
        String text = entry as String;
        if (text == null) {
            normalized.Add(entry);
            continue;
        }
        StringBuilder piece = new StringBuilder();
        foreach (char ch in text) {
            if (ch != hyphenChar) {
                piece.Append(ch);
                continue;
            }
            // The piece collected so far is emitted even when it is empty.
            normalized.Add(piece.ToString());
            piece.Length = 0;
            // we use here hyphenChar which is not necessarily
            // the one to be printed
            normalized.Add(new Hyphen(hyphenChar.ToString(), null, null));
        }
        if (piece.Length > 0) {
            normalized.Add(piece.ToString());
        }
    }
    return normalized;
}
|
||||
|
||||
/**
 * Rebuilds the plain word from an exception list: string entries are
 * concatenated, and each Hyphen entry contributes its noBreak text when
 * that text is not null.
 * @param ex list of String and Hyphen entries
 * @return the concatenated word
 */
protected String GetExceptionWord(ArrayList ex) {
    StringBuilder word = new StringBuilder();
    foreach (Object part in ex) {
        String text = part as String;
        if (text != null) {
            word.Append(text);
            continue;
        }
        Hyphen hyphen = (Hyphen)part;
        if (hyphen.noBreak != null) {
            word.Append(hyphen.noBreak);
        }
    }
    return word.ToString();
}
|
||||
|
||||
/**
 * Extracts the interletter values of a pattern as a digit string.
 * One value is produced per position before each letter plus one for
 * the position after the last letter; positions without an explicit
 * digit get '0'. Example: "a1b" yields "010".
 * @param pat the pattern as written in the file
 * @return the interletter values
 */
protected static String GetInterletterValues(String pat) {
    // add dummy letter to serve as sentinel
    String padded = pat + "a";
    StringBuilder values = new StringBuilder();
    int pos = 0;
    while (pos < padded.Length) {
        char ch = padded[pos];
        if (char.IsDigit(ch)) {
            values.Append(ch);
            // Skip the character that follows the digit: the digit is the
            // value for the position in front of it.
            pos += 2;
        } else {
            values.Append('0');
            pos++;
        }
    }
    return values.ToString();
}
|
||||
|
||||
/**
 * Document-end callback; this parser has nothing to do here.
 */
public void EndDocument() {
    // intentionally empty
}
|
||||
|
||||
/**
 * Element-end callback. Flushes any text still held in the token buffer
 * to the consumer according to the current element, then leaves the
 * element: closing a hyphen returns to the exceptions element, closing
 * anything else resets the current element to 0.
 * @param tag the element name (not inspected)
 */
public void EndElement(String tag) {
    if (token.Length > 0) {
        String word = token.ToString();
        if (currElement == ELEM_CLASSES) {
            consumer.AddClass(word);
        } else if (currElement == ELEM_EXCEPTIONS) {
            exception.Add(word);
            exception = NormalizeException(exception);
            consumer.AddException(GetExceptionWord(exception),
                                  (ArrayList)exception.Clone());
        } else if (currElement == ELEM_PATTERNS) {
            consumer.AddPattern(GetPattern(word),
                                GetInterletterValues(word));
        }
        // ELEM_HYPHEN: nothing to report, and the buffer is kept.
        if (currElement != ELEM_HYPHEN) {
            token.Length = 0;
        }
    }
    currElement = (currElement == ELEM_HYPHEN) ? ELEM_EXCEPTIONS : 0;
}
|
||||
|
||||
/**
 * Document-start callback; this parser has nothing to do here.
 */
public void StartDocument() {
    // intentionally empty
}
|
||||
|
||||
/**
 * Element-start callback. Records which element is being read and
 * handles the attributes of "hyphen-char" and "hyphen" elements. The
 * token buffer is emptied on every call.
 * @param tag the element name
 * @param h the element attributes, keyed by attribute name
 */
public void StartElement(String tag, Hashtable h) {
    if (tag.Equals("hyphen-char")) {
        // Only a single-character value replaces the current hyphen char.
        String value = (String)h["value"];
        if (value != null && value.Length == 1) {
            hyphenChar = value[0];
        }
    } else if (tag.Equals("classes")) {
        currElement = ELEM_CLASSES;
    } else if (tag.Equals("patterns")) {
        currElement = ELEM_PATTERNS;
    } else if (tag.Equals("exceptions")) {
        currElement = ELEM_EXCEPTIONS;
        exception = new ArrayList();
    } else if (tag.Equals("hyphen")) {
        // Text collected before the hyphen becomes its own list entry.
        if (token.Length > 0) {
            exception.Add(token.ToString());
        }
        String pre = (String)h["pre"];
        String no = (String)h["no"];
        String post = (String)h["post"];
        exception.Add(new Hyphen(pre, no, post));
        currElement = ELEM_HYPHEN;
    }
    token.Length = 0;
}
|
||||
|
||||
/**
 * Character-data callback. The text is split with a StringTokenizer and
 * every token is reported to the consumer according to the current
 * element; text outside classes, exceptions and patterns is ignored.
 * @param str the character data
 */
public void Text(String str) {
    for (StringTokenizer words = new StringTokenizer(str); words.HasMoreTokens(); ) {
        String word = words.NextToken();
        if (currElement == ELEM_CLASSES) {
            consumer.AddClass(word);
        } else if (currElement == ELEM_EXCEPTIONS) {
            exception.Add(word);
            exception = NormalizeException(exception);
            consumer.AddException(GetExceptionWord(exception),
                                  (ArrayList)exception.Clone());
            // Start collecting the next exception word from scratch.
            exception.Clear();
        } else if (currElement == ELEM_PATTERNS) {
            consumer.AddPattern(GetPattern(word),
                                GetInterletterValues(word));
        }
    }
}
|
||||
}
|
||||
}
|
631
iTechSharp/iTextSharp/text/pdf/hyphenation/TernaryTree.cs
Normal file
631
iTechSharp/iTextSharp/text/pdf/hyphenation/TernaryTree.cs
Normal file
@@ -0,0 +1,631 @@
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Text;
|
||||
|
||||
/*
|
||||
* $Id: TernaryTree.cs,v 1.2 2005/06/18 08:17:05 psoares33 Exp $
|
||||
* Copyright (C) 2001 The Apache Software Foundation. All rights reserved.
|
||||
* For details on use and redistribution please refer to the
|
||||
* LICENSE file included with these sources.
|
||||
*/
|
||||
|
||||
namespace iTextSharp.text.pdf.hyphenation {
|
||||
/**
|
||||
* <h2>Ternary Search Tree</h2>
|
||||
*
|
||||
* <p>A ternary search tree is a hybrid between a binary tree and
|
||||
* a digital search tree (trie). Keys are limited to strings.
|
||||
* A data value of type char is stored in each leaf node.
|
||||
* It can be used as an index (or pointer) to the data.
|
||||
* Branches that only contain one key are compressed to one node
|
||||
* by storing a pointer to the trailer substring of the key.
|
||||
* This class is intended to serve as base class or helper class
|
||||
* to implement Dictionary collections or the like. Ternary trees
|
||||
* have some nice properties as the following: the tree can be
|
||||
* traversed in sorted order, partial matches (wildcard) can be
|
||||
* implemented, retrieval of all keys within a given distance
|
||||
* from the target, etc. The storage requirements are higher than
|
||||
* a binary tree but a lot less than a trie. Performance is
|
||||
* comparable with a hash table, sometimes it outperforms a hash
|
||||
* function (most of the time can determine a miss faster than a hash).</p>
|
||||
*
|
||||
* <p>The main purpose of this java port is to serve as a base for
|
||||
* implementing TeX's hyphenation algorithm (see The TeXBook,
|
||||
* appendix H). Each language requires from 5000 to 15000 hyphenation
|
||||
* patterns which will be keys in this tree. The strings patterns
|
||||
* are usually small (from 2 to 5 characters), but each char in the
|
||||
* tree is stored in a node. Thus memory usage is the main concern.
|
||||
* We will sacrifice 'elegance' to keep memory requirements to the
|
||||
* minimum. Using java's char type as pointer (yes, I know pointer
|
||||
* it is a forbidden word in java) we can keep the size of the node
|
||||
* to be just 8 bytes (3 pointers and the data char). This gives
|
||||
* room for about 65000 nodes. In my tests the english patterns
|
||||
* took 7694 nodes and the german patterns 10055 nodes,
|
||||
* so I think we are safe.</p>
|
||||
*
|
||||
* <p>All said, this is a map with strings as keys and char as value.
|
||||
* Pretty limited!. It can be extended to a general map by
|
||||
* using the string representation of an object and using the
|
||||
* char value as an index to an array that contains the object
|
||||
* values.</p>
|
||||
*
|
||||
* @author cav@uniscope.co.jp
|
||||
*/
|
||||
|
||||
public class TernaryTree : ICloneable {
|
||||
|
||||
/**
|
||||
* We use 4 arrays to represent a node. I guess I should have created
|
||||
* a proper node class, but somehow Knuth's pascal code made me forget
|
||||
* we now have a portable language with memory management and
|
||||
* automatic garbage collection! And now is kind of late, furthermore,
|
||||
* if it ain't broken, don't fix it.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Pointer to low branch and to rest of the key when it is
|
||||
* stored directly in this node, we don't have unions in java!
|
||||
*/
|
||||
protected char[] lo;
|
||||
|
||||
/**
|
||||
* Pointer to high branch.
|
||||
*/
|
||||
protected char[] hi;
|
||||
|
||||
/**
|
||||
* Pointer to equal branch and to data when this node is a string terminator.
|
||||
*/
|
||||
protected char[] eq;
|
||||
|
||||
/**
|
||||
* <P>The character stored in this node: splitchar
|
||||
* Two special values are reserved:</P>
|
||||
* <ul><li>0x0000 as string terminator</li>
|
||||
* <li>0xFFFF to indicate that the branch starting at
|
||||
* this node is compressed</li></ul>
|
||||
* <p>This shouldn't be a problem if we give the usual semantics to
|
||||
* strings since 0xFFFF is guaranteed not to be a Unicode character.</p>
|
||||
*/
|
||||
protected char[] sc;
|
||||
|
||||
/**
|
||||
* This vector holds the trailing of the keys when the branch is compressed.
|
||||
*/
|
||||
protected CharVector kv;
|
||||
|
||||
protected char root;
|
||||
protected char freenode;
|
||||
protected int length; // number of items in tree
|
||||
|
||||
protected static int BLOCK_SIZE = 2048; // allocation size for arrays
|
||||
|
||||
/**
 * Creates an empty tree; all state is set up by Init().
 */
internal TernaryTree() {
    Init();
}
|
||||
|
||||
/**
 * Resets the tree to the empty state with freshly allocated node
 * arrays of BLOCK_SIZE entries and a new key vector.
 */
protected void Init() {
    lo = new char[BLOCK_SIZE];
    hi = new char[BLOCK_SIZE];
    eq = new char[BLOCK_SIZE];
    sc = new char[BLOCK_SIZE];
    kv = new CharVector();
    // Index 0 means "no node", so the first usable node is index 1.
    root = (char)0;
    freenode = (char)1;
    length = 0;
}
|
||||
|
||||
/**
|
||||
* Branches are initially compressed, needing
|
||||
* one node per key plus the size of the string
|
||||
* key. They are decompressed as needed when
|
||||
* another key with same prefix
|
||||
* is inserted. This saves a lot of space,
|
||||
* specially for long keys.
|
||||
*/
|
||||
public void Insert(string key, char val) {
    // key.Length + 1 is the maximum number of nodes this insertion may
    // generate (one per character plus one for the terminator).
    int keyLen = key.Length;
    int needed = keyLen + 1;

    // Make sure the node arrays have room. Growing by a single BLOCK_SIZE
    // is not enough when the key is longer than the increment, so grow to
    // whichever is larger: one more block or exactly what is required.
    if (freenode + needed > eq.Length)
        RedimNodeArrays(Math.Max(eq.Length + BLOCK_SIZE, freenode + needed));

    // Work on a null terminated copy of the key, as the char[] based
    // routines expect.
    char[] strkey = new char[needed];
    key.CopyTo(0, strkey, 0, keyLen);
    strkey[keyLen] = (char)0;
    root = Insert(root, strkey, 0, val);
}
|
||||
|
||||
/**
 * Inserts the null terminated key that begins at key[start] and
 * associates it with val.
 * @param key array holding the null terminated key
 * @param start index of the first character of the key
 * @param val the value to store for the key
 */
public void Insert(char[] key, int start, char val) {
    // Measure the key from 'start', not from index 0. Measuring from 0
    // gives a too small estimate whenever a 0 element precedes 'start',
    // and the recursive insertion can then run past the node arrays.
    int len = Strlen(key, start) + 1;

    // Grow by one block, or by more when a single block would not cover
    // the nodes this key may need.
    if (freenode + len > eq.Length)
        RedimNodeArrays(Math.Max(eq.Length + BLOCK_SIZE, freenode + len));
    root = Insert(root, key, start, val);
}
|
||||
|
||||
/**
|
||||
* The actual insertion function, recursive version.
|
||||
*/
|
||||
private char Insert(char p, char[] key, int start, char val) {
    // p is the node to insert under (0 = no node yet); the return value is
    // the node that now roots this branch, which the caller stores back.
    const char Compressed = (char)0xFFFF; // split char of a compressed branch
    const char Nil = (char)0;             // "no node" / string terminator
    int remaining = Strlen(key, start);

    if (p == Nil) {
        // No branch here yet. Rather than one node per character, create a
        // single node and keep the rest of the key in the key vector.
        p = freenode++;
        eq[p] = val; // holds data
        length++;
        hi[p] = Nil;
        if (remaining == 0) {
            sc[p] = Nil;
            lo[p] = Nil;
            return p;
        }
        sc[p] = Compressed;
        lo[p] = (char)kv.Alloc(remaining + 1); // 'lo' points at the stored key
        Strcpy(kv.Arr, lo[p], key, start);
        return p;
    }

    if (sc[p] == Compressed) {
        // The branch is compressed and must be opened up by one character.
        // This leaves garbage in the key vector, which TrimToSize can
        // collect later.
        char tail = freenode++;
        lo[tail] = lo[p]; // previous pointer to key
        eq[tail] = eq[p]; // previous pointer to data
        lo[p] = Nil;
        if (remaining == 0) {
            // The new key ends here: save a node by letting p become the
            // terminator and hanging the still compressed node on 'hi'.
            sc[tail] = Compressed;
            hi[p] = tail;
            sc[p] = Nil;
            eq[p] = val;
            length++;
            return p;
        }
        sc[p] = kv[lo[tail]];
        eq[p] = tail;
        lo[tail]++;
        if (kv[lo[tail]] != 0) {
            // only the first char was taken; the rest is still compressed
            sc[tail] = Compressed;
        } else {
            // key completely decompressed, leaving garbage in the key array
            lo[tail] = Nil;
            sc[tail] = Nil;
            hi[tail] = Nil;
        }
    }

    char s = key[start];
    char split = sc[p];
    if (s < split) {
        lo[p] = Insert(lo[p], key, start, val);
    } else if (s > split) {
        hi[p] = Insert(hi[p], key, start, val);
    } else if (s != 0) {
        eq[p] = Insert(eq[p], key, start + 1, val);
    } else {
        // key already in tree, overwrite data
        eq[p] = val;
    }
    return p;
}
|
||||
|
||||
/**
|
||||
* Compares 2 null terminated char arrays
|
||||
*/
|
||||
public static int Strcmp(char[] a, int startA, char[] b, int startB) {
    // Walk both arrays in step until the characters differ or a shared
    // terminator is reached. The result is 0 for equal strings, otherwise
    // the difference of the first differing characters.
    int i = startA;
    int j = startB;
    while (a[i] == b[j]) {
        if (a[i] == 0) {
            return 0;
        }
        i++;
        j++;
    }
    return a[i] - b[j];
}
|
||||
|
||||
/**
|
||||
* Compares a string with null terminated char array
|
||||
*/
|
||||
public static int Strcmp(string str, char[] a, int start) {
    // Compare character by character. A difference, or a terminator in the
    // array, ends the comparison immediately.
    int count = str.Length;
    int idx = 0;
    while (idx < count) {
        char other = a[start + idx];
        int diff = str[idx] - other;
        if (diff != 0 || other == 0) {
            return diff;
        }
        idx++;
    }
    // The string is exhausted: it is equal only if the array ends here too,
    // otherwise the string is the shorter (smaller) one.
    char rest = a[start + idx];
    return rest != 0 ? -rest : 0;
}
|
||||
|
||||
/**
 * Copies the null terminated string at src[si] to dst[di], including
 * the terminator.
 */
public static void Strcpy(char[] dst, int di, char[] src, int si) {
    int to = di;
    for (int from = si; src[from] != 0; from++, to++) {
        dst[to] = src[from];
    }
    dst[to] = (char)0;
}
|
||||
|
||||
/**
 * Returns the number of characters from a[start] up to, but not
 * including, the first 0 element or the end of the array.
 */
public static int Strlen(char[] a, int start) {
    int end = start;
    while (end < a.Length && a[end] != 0) {
        end++;
    }
    return end - start;
}
|
||||
|
||||
/**
 * Returns the length of the null terminated string that starts at a[0].
 */
public static int Strlen(char[] a) {
    int fromStart = Strlen(a, 0);
    return fromStart;
}
|
||||
|
||||
/**
 * Looks up a key given as a string.
 * @return the value stored for the key, or -1 when the key is unknown
 */
public int Find(string key) {
    // Build the null terminated form the char[] overload expects; the
    // extra slot of a new array is already 0 and acts as the terminator.
    int keyLen = key.Length;
    char[] terminated = new char[keyLen + 1];
    key.CopyTo(0, terminated, 0, keyLen);
    return Find(terminated, 0);
}
|
||||
|
||||
/**
 * Looks up the null terminated key that begins at key[start].
 * @return the value stored for the key, or -1 when the key is unknown
 */
public int Find(char[] key, int start) {
    int pos = start;
    for (char node = root; node != 0; ) {
        if (sc[node] == 0xFFFF) {
            // Compressed branch: the rest of the key must match the stored
            // remainder exactly.
            return Strcmp(key, pos, kv.Arr, lo[node]) == 0 ? eq[node] : -1;
        }
        char c = key[pos];
        int diff = c - sc[node];
        if (diff < 0) {
            node = lo[node];
        } else if (diff > 0) {
            node = hi[node];
        } else {
            if (c == 0) {
                // Matched the terminator: eq holds the data.
                return eq[node];
            }
            pos++;
            node = eq[node];
        }
    }
    return -1;
}
|
||||
|
||||
/**
 * Tells whether the key is present, i.e. whether Find does not
 * return a negative value for it.
 */
public bool Knows(string key) {
    int found = Find(key);
    return found >= 0;
}
|
||||
|
||||
// redimension the arrays
|
||||
private void RedimNodeArrays(int newsize) {
    // Replace each of the four node arrays by one of the new size, keeping
    // as many leading entries as fit. The count is derived from lo only.
    int keep = lo.Length <= newsize ? lo.Length : newsize;

    char[] resized = new char[newsize];
    Array.Copy(lo, 0, resized, 0, keep);
    lo = resized;

    resized = new char[newsize];
    Array.Copy(hi, 0, resized, 0, keep);
    hi = resized;

    resized = new char[newsize];
    Array.Copy(eq, 0, resized, 0, keep);
    eq = resized;

    resized = new char[newsize];
    Array.Copy(sc, 0, resized, 0, keep);
    sc = resized;
}
|
||||
|
||||
/**
 * The number of items in the tree.
 */
public int Size {
    get { return length; }
}
|
||||
|
||||
/**
 * Creates an independent copy: the node arrays and the key vector are
 * cloned, the scalar state is copied.
 * @return a new TernaryTree with the same content
 */
public Object Clone() {
    TernaryTree copy = new TernaryTree();
    copy.root = root;
    copy.freenode = freenode;
    copy.length = length;
    copy.lo = (char[])lo.Clone();
    copy.hi = (char[])hi.Clone();
    copy.eq = (char[])eq.Clone();
    copy.sc = (char[])sc.Clone();
    copy.kv = (CharVector)kv.Clone();
    return copy;
}
|
||||
|
||||
/**
|
||||
* Recursively insert the median first and then the median of the
|
||||
* lower and upper halves, and so on in order to get a balanced
|
||||
* tree. The array of keys is assumed to be sorted in ascending
|
||||
* order.
|
||||
*/
|
||||
protected void InsertBalanced(string[] k, char[] v, int offset, int n) {
    // Handles the n entries starting at 'offset': the middle one is
    // inserted first, then the entries below it, then the entries above.
    if (n < 1) {
        return;
    }
    int half = n >> 1;
    int middle = offset + half;

    Insert(k[middle], v[middle]);
    InsertBalanced(k, v, offset, half);
    InsertBalanced(k, v, middle + 1, n - half - 1);
}
|
||||
|
||||
|
||||
/**
|
||||
* Balance the tree for best search performance
|
||||
*/
|
||||
public void Balance() {
    // Collect every key/value pair in iteration order, empty the tree and
    // re-insert the pairs median first.
    int count = length;
    string[] keys = new string[count];
    char[] values = new char[count];

    Iterator walker = new Iterator(this);
    for (int slot = 0; walker.HasMoreElements(); slot++) {
        // Value must be read before NextElement advances the iterator.
        values[slot] = walker.Value;
        keys[slot] = (string)walker.NextElement();
    }

    Init();
    InsertBalanced(keys, values, 0, count);
    // With uniform letter distribution sc[root] should be around 'm'
}
|
||||
|
||||
/**
|
||||
* Each node stores a character (splitchar) which is part of
|
||||
* some Key(s). In a compressed branch (one that only contain
|
||||
* a single string key) the trailer of the key which is not
|
||||
* already in nodes is stored externally in the kv array.
|
||||
* As items are inserted, key substrings decrease.
|
||||
* Some substrings may completely disappear when the whole
|
||||
* branch is totally decompressed.
|
||||
* The tree is traversed to find the key substrings actually
|
||||
* used. In addition, duplicate substrings are removed using
|
||||
* a map (implemented with a TernaryTree!).
|
||||
*
|
||||
*/
|
||||
public void TrimToSize() {
    // first balance the tree for best performance
    Balance();

    // shrink the node arrays to the nodes actually in use
    RedimNodeArrays(freenode);

    // Rebuild the key vector with only the substrings still referenced.
    // Slot 0 is reserved first so that no substring gets index 0. 'seen'
    // maps a substring to its new position so duplicates are stored once.
    CharVector packed = new CharVector();
    packed.Alloc(1);
    TernaryTree seen = new TernaryTree();
    Compact(packed, seen, root);
    kv = packed;
    kv.TrimToSize();
}
|
||||
|
||||
/**
 * Walks the branch rooted at p and moves the key remainder of every
 * compressed node from kv into kx, re-pointing the node at the new
 * position. map records the remainders already copied so that equal
 * remainders share one copy.
 */
private void Compact(CharVector kx, TernaryTree map, char p) {
    if (p == 0) {
        return;
    }
    if (sc[p] != 0xFFFF) {
        // Ordinary node: visit the children. A terminator node keeps data,
        // not a child, in eq.
        Compact(kx, map, lo[p]);
        if (sc[p] != 0) {
            Compact(kx, map, eq[p]);
        }
        Compact(kx, map, hi[p]);
        return;
    }
    int slot = map.Find(kv.Arr, lo[p]);
    if (slot < 0) {
        slot = kx.Alloc(Strlen(kv.Arr, lo[p]) + 1);
        Strcpy(kx.Arr, slot, kv.Arr, lo[p]);
        map.Insert(kx.Arr, slot, (char)slot);
    }
    lo[p] = (char)slot;
}
|
||||
|
||||
|
||||
/**
 * A new iterator over this tree, positioned on the first key.
 */
public Iterator Keys {
    get { return new Iterator(this); }
}
|
||||
|
||||
/**
 * Enumerates the keys of a TernaryTree. Value gives the data of the key
 * that the next call to NextElement will return.
 */
public class Iterator {

    /**
     * current node index, -1 when the iteration is finished
     */
    int cur;

    /**
     * current key
     */
    string curkey;

    /**
     * TernaryTree parent
     */
    TernaryTree parent;

    /**
     * Entry of the node stack: a node index plus a counter telling which
     * of its branches have been taken so far.
     */
    private class Item : ICloneable {
        internal char parent;
        internal char child;

        public Item() {
            parent = (char)0;
            child = (char)0;
        }

        public Item(char p, char c) {
            parent = p;
            child = c;
        }

        public Object Clone() {
            return new Item(parent, child);
        }
    }

    /**
     * Node stack
     */
    Stack ns;

    /**
     * key stack implemented with a StringBuilder
     */
    StringBuilder ks;

    public Iterator(TernaryTree parent) {
        this.parent = parent;
        cur = -1;
        ns = new Stack();
        ks = new StringBuilder();
        Rewind();
    }

    /**
     * Restarts the iteration at the first key.
     */
    public void Rewind() {
        ns.Clear();
        ks.Length = 0;
        cur = parent.root;
        Run();
    }

    /**
     * Returns the current key (a string) and advances to the next one.
     */
    public Object NextElement() {
        string key = curkey;
        cur = Up();
        Run();
        return key;
    }

    /**
     * The data stored for the current key, or 0 when there is none.
     */
    public char Value {
        get {
            return cur >= 0 ? this.parent.eq[cur] : (char)0;
        }
    }

    public bool HasMoreElements() {
        return cur != -1;
    }

    /**
     * traverse upwards
     */
    private int Up() {
        if (ns.Count == 0) {
            return -1;
        }

        if (cur != 0 && parent.sc[cur] == 0) {
            return parent.lo[cur];
        }

        for (; ; ) {
            Item top = (Item)ns.Pop();
            top.child++;
            if (top.child == (char)1) {
                if (parent.sc[top.parent] != 0) {
                    // Take the equal branch; its split char joins the key.
                    ns.Push(top.Clone());
                    ks.Append(parent.sc[top.parent]);
                    return parent.eq[top.parent];
                }
                // A terminator node has no equal branch: go straight to
                // the high branch.
                top.child++;
                ns.Push(top.Clone());
                return parent.hi[top.parent];
            }
            if (top.child == (char)2) {
                ns.Push(top.Clone());
                if (ks.Length > 0) {
                    ks.Length -= 1; // pop
                }
                return parent.hi[top.parent];
            }
            // All branches of this node are done: keep climbing.
            if (ns.Count == 0) {
                return -1;
            }
        }
    }

    /**
     * traverse the tree to find next key
     */
    private int Run() {
        if (cur == -1) {
            return -1;
        }

        bool atLeaf = false;
        while (!atLeaf) {
            // first go down on low branch until leaf or compressed branch
            while (cur != 0) {
                if (parent.sc[cur] == 0xFFFF) {
                    atLeaf = true;
                    break;
                }
                ns.Push(new Item((char)cur, (char)0));
                if (parent.sc[cur] == 0) {
                    atLeaf = true;
                    break;
                }
                cur = parent.lo[cur];
            }
            if (!atLeaf) {
                // nothing found, go up one node and try again
                cur = Up();
                if (cur == -1) {
                    return -1;
                }
            }
        }

        // The current node should be a data node and
        // the key should be in the key stack (at least partially)
        StringBuilder key = new StringBuilder(ks.ToString());
        if (parent.sc[cur] == 0xFFFF) {
            for (int p = parent.lo[cur]; parent.kv[p] != 0; p++) {
                key.Append(parent.kv[p]);
            }
        }
        curkey = key.ToString();
        return 0;
    }

}
|
||||
|
||||
/**
 * Writes the number of keys, the node count and the key array length
 * to the standard error stream.
 */
public virtual void PrintStats() {
    Console.Error.WriteLine("Number of keys = " + length.ToString());
    // freenode is a char used as an index; convert it to int first, as
    // char.ToString() would print the character with that code instead of
    // the number.
    Console.Error.WriteLine("Node count = " + ((int)freenode).ToString());
    Console.Error.WriteLine("Key Array length = "
                            + kv.Length.ToString());
}
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user