3rdPartyLibraries/iTechSharp/iTextSharp/text/pdf/ArabicLigaturizer.cs

using System;
using System.Text;
/*
 * Copyright 2003 by Paulo Soares.
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * (the "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the License.
 *
 * The Original Code is 'iText, a free JAVA-PDF library'.
 *
 * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
 * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
 * All Rights Reserved.
 * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
 * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
 *
 * Contributor(s): all the names of the contributors are added in the source code
 * where applicable.
 *
 * Alternatively, the contents of this file may be used under the terms of the
 * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
 * provisions of LGPL are applicable instead of those above.  If you wish to
 * allow use of your version of this file only under the terms of the LGPL
 * License and not to allow others to use your version of this file under
 * the MPL, indicate your decision by deleting the provisions above and
 * replace them with the notice and other provisions required by the LGPL.
 * If you do not delete the provisions above, a recipient may use your version
 * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the MPL as stated above or under the terms of the GNU
 * Library General Public License as published by the Free Software Foundation;
 * either version 2 of the License, or any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
 * details.
 *
 * If you didn't download this code from the following link, you should check if
 * you aren't using an obsolete version:
 * http://www.lowagie.com/iText/
 */

namespace iTextSharp.text.pdf {

    /**
    * Shape arabic characters. This code was inspired by an LGPL'ed C library:
    * Pango ( see http://www.pango.com/ ). Note that the code of this is the
    * original work of Paulo Soares. Hence it is perfectly justifiable to distribute
    * it under the MPL.
    *
    * @author Paulo Soares (psoares@consiste.pt)
    */
    public class ArabicLigaturizer {

        /**
        * Tells whether a character is one of the combining marks this shaper
        * attaches to a base letter: anything in U+064B..U+0655, or U+0670.
        */
        static bool IsVowel(char s) {
            if (s == '\u0670') {
                return true;
            }
            return '\u064B' <= s && s <= '\u0655';
        }

        /**
        * Returns the presentation form of a character for a given position.
        *
        * which: 0=isolated 1=final 2=initial 3=medial
        *
        * Characters U+FEF5..U+FEFB (the LAM-ALEF ligatures) are shaped by adding
        * 'which' to the code point. Characters in U+0621..U+06D3 are looked up in
        * chartable by binary search; if no row matches, or the character is
        * outside both ranges, it is returned unchanged.
        *
        * The caller must make sure 'which' is smaller than the number of forms
        * the character has; no bounds check is done on the table row here.
        */
        static char Charshape(char s, int which)
        {
            if (s >= '\ufef5' && s <= '\ufefb') {
                return (char)(s + which);
            }
            if (s < '\u0621' || s > '\u06D3') {
                return s;
            }

            int lo = 0;
            int hi = chartable.Length - 1;
            while (lo <= hi) {
                int mid = (lo + hi) / 2;
                char[] row = chartable[mid];
                if (row[0] < s) {
                    lo = mid + 1;
                }
                else if (row[0] > s) {
                    hi = mid - 1;
                }
                else {
                    /* column 0 is the base letter, so the forms start at column 1 */
                    return row[which + 1];
                }
            }
            return s;
        }

        /**
        * Returns how many presentation forms a character has.
        *
        * ZWJ counts as 4. A non-vowel character in U+0621..U+06D3 that has a
        * row in chartable gets the number of forms listed in that row.
        * Everything else (vowels included) counts as 1.
        */
        static int Shapecount(char s) {
            if (s == ZWJ) {
                return 4;
            }
            if (s < '\u0621' || s > '\u06D3' || IsVowel(s)) {
                return 1;
            }

            int lo = 0;
            int hi = chartable.Length - 1;
            while (lo <= hi) {
                int mid = (lo + hi) / 2;
                char[] row = chartable[mid];
                if (row[0] < s) {
                    lo = mid + 1;
                }
                else if (row[0] > s) {
                    hi = mid - 1;
                }
                else {
                    /* the first column is the base letter itself, not a form */
                    return row.Length - 1;
                }
            }
            return 1;
        }

        /**
        * Tries to merge newchar into the character cluster held in oldchar,
        * updating oldchar in place when the merge succeeds.
        *
        * Return value:
        *   0 == no ligature possible (oldchar is left untouched)
        *   1 == newchar was attached as a vowel/mark (lignum is incremented)
        *   2 == two chars were combined (base letter replaced, or an already
        *        attached vowel was superseded)
        *   3 == Lam+Alef
        */
        static int Ligature(char newchar, Charstruct oldchar) {
            char current = oldchar.basechar;
            if (current == 0) {
                return 0;                 /* empty cluster, nothing to join with */
            }

            if (!IsVowel(newchar)) {
                /* A letter can only be absorbed as the ALEF half of a LAM-ALEF,
                 * and only as long as no vowel has been attached to the LAM. */
                if (oldchar.vowel != 0 || current != LAM) {
                    return 0;
                }
                char lamAlef;
                if (newchar == ALEF) {
                    lamAlef = LAM_ALEF;
                }
                else if (newchar == ALEFHAMZA) {
                    lamAlef = LAM_ALEFHAMZA;
                }
                else if (newchar == ALEFHAMZABELOW) {
                    lamAlef = LAM_ALEFHAMZABELOW;
                }
                else if (newchar == ALEFMADDA) {
                    lamAlef = LAM_ALEFMADDA;
                }
                else {
                    return 0;
                }
                oldchar.basechar = lamAlef;
                oldchar.numshapes = 2;
                return 3;
            }

            /* newchar is a combining mark. Anything but SHADDA arriving after a
             * vowel is reported as 2 because the old vowel gets eliminated. */
            int result = (oldchar.vowel != 0 && newchar != SHADDA) ? 2 : 1;

            if (newchar == SHADDA) {
                if (oldchar.mark1 != 0) {
                    return 0;             /* mark slot already taken */
                }
                oldchar.mark1 = SHADDA;
            }
            else if (newchar == HAMZABELOW) {
                if (current == ALEF) {
                    oldchar.basechar = ALEFHAMZABELOW;
                    result = 2;
                }
                else if (current == LAM_ALEF) {
                    oldchar.basechar = LAM_ALEFHAMZABELOW;
                    result = 2;
                }
                else {
                    oldchar.mark1 = HAMZABELOW;
                }
            }
            else if (newchar == HAMZAABOVE) {
                if (current == ALEF) {
                    oldchar.basechar = ALEFHAMZA;
                    result = 2;
                }
                else if (current == LAM_ALEF) {
                    oldchar.basechar = LAM_ALEFHAMZA;
                    result = 2;
                }
                else if (current == WAW) {
                    oldchar.basechar = WAWHAMZA;
                    result = 2;
                }
                else if (current == YEH || current == ALEFMAKSURA || current == FARSIYEH) {
                    oldchar.basechar = YEHHAMZA;
                    result = 2;
                }
                else {
                    /* whatever sense this may make .. */
                    oldchar.mark1 = HAMZAABOVE;
                }
            }
            else if (newchar == MADDA) {
                /* NOTE(review): a MADDA on any base other than ALEF is consumed
                 * here without being stored anywhere - confirm this is intended. */
                if (current == ALEF) {
                    oldchar.basechar = ALEFMADDA;
                    result = 2;
                }
            }
            else {
                oldchar.vowel = newchar;
            }

            if (result == 1) {
                oldchar.lignum++;
            }
            return result;
        }

        /**
        * Appends a shaped cluster to the output: the base character first,
        * then mark1 and the vowel if they are set.
        *
        * When the ar_novowel bit is set in level, mark1 and the vowel are
        * dropped from the output. s.lignum is decremented once for the base
        * character and once for every mark that is set, whether or not the
        * mark was written. A cluster without a base character is ignored.
        */
        static void Copycstostring(StringBuilder str, Charstruct s, int level) {
            if (s.basechar == 0) {
                return;
            }
            bool keepMarks = (level & ar_novowel) == 0;

            str.Append(s.basechar);
            s.lignum--;

            if (s.mark1 != 0) {
                if (keepMarks) {
                    str.Append(s.mark1);
                }
                s.lignum--;
            }

            if (s.vowel != 0) {
                if (keepMarks) {
                    str.Append(s.vowel);
                }
                s.lignum--;               /* counted even when the vowel is eliminated */
            }
        }

        /**
        * Second pass over already shaped text: replaces pairs of adjacent
        * characters by a single presentation ligature, compacting the buffer
        * in place.
        *
        * Tashkeel pairs (SHADDA with a vowel, in either order) are combined when
        * the ar_composedtashkeel bit is set in level; letter pairs are combined
        * when the ar_lig bit is set.
        *
        * Nothing is returned: the new length is written to str.Length.
        *
        * @param str   the shaped text; modified in place and possibly shortened
        * @param level a combination of the ar_* option bits
        */
        internal static void Doublelig(StringBuilder str, int level)
        /* Ok. We have presentation ligatures in our font. */
        {
            int len;
            int olen = len = str.Length;
            /* j is the write position (left character of the pair under test),
             * si is the read position (right character of the pair). */
            int j = 0, si = 1;
            char lapresult;

            while (si < olen) {
                lapresult = (char)0;
                if ((level & ar_composedtashkeel) != 0) {
                    switch (str[j]) {
                        case SHADDA:
                            switch (str[si]) {
                                case KASRA:
                                    lapresult = '\uFC62';
                                    break;
                                case FATHA:
                                    lapresult = '\uFC60';
                                    break;
                                case DAMMA:
                                    lapresult = '\uFC61';
                                    break;
                                case '\u064C':       /* DAMMATAN */
                                    lapresult = '\uFC5E';
                                    break;
                                case '\u064D':       /* KASRATAN */
                                    lapresult = '\uFC5F';
                                    break;
                            }
                            break;
                        /* the vowel may also come first; the result is the same ligature */
                        case KASRA:
                            if (str[si] == SHADDA)
                                lapresult = '\uFC62';
                            break;
                        case FATHA:
                            if (str[si] == SHADDA)
                                lapresult = '\uFC60';
                            break;
                        case DAMMA:
                            if (str[si] == SHADDA)
                                lapresult = '\uFC61';
                            break;
                    }
                }

                if ((level & ar_lig) != 0) {
                    switch (str[j]) {
                        case '\uFEDF':       /* LAM initial */
                            switch (str[si]) {
                                case '\uFE9E':
                                    lapresult = '\uFC3F';
                                    break;        /* JEEM final */
                                case '\uFEA0':
                                    lapresult = '\uFCC9';
                                    break;        /* JEEM medial */
                                case '\uFEA2':
                                    lapresult = '\uFC40';
                                    break;        /* HAH final */
                                case '\uFEA4':
                                    lapresult = '\uFCCA';
                                    break;        /* HAH medial */
                                case '\uFEA6':
                                    lapresult = '\uFC41';
                                    break;        /* KHAH final */
                                case '\uFEA8':
                                    lapresult = '\uFCCB';
                                    break;        /* KHAH medial */
                                case '\uFEE2':
                                    lapresult = '\uFC42';
                                    break;        /* MEEM final */
                                case '\uFEE4':
                                    lapresult = '\uFCCC';
                                    break;        /* MEEM medial */
                            }
                            break;
                        case '\uFE97':       /* TEH initial */
                            switch (str[si]) {
                                case '\uFEA0':
                                    lapresult = '\uFCA1';
                                    break;        /* JEEM medial */
                                case '\uFEA4':
                                    lapresult = '\uFCA2';
                                    break;        /* HAH medial */
                                case '\uFEA8':
                                    lapresult = '\uFCA3';
                                    break;        /* KHAH medial */
                            }
                            break;
                        case '\uFE91':       /* BEH initial */
                            switch (str[si]) {
                                case '\uFEA0':
                                    lapresult = '\uFC9C';
                                    break;        /* JEEM medial */
                                case '\uFEA4':
                                    lapresult = '\uFC9D';
                                    break;        /* HAH medial */
                                case '\uFEA8':
                                    lapresult = '\uFC9E';
                                    break;        /* KHAH medial */
                            }
                            break;
                        case '\uFEE7':       /* NOON initial */
                            switch (str[si]) {
                                case '\uFEA0':
                                    lapresult = '\uFCD2';
                                    break;        /* JEEM medial */
                                case '\uFEA4':
                                    lapresult = '\uFCD3';
                                    break;        /* HAH medial */
                                case '\uFEA8':
                                    lapresult = '\uFCD4';
                                    break;        /* KHAH medial */
                            }
                            break;

                        case '\uFEE8':       /* NOON medial */
                            switch (str[si]) {
                                case '\uFEAE':
                                    lapresult = '\uFC8A';
                                    break;        /* REH final  */
                                case '\uFEB0':
                                    lapresult = '\uFC8B';
                                    break;        /* ZAIN final */
                            }
                            break;
                        case '\uFEE3':       /* MEEM initial */
                            switch (str[si]) {
                                case '\uFEA0':
                                    lapresult = '\uFCCE';
                                    break;        /* JEEM medial */
                                case '\uFEA4':
                                    lapresult = '\uFCCF';
                                    break;        /* HAH medial */
                                case '\uFEA8':
                                    lapresult = '\uFCD0';
                                    break;        /* KHAH medial */
                                case '\uFEE4':
                                    lapresult = '\uFCD1';
                                    break;        /* MEEM medial */
                            }
                            break;

                        case '\uFED3':       /* FEH initial */
                            switch (str[si]) {
                                case '\uFEF2':
                                    lapresult = '\uFC32';
                                    break;        /* YEH final */
                            }
                            break;

                        default:
                            break;
                    }                   /* end switch str[j] */
                }
                if (lapresult != 0) {
                    /* The pair collapses into one character: overwrite the left
                     * one, keep j where it is and shrink the final length. */
                    str[j] = lapresult;
                    len--;
                    si++;                 /* jump over one character */
                    /* we'll have to change this, too. */
                }
                else {
                    /* No ligature: move the right character down to the next
                     * write position and advance both indices. */
                    j++;
                    str[j] = str[si];
                    si++;
                }
            }
            /* drop the tail left over from the collapsed pairs */
            str.Length = len;
        }

        /**
        * Tells whether a cluster can join the character that follows it.
        * That is the case when it has more than two presentation forms,
        * i.e. it has initial/medial forms and not just isolated/final ones.
        */
        static bool Connects_to_left(Charstruct a) {
            return 2 < a.numshapes;
        }

        /**
        * Does the basic arabic reshaping: walks over text, groups every base
        * letter with the marks that Ligature() can attach to it, picks the
        * presentation form of each group from its neighbours and appends the
        * result to str.
        *
        * Three clusters are in play at any time: 'previous' (already shaped,
        * not yet written), 'current' (being built) and the incoming letter.
        * The form of 'current' is only known once the next letter has been
        * seen, which is why output lags one cluster behind the input.
        *
        * @param text  the original, unshaped text
        * @param str   receives the shaped text; assumed to be empty
        * @param level a combination of the ar_* option bits
        */
        internal static void Shape(char[] text, StringBuilder str, int level) {
            Charstruct previous = new Charstruct();
            Charstruct current = new Charstruct();

            foreach (char letter in text) {
                if (Ligature(letter, current) != 0) {
                    continue;             /* letter was absorbed into current */
                }

                /* letter starts a new cluster, so current is complete: shape it */
                int letterShapes = Shapecount(letter);
                /* 0/1 = isolated/final when the new letter cannot join,
                 * 2/3 = initial/medial when it can */
                int form = (letterShapes == 1) ? 0 : 2;
                if (Connects_to_left(previous)) {
                    form++;
                }
                /* fold the form onto the ones current actually has */
                form %= current.numshapes;
                current.basechar = Charshape(current.basechar, form);

                /* previous is final now: flush it and shift the window */
                Copycstostring(str, previous, level);
                previous = current;

                current = new Charstruct();
                current.basechar = letter;
                current.numshapes = letterShapes;
                current.lignum++;
            }

            /* The last cluster has no successor: final or isolated only. */
            int lastForm = Connects_to_left(previous) ? 1 : 0;
            lastForm %= current.numshapes;
            current.basechar = Charshape(current.basechar, lastForm);

            Copycstostring(str, previous, level);
            Copycstostring(str, current, level);
        }

        /**
        * Shapes src[srcoffset .. srcoffset+srclength) and writes the result to
        * dest starting at destoffset.
        *
        * The input is copied to a private buffer before anything is written,
        * so the call is safe even when src and dest are the same array.
        *
        * @param src        the text to shape
        * @param srcoffset  index of the first character to read from src
        * @param srclength  number of characters to read from src
        * @param dest       receives the shaped text
        * @param destoffset index in dest at which writing starts
        * @param destlength not used by this method; the caller must make sure
        *                   dest has room for the returned number of characters
        * @param level      a combination of the ar_* option bits
        * @return the number of characters written to dest
        */
        internal static int Arabic_shape(char[] src, int srcoffset, int srclength, char[] dest, int destoffset, int destlength, int level) {
            char[] input = new char[srclength];
            Array.Copy(src, srcoffset, input, 0, srclength);

            StringBuilder shaped = new StringBuilder(srclength);
            Shape(input, shaped, level);
            if ((level & (ar_composedtashkeel | ar_lig)) != 0) {
                Doublelig(shaped, level);
            }

            /* Copy straight out of the builder instead of materialising a
             * string and a char[] first. */
            int shapedLength = shaped.Length;
            shaped.CopyTo(0, dest, destoffset, shapedLength);
            return shapedLength;
        }

        /**
        * Converts the digits in text[offset .. offset+length) in place, as
        * requested by the digit bits of options.
        *
        * The DIGIT_TYPE_MASK bits select the Arabic digit block (Arabic-Indic
        * or Eastern Arabic-Indic). The DIGITS_MASK bits select the conversion:
        * DIGITS_EN2AN, DIGITS_AN2EN, or one of the two context dependent
        * variants handled by ShapeToArabicDigitsWithContext. If no DIGITS_MASK
        * bit is set the text is left untouched.
        *
        * @param text    the characters to convert; modified in place
        * @param offset  index of the first character to look at
        * @param length  number of characters to look at
        * @param options a combination of the DIGITS_* and DIGIT_TYPE_* constants
        */
        internal static void ProcessNumbers(char[] text, int offset, int length, int options) {
            if ((options & DIGITS_MASK) == 0) {
                return;
            }
            int limit = offset + length;

            char digitBase = '\u0030'; // European digits
            switch (options & DIGIT_TYPE_MASK) {
                case DIGIT_TYPE_AN:
                    digitBase = '\u0660';  // Arabic-Indic digits
                    break;

                case DIGIT_TYPE_AN_EXTENDED:
                    digitBase = '\u06f0';  // Eastern Arabic-Indic digits (Persian and Urdu)
                    break;

                default:
                    break;
            }

            switch (options & DIGITS_MASK) {
                case DIGITS_EN2AN: {
                    int digitDelta = digitBase - '\u0030';
                    for (int i = offset; i < limit; ++i) {
                        char ch = text[i];
                        if (ch <= '\u0039' && ch >= '\u0030') {
                            text[i] = (char)(ch + digitDelta);
                        }
                    }
                }
                break;

                case DIGITS_AN2EN: {
                    char digitTop = (char)(digitBase + 9);
                    // Negative delta: do the arithmetic in int and cast the sum,
                    // rather than casting the delta itself to char.
                    int digitDelta = '\u0030' - digitBase;
                    for (int i = offset; i < limit; ++i) {
                        char ch = text[i];
                        if (ch <= digitTop && ch >= digitBase) {
                            text[i] = (char)(ch + digitDelta);
                        }
                    }
                }
                break;

                // The context dependent variants must cover the same slice as
                // the other modes, so they start at offset, not at index 0.
                case DIGITS_EN2AN_INIT_LR:
                    ShapeToArabicDigitsWithContext(text, offset, length, digitBase, false);
                    break;

                case DIGITS_EN2AN_INIT_AL:
                    ShapeToArabicDigitsWithContext(text, offset, length, digitBase, true);
                    break;

                default:
                    break;
            }
        }

        /**
        * Replaces European digits by digits of the block starting at digitBase,
        * but only where the most recent strongly directional character was an
        * Arabic letter.
        *
        * Direction classes come from BidiOrder.GetDirection: L and R clear the
        * "last strong was Arabic" state, AL sets it, and an EN character not
        * above U+0039 is shifted while the state is set. Other classes leave
        * both the state and the character alone.
        *
        * @param dest            the characters to convert; modified in place
        * @param start           index of the first character to look at
        * @param length          number of characters to look at
        * @param digitBase       the zero digit of the target digit block
        * @param lastStrongWasAL the state assumed at the start of the slice
        */
        internal static void ShapeToArabicDigitsWithContext(char[] dest, int start, int length, char digitBase,  bool lastStrongWasAL) {
            /* distance from '0' to the target zero, computed once */
            char shift = (char)(digitBase - '0');
            int end = start + length;

            for (int pos = start; pos < end; pos++) {
                char current = dest[pos];
                switch (BidiOrder.GetDirection(current)) {
                    case BidiOrder.AL:
                        lastStrongWasAL = true;
                        break;
                    case BidiOrder.EN:
                        if (lastStrongWasAL && current <= '\u0039') {
                            dest[pos] = (char)(current + shift);
                        }
                        break;
                    case BidiOrder.L:
                    case BidiOrder.R:
                        lastStrongWasAL = false;
                        break;
                    default:
                        break;
                }
            }
        }

        /* Base letters that Ligature() tests for or produces. HAMZA and TATWEEL
         * are declared but not referenced by the methods of this class. */
        private const char ALEF = '\u0627';
        private const char ALEFHAMZA = '\u0623';
        private const char ALEFHAMZABELOW = '\u0625';
        private const char ALEFMADDA = '\u0622';
        private const char LAM = '\u0644';
        private const char HAMZA = '\u0621';
        private const char TATWEEL = '\u0640';
        /* Zero width joiner; Shapecount() reports 4 shapes for it. */
        private const char ZWJ = '\u200D';

        /* Combining hamzas; Ligature() folds them into the base letter where a
         * precomposed letter exists and stores them as mark1 otherwise. */
        private const char HAMZAABOVE = '\u0654';
        private const char HAMZABELOW = '\u0655';

        /* Letters that take part in the HAMZAABOVE compositions. */
        private const char WAWHAMZA = '\u0624';
        private const char YEHHAMZA = '\u0626';
        private const char WAW = '\u0648';
        private const char ALEFMAKSURA = '\u0649';
        private const char YEH = '\u064A';
        private const char FARSIYEH = '\u06CC';

        /* Marks that IsVowel() accepts and that Ligature()/Doublelig() handle
         * individually. */
        private const char SHADDA = '\u0651';
        private const char KASRA = '\u0650';
        private const char FATHA = '\u064E';
        private const char DAMMA = '\u064F';
        private const char MADDA = '\u0653';

        /* LAM-ALEF ligatures. Charshape() gets the other form of each one by
         * adding the form index to these code points. */
        private const char LAM_ALEF = '\uFEFB';
        private const char LAM_ALEFHAMZA = '\uFEF7';
        private const char LAM_ALEFHAMZABELOW = '\uFEF9';
        private const char LAM_ALEFMADDA = '\uFEF5';

        /**
        * Presentation forms of the letters this class can shape.
        *
        * Each row starts with the base letter and is followed by its forms in
        * the order isolated, final, initial, medial. Rows with only two forms
        * (or one, for HAMZA) belong to letters without initial/medial forms.
        *
        * Charshape() and Shapecount() search this table with a binary search on
        * the first column, so the rows MUST stay sorted by base letter.
        */
        private static char[][] chartable = {
            new char[]{'\u0621', '\uFE80'}, /* HAMZA */
            new char[]{'\u0622', '\uFE81', '\uFE82'}, /* ALEF WITH MADDA ABOVE */
            new char[]{'\u0623', '\uFE83', '\uFE84'}, /* ALEF WITH HAMZA ABOVE */
            new char[]{'\u0624', '\uFE85', '\uFE86'}, /* WAW WITH HAMZA ABOVE */
            new char[]{'\u0625', '\uFE87', '\uFE88'}, /* ALEF WITH HAMZA BELOW */
            new char[]{'\u0626', '\uFE89', '\uFE8A', '\uFE8B', '\uFE8C'}, /* YEH WITH HAMZA ABOVE */
            new char[]{'\u0627', '\uFE8D', '\uFE8E'}, /* ALEF */
            new char[]{'\u0628', '\uFE8F', '\uFE90', '\uFE91', '\uFE92'}, /* BEH */
            new char[]{'\u0629', '\uFE93', '\uFE94'}, /* TEH MARBUTA */
            new char[]{'\u062A', '\uFE95', '\uFE96', '\uFE97', '\uFE98'}, /* TEH */
            new char[]{'\u062B', '\uFE99', '\uFE9A', '\uFE9B', '\uFE9C'}, /* THEH */
            new char[]{'\u062C', '\uFE9D', '\uFE9E', '\uFE9F', '\uFEA0'}, /* JEEM */
            new char[]{'\u062D', '\uFEA1', '\uFEA2', '\uFEA3', '\uFEA4'}, /* HAH */
            new char[]{'\u062E', '\uFEA5', '\uFEA6', '\uFEA7', '\uFEA8'}, /* KHAH */
            new char[]{'\u062F', '\uFEA9', '\uFEAA'}, /* DAL */
            new char[]{'\u0630', '\uFEAB', '\uFEAC'}, /* THAL */
            new char[]{'\u0631', '\uFEAD', '\uFEAE'}, /* REH */
            new char[]{'\u0632', '\uFEAF', '\uFEB0'}, /* ZAIN */
            new char[]{'\u0633', '\uFEB1', '\uFEB2', '\uFEB3', '\uFEB4'}, /* SEEN */
            new char[]{'\u0634', '\uFEB5', '\uFEB6', '\uFEB7', '\uFEB8'}, /* SHEEN */
            new char[]{'\u0635', '\uFEB9', '\uFEBA', '\uFEBB', '\uFEBC'}, /* SAD */
            new char[]{'\u0636', '\uFEBD', '\uFEBE', '\uFEBF', '\uFEC0'}, /* DAD */
            new char[]{'\u0637', '\uFEC1', '\uFEC2', '\uFEC3', '\uFEC4'}, /* TAH */
            new char[]{'\u0638', '\uFEC5', '\uFEC6', '\uFEC7', '\uFEC8'}, /* ZAH */
            new char[]{'\u0639', '\uFEC9', '\uFECA', '\uFECB', '\uFECC'}, /* AIN */
            new char[]{'\u063A', '\uFECD', '\uFECE', '\uFECF', '\uFED0'}, /* GHAIN */
            new char[]{'\u0640', '\u0640', '\u0640', '\u0640', '\u0640'}, /* TATWEEL */
            new char[]{'\u0641', '\uFED1', '\uFED2', '\uFED3', '\uFED4'}, /* FEH */
            new char[]{'\u0642', '\uFED5', '\uFED6', '\uFED7', '\uFED8'}, /* QAF */
            new char[]{'\u0643', '\uFED9', '\uFEDA', '\uFEDB', '\uFEDC'}, /* KAF */
            new char[]{'\u0644', '\uFEDD', '\uFEDE', '\uFEDF', '\uFEE0'}, /* LAM */
            new char[]{'\u0645', '\uFEE1', '\uFEE2', '\uFEE3', '\uFEE4'}, /* MEEM */
            new char[]{'\u0646', '\uFEE5', '\uFEE6', '\uFEE7', '\uFEE8'}, /* NOON */
            new char[]{'\u0647', '\uFEE9', '\uFEEA', '\uFEEB', '\uFEEC'}, /* HEH */
            new char[]{'\u0648', '\uFEED', '\uFEEE'}, /* WAW */
            new char[]{'\u0649', '\uFEEF', '\uFEF0', '\uFBE8', '\uFBE9'}, /* ALEF MAKSURA */
            new char[]{'\u064A', '\uFEF1', '\uFEF2', '\uFEF3', '\uFEF4'}, /* YEH */
            new char[]{'\u0671', '\uFB50', '\uFB51'}, /* ALEF WASLA */
            new char[]{'\u0679', '\uFB66', '\uFB67', '\uFB68', '\uFB69'}, /* TTEH */
            new char[]{'\u067A', '\uFB5E', '\uFB5F', '\uFB60', '\uFB61'}, /* TTEHEH */
            new char[]{'\u067B', '\uFB52', '\uFB53', '\uFB54', '\uFB55'}, /* BEEH */
            new char[]{'\u067E', '\uFB56', '\uFB57', '\uFB58', '\uFB59'}, /* PEH */
            new char[]{'\u067F', '\uFB62', '\uFB63', '\uFB64', '\uFB65'}, /* TEHEH */
            new char[]{'\u0680', '\uFB5A', '\uFB5B', '\uFB5C', '\uFB5D'}, /* BEHEH */
            new char[]{'\u0683', '\uFB76', '\uFB77', '\uFB78', '\uFB79'}, /* NYEH */
            new char[]{'\u0684', '\uFB72', '\uFB73', '\uFB74', '\uFB75'}, /* DYEH */
            new char[]{'\u0686', '\uFB7A', '\uFB7B', '\uFB7C', '\uFB7D'}, /* TCHEH */
            new char[]{'\u0687', '\uFB7E', '\uFB7F', '\uFB80', '\uFB81'}, /* TCHEHEH */
            new char[]{'\u0688', '\uFB88', '\uFB89'}, /* DDAL */
            new char[]{'\u068C', '\uFB84', '\uFB85'}, /* DAHAL */
            new char[]{'\u068D', '\uFB82', '\uFB83'}, /* DDAHAL */
            new char[]{'\u068E', '\uFB86', '\uFB87'}, /* DUL */
            new char[]{'\u0691', '\uFB8C', '\uFB8D'}, /* RREH */
            new char[]{'\u0698', '\uFB8A', '\uFB8B'}, /* JEH */
            new char[]{'\u06A4', '\uFB6A', '\uFB6B', '\uFB6C', '\uFB6D'}, /* VEH */
            new char[]{'\u06A6', '\uFB6E', '\uFB6F', '\uFB70', '\uFB71'}, /* PEHEH */
            new char[]{'\u06A9', '\uFB8E', '\uFB8F', '\uFB90', '\uFB91'}, /* KEHEH */
            new char[]{'\u06AD', '\uFBD3', '\uFBD4', '\uFBD5', '\uFBD6'}, /* NG */
            new char[]{'\u06AF', '\uFB92', '\uFB93', '\uFB94', '\uFB95'}, /* GAF */
            new char[]{'\u06B1', '\uFB9A', '\uFB9B', '\uFB9C', '\uFB9D'}, /* NGOEH */
            new char[]{'\u06B3', '\uFB96', '\uFB97', '\uFB98', '\uFB99'}, /* GUEH */
            new char[]{'\u06BA', '\uFB9E', '\uFB9F'}, /* NOON GHUNNA */
            new char[]{'\u06BB', '\uFBA0', '\uFBA1', '\uFBA2', '\uFBA3'}, /* RNOON */
            new char[]{'\u06BE', '\uFBAA', '\uFBAB', '\uFBAC', '\uFBAD'}, /* HEH DOACHASHMEE */
            new char[]{'\u06C0', '\uFBA4', '\uFBA5'}, /* HEH WITH YEH ABOVE */
            new char[]{'\u06C1', '\uFBA6', '\uFBA7', '\uFBA8', '\uFBA9'}, /* HEH GOAL */
            new char[]{'\u06C5', '\uFBE0', '\uFBE1'}, /* KIRGHIZ OE */
            new char[]{'\u06C6', '\uFBD9', '\uFBDA'}, /* OE */
            new char[]{'\u06C7', '\uFBD7', '\uFBD8'}, /* U */
            new char[]{'\u06C8', '\uFBDB', '\uFBDC'}, /* YU */
            new char[]{'\u06C9', '\uFBE2', '\uFBE3'}, /* KIRGHIZ YU */
            new char[]{'\u06CB', '\uFBDE', '\uFBDF'}, /* VE */
            new char[]{'\u06CC', '\uFBFC', '\uFBFD', '\uFBFE', '\uFBFF'}, /* FARSI YEH */
            new char[]{'\u06D0', '\uFBE4', '\uFBE5', '\uFBE6', '\uFBE7'}, /* E */
            new char[]{'\u06D2', '\uFBAE', '\uFBAF'}, /* YEH BARREE */
            new char[]{'\u06D3', '\uFBB0', '\uFBB1'} /* YEH BARREE WITH HAMZA ABOVE */
            };

            /** Shaping option: no optional processing. */
            public const int ar_nothing  = 0x0;
            /** Shaping option: drop the vowel and the mark of every character from the output. */
            public const int ar_novowel = 0x1;
            /** Shaping option: combine SHADDA/vowel pairs into a single presentation ligature. */
            public const int ar_composedtashkeel = 0x4;
            /** Shaping option: combine the letter pairs known to Doublelig into presentation ligatures. */
            public const int ar_lig = 0x8;
            /**
            * Digit shaping option: Replace European digits (U+0030...U+0039) by Arabic-Indic digits.
            */
            public const int DIGITS_EN2AN = 0x20;

            /**
            * Digit shaping option: Replace Arabic-Indic digits by European digits (U+0030...U+0039).
            */
            public const int DIGITS_AN2EN = 0x40;

            /**
            * Digit shaping option:
            * Replace European digits (U+0030...U+0039) by Arabic-Indic digits
            * if the most recent strongly directional character
            * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
            * The initial state at the start of the text is assumed to be not an Arabic
            * letter, so European digits at the start of the text will not change.
            * Compare to DIGITS_EN2AN_INIT_AL.
            */
            public const int DIGITS_EN2AN_INIT_LR = 0x60;

            /**
            * Digit shaping option:
            * Replace European digits (U+0030...U+0039) by Arabic-Indic digits
            * if the most recent strongly directional character
            * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
            * The initial state at the start of the text is assumed to be an Arabic
            * letter, so European digits at the start of the text will change.
            * Compare to DIGITS_EN2AN_INIT_LR.
            */
            public const int DIGITS_EN2AN_INIT_AL = 0x80;

            /** Not a valid option value. */
            private const int DIGITS_RESERVED = 0xa0;

            /**
            * Bit mask for digit shaping options.
            */
            public const int DIGITS_MASK = 0xe0;

            /**
            * Digit type option: Use Arabic-Indic digits (U+0660...U+0669).
            */
            public const int DIGIT_TYPE_AN = 0;

            /**
            * Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9).
            */
            public const int DIGIT_TYPE_AN_EXTENDED = 0x100;

            /**
            * Bit mask for digit type options. Only bit 0x100 is part of the mask.
            */
            public const int DIGIT_TYPE_MASK = '\u0100'; // open question left by the original author: should this be '\u3f00'?

            /**
            * One output cluster while shaping: a base letter together with the
            * marks that were attached to it. A basechar of zero means "empty".
            */
            private class Charstruct {
                internal char basechar;            /* the base letter, replaced by its presentation form once shaped */
                internal char mark1;               /* has to be initialized to zero */
                internal char vowel;               /* attached vowel, zero if none */
                internal int lignum;           /* is a ligature with lignum additional characters */
                internal int numshapes = 1;    /* number of presentation forms of basechar */
            };


    }
}