hebrewshape.cs source code in C# .NET

Source code for the .NET framework in C#

                        

Code:

/ Net / Net / 3.5.50727.3053 / DEVDIV / depot / DevDiv / releases / Orcas / SP / wpf / src / Core / CSharp / MS / Internal / Shaping / hebrewshape.cs / 1 / hebrewshape.cs

                            //---------------------------------------------------------------------- 
//
//  Microsoft Windows Client Platform
//  Copyright (C) Microsoft Corporation, 2003
// 
//  File:      HebrewShape.cs
// 
//  Contents:  Implementation of Hebrew shaping engine and its factory 
//
//  Created:   08-05-2003 Nick Beal (nbeal) 
//
//-----------------------------------------------------------------------

// #define VALIDATE_CLUSTER_PARAMETERS 

using System; 
using System.Security; 
using System.Security.Permissions;
using System.Diagnostics; 
using System.Collections;
using System.Globalization;
using System.Windows;
using System.Windows.Media; 
using System.Windows.Media.TextFormatting;
using MS.Internal.FontCache; 
using MS.Internal.FontFace; 
using MS.Internal.PresentationCore;
 

namespace MS.Internal.Shaping
{
 

    /// <summary>
    /// HebrewCharClass - enumeration of Hebrew classification flags, ordered by
    /// ordinal position.
    /// </summary>
    /// <remarks>
    /// To allow for all possible Biblical combinations of cantillation
    /// marks (teamin) and nikud, a number of cantillation classes
    /// are defined.
    /// Note that the order is important here.  OpenType
    /// fonts' rules expect that the base/diacritic glyph sequences
    /// are arranged canonically (ie, a diacritic or cantillation mark whose
    /// HebrewCharClass value is small canonically precedes a character
    /// whose HebrewCharClass value is larger).  Note that our "canonical"
    /// order is not entirely consistent with the Unicode canonical weighting.
    /// Several members are aliases (FirstBaseChar, LastBaseChar, LastDiacritic,
    /// UnicodeCGJ) that share the value of the member they are assigned from.
    /// </remarks>
    internal enum HebrewCharClass : byte 
    {
        UnknownCharClass,     // Unknown class -- can take any diacritic 
        PunctuationMark,      // PunctuationMark -- cannot take diacritics 
        BaseAcceptsVowel,     // Thaana base glyph can have vowel
        FirstBaseChar = BaseAcceptsVowel, // alias: lowest-valued base character class
        BaseAcceptsNotShin,   // Hebrew base can have dagesh && rafe && cantillation & vowel
        BaseAcceptsRafe,      // Hebrew base can have rafe && cantillation && vowel
        BaseAcceptsDagesh,    // Hebrew base can have dagesh && cantillation && vowel
        ShinBaseChar,         // Hebrew shin can have shin/sin dot && dagesh && rafe && cantillation && vowel 
        SpaceChar,            // space may be base of shin/sin dot | dagesh | cantillation | vowel (only one)
        LastBaseChar = SpaceChar, // alias: highest-valued base character class
        ShinSinDot,           // Hebrew shin/sin dot (05C1, 05C2) 
        Dagesh,               // Hebrew dagesh/mapiq (05BC)
        Rafe,                 // Hebrew Rafe (05BF) 
        Holam,                // Hebrew Holam (05B9)
        Hataf,                // Hebrew hataf vowels -- only one per base
        Vowel,                // Hebrew vowels -- only two per base
        LowerMark,            // NOTE(review): undocumented in the original; presumably marks placed below the base -- confirm against the classification tables
        LowerRightMark,       // Hebrew cantillation - Below right
        UpperMark,            // NOTE(review): undocumented in the original; presumably marks placed above the base -- confirm against the classification tables
        UpperLeftMark,        // Hebrew cantillation - Above left 
        Puncta,               // Hebrew upper dot (05C4)
        MasoraCircle,         // Hebrew cantillation - Masora circle
        LastDiacritic = MasoraCircle, // alias: highest-valued diacritic class
        UnicodeCombiningMark, // Combining marks
        UnicodeCGJ = UnicodeCombiningMark,// Unicode CGJ (shares the combining mark class value)
        UnicodeLayoutControl,   // ZWNJ, ZWJ 
        NumberOfHebrewCharClasses, // one past the highest class value; keep as the last member
    }; 
 

    /// <summary>
    /// The Hebrew Shaping Engine - (shapes Hebrew text)
    /// </summary>
    /// <remarks>
    /// Derives from BaseShape and overrides its members to shape runs in the
    /// Hebrew and Thaana scripts.  Two helper types do most of the work:
    /// 1.) HebrewCharClassifier - supplies the character classification
    /// 2.) HebrewClusterCop - validates and canonically orders the marks
    ///     attached to each base character
    /// </remarks>
    internal sealed class HebrewShape : BaseShape
    {

        public const char UnicodeCombiningDotBelow  =   '\u0323'; // combining dot below

        // The script tags handed back by the SupportedScripts property.
        private static readonly ScriptTags[] _supportedScripts =
                        { ScriptTags.Hebrew, ScriptTags.Thaana };

        //--------------------------------------
        //
        //  Constructors
        //
        //--------------------------------------

#region Constructors

        /// <summary>
        /// Creates the shaper and sets its text flow direction to right-to-left.
        /// </summary>
        internal HebrewShape()
        {
            _textFlowDirection = TextFlowDirection.RTL;
        }

#endregion

        //--------------------------------------
        //
        //  Internal Methods
        //
        //--------------------------------------

#region Internal methods

        /// <summary>
        /// HebrewShape.GetCharClassifier - returns the classifier used for this
        /// script/font pair.
        /// </summary>
        /// <param name="scriptTag">script tag passed through to the classifier</param>
        /// <param name="fontFace">font face passed through to the classifier</param>
        /// <returns>a new HebrewCharClassifier</returns>
        protected override ShaperCharacterClassifier GetCharClassifier(ScriptTags scriptTag, GlyphTypeface fontFace)
        {
            ShaperCharacterClassifier classifier = new HebrewCharClassifier (scriptTag, fontFace);
            return classifier;
        }



        /// <summary>
        ///     HebrewShape.GetGlyphs - Hebrew override of the GetGlyphs() helper function.
        /// </summary>
        /// <param name="currentRun">shaping workspace for the run being shaped</param>
        /// <param name="item">Text item (not used by this override)</param>
        /// <returns>number of glyphs</returns>
        /// <remarks>
        /// Works in two passes.  The first pass assigns glyph properties one
        /// character at a time and stops at the first character that is neither
        /// a cluster start nor a Unicode layout control.  When that happens the
        /// workspace is rewound and the second pass re-walks the remaining text,
        /// routing every non-cluster-start character through HebrewClusterCop.
        /// </remarks>
        /// <SecurityNote>
        /// Critical - calls critical code
        /// </SecurityNote>
        [SecurityCritical]
        unsafe protected override int GetGlyphs ( ref ShapingWorkspace currentRun, Item item )
        {
            CharShapeInfo shapeInfo;

            // Pass 1: the quick path.  Text made only of cluster starts and
            // layout controls is completely handled by this loop.
            while ( currentRun.SetNextGlyphProperties (out shapeInfo) )
            {
                bool isClusterStartOrControl =
                    (shapeInfo & (CharShapeInfo.IsStartOfCluster | CharShapeInfo.IsUnicodeLayoutControl)) != 0;

                if (isClusterStartOrControl)
                {
                    continue;
                }

                // Found a character that needs the cluster-aware pass below, so
                // rewind the workspace and leave the quick path.
                if (currentRun.CurrentCharIx >= 3)
                {
                    // rewind to the previous char (the base char)
                    ushort rewindCharIx = currentRun.PreviousCharIx;
                    ushort rewindGlyphIx = currentRun.PreviousGlyphIx;

                    bool previousIsLayoutControl =
                        (currentRun.GetShapeInfo(rewindCharIx) & CharShapeInfo.IsUnicodeLayoutControl) != 0;

                    if (previousIsLayoutControl)
                    {
                        // zero width unicode control: step back one more character
                        rewindCharIx--;
                        rewindGlyphIx = currentRun.GetGlyphIx( rewindCharIx );
                    }

                    // the remaining length covers everything from the rewind point on
                    currentRun.Reset(rewindCharIx,
                                     rewindGlyphIx,
                                     (ushort)(currentRun.CharsCount - rewindCharIx));
                }
                else
                {
                    // fewer than 3 characters consumed so far: cheaper to start over
                    currentRun.Reset(0,0,currentRun.CharsCount);
                }

                break;
            }

            // Modern Hebrew has no diacritics, so normally pass 1 consumed
            // everything and there is nothing left to do.
            if ( currentRun.IsFinished )
            {
                return  currentRun.GlyphsCount;
            }

            // Pass 2: cluster-aware shaping.  The reorder buffers are stack
            // allocated; fast to alloc, easy to throw away.
            char *reorderChars = stackalloc char [ HebrewClusterCop.ReorderArraySize ];
            ushort *reorderGlyphs = stackalloc ushort [ HebrewClusterCop.ReorderArraySize ];
            HebrewClusterCop clusterCop = new HebrewClusterCop( reorderChars, reorderGlyphs );

            bool reorderPending = false;

            while (currentRun.GetNextShape(out shapeInfo))
            {
                if ( (shapeInfo & CharShapeInfo.IsStartOfCluster) == 0 )
                {
                    // not a base: let the cluster cop consume this character and
                    // as many following marks as it will accept
                    reorderPending = clusterCop.AddCluster( ref currentRun, shapeInfo );
                    continue;
                }

                // A base character.  If the cluster that just ended still needs
                // reordering, now is the time to rewrite its glyphs.
                if (reorderPending)
                {
                    clusterCop.GetReorderedGlyphs( ref currentRun );
                    reorderPending = false;
                }

                currentRun.SetGlyphPropertiesUsingShapeInfo(shapeInfo);
            }

            // the run may end in the middle of a cluster that needs reordering
            if (reorderPending)
            {
                clusterCop.GetReorderedGlyphs( ref currentRun );
            }

            return  currentRun.GlyphsCount;
        }

        /// <summary>
        /// HebrewShape.SupportedScripts - the scripts this engine shapes.
        /// </summary>
        /// <value>Our supported scripts (Hebrew, Thaana).</value>
        public override ScriptTags[] SupportedScripts
        {
            get
            {
                return _supportedScripts;
            }
        }




#endregion

    }
 
    ///  
    /// Class HebrewClusterCop:
    ///  Manages the diacritic ordering requirement 
    /// 
    /// 
    /// This class is used to provide diacritic ordering and invalid
    /// diacritic enforcement for the diacritics and cantillation marks 
    /// found in the unicode stream.
    /// Diacritic ordering is done by maintaining an array whose extent 
    /// is HebrewCharClassifier.NumberOfClasses. 
    /// See the HebrewCharClass enum for comments about the
    /// char class ordering.
    /// For each base character in the unicode run, the diacritics and
    /// other marks are added to the arrays at the index corresponding
    /// to the HebrewCharClass value.  Only one mark per HebrewCharClass
    /// value is allowed per base character.  Once the extent of the valid 
    /// diacritics and cantillation marks has been determined (ie, when an
    /// illegal mark is found, when a new base is found, or at the end of 
    /// the unicode run) the array is used to reorder the glyphs collected in
    /// the GlyphList object if reordering is needed.
    /// The diacritics reordering done here is not consistent with Unicode 
    /// canonical ordering, but is instead consistent with the needs of
    /// current Hebrew OpenType fonts.
    /// 
    internal struct HebrewClusterCop 
    {
        [SecurityCritical] 
        unsafe private char*       _clusterChars;  // our temporary array for reordering (caller-supplied buffer); also holds the validation entries written by IsIllegalMark
        [SecurityCritical]
        unsafe private ushort*     _clusterGlyphs; // our temporary array for reordering (caller-supplied buffer); glyph ids indexed by reorder position
        private HebrewClusterState _clusterState;  // current reordering state (highest state seen so far in the cluster)
        private bool               _textHasUnicodeControlChars; // set once a ZW control position character has been processed by AddCluster
        private bool               _reorderingIsSuppressed; // when true, AddCluster no longer stores glyphs in _clusterGlyphs
        private bool               _clusterIsInvalid; // set when a mark fails validation; later chars handled by AddCluster then get RequiresInsertedBase
        private bool               _clusterRequiresReordering; // set when a mark arrives whose state is lower than _clusterState; cleared by GetReorderedGlyphs
        private bool               _clusterHasCGJ; // set when AddCluster sees a CGJ character
 
        private HebrewCharClass     _baseCharClass;     // base char's class info
        private ushort             _clusterSize;   // number of diacritics currently in cluster (incremented per stored glyph, decremented by GetNextGlyph)
        private ushort             _nextReorderedGlyphIx;  // for returning reordered glyphs (0 if no reordering); index of the slot last returned by GetNextGlyph
        private ushort             _lastReorderedGlyphIx;  // for returning reordered glyphs (0 if no reordering); highest reorder index that received a glyph
        private ushort             _firstCharIx;   // char index whose glyph slot GetReorderedGlyphs starts overwriting; assigned outside the visible code
        private ushort             _lowerMarksCount; // lower puncta placed so far; offsets from LowerMarksStart
        private ushort             _upperMarksCount; // upper puncta placed so far; offsets from UpperMarksStart
 
 
        /// <summary>
        /// HebrewClusterCop constructor - binds the caller's scratch buffers and
        /// puts every piece of cluster state into its idle/zero value.
        /// </summary>
        /// <param name="clusterArray">buffer used for the reordering characters</param>
        /// <param name="clusterGlyphs">buffer used for the reordering glyphs</param>
        /// <remarks>
        /// The first HebrewReorderPosition.ClusterCopArraySize entries of both
        /// buffers are cleared, so each buffer must hold at least that many
        /// elements.
        /// </remarks>
        /// <SecurityNote>
        /// Critical - accepts a pointer input, uses a raw buffer
        /// </SecurityNote>
        [SecurityCritical]
        unsafe public HebrewClusterCop ( char *clusterArray, ushort *clusterGlyphs )
        {
            // caller-owned scratch buffers
            _clusterChars = clusterArray;
            _clusterGlyphs = clusterGlyphs;

            // classification / state machine
            _baseCharClass = HebrewCharClass.UnknownCharClass;
            _clusterState = HebrewClusterState.Idle;

            // flags
            _clusterIsInvalid = false;
            _reorderingIsSuppressed = false;
            _clusterHasCGJ = false;
            _clusterRequiresReordering = false;
            _textHasUnicodeControlChars = false;

            // indices and counters
            _firstCharIx = 0;
            _lastReorderedGlyphIx = 0;
            _nextReorderedGlyphIx = 0;
            _clusterSize = 0;
            _lowerMarksCount = 0;
            _upperMarksCount = 0;

            // wipe the reorder arrays (they live on the caller's stack)
            int slotCount = (int)HebrewReorderPosition.ClusterCopArraySize;
            for (int slot = 0; slot < slotCount; ++slot)
            {
                _clusterChars[slot] = '\u0000';
                _clusterGlyphs[slot] = 0;
            }
        }
 
        /// <summary>
        /// HebrewClusterCop.AddCluster - adds as many characters as possible to the
        ///                               current cluster
        /// </summary>
        /// <remarks>
        /// This routine is used to create a diacritics cluster on the current
        /// base char.  It continues to add characters till it notes a non-legal
        /// character.  Enforces member count restrictions for the various diacritic
        /// classes and keeps track of reordering needs.
        /// Each accepted character is recorded in the reorder arrays at the index
        /// chosen for its class; a rejected character is flagged with
        /// CharShapeInfo.RequiresInsertedBase (a dotted circle base) and ends the loop.
        /// </remarks>
        /// <param name="currentRun">shaping workspace positioned on the first mark</param>
        /// <param name="currShape">shape info of that first mark</param>
        /// <returns>
        ///     the value of _clusterRequiresReordering, i.e. true when the caller
        ///     still has to call GetReorderedGlyphs for this cluster
        /// </returns>
        /// <SecurityNote>
        /// Critical - calls critical code, uses unsafe accessors
        /// </SecurityNote>
        [SecurityCritical]
        unsafe internal bool AddCluster ( ref ShapingWorkspace currentRun, 
                                             CharShapeInfo currShape )
        { 
            // StartCluster is defined outside the visible code; presumably it
            // (re)initialises the per-cluster state -- TODO confirm.
            bool isClusterInProgress = StartCluster( ref currentRun, currShape ); 
            char currChar = currentRun.CurrentChar;
            ushort currGlyph = currentRun.CharConverter.ToGlyph(currChar); 

            do {
                // the shaper class is stored in the masked bits of the shape info
                HebrewCharClass currClass = (HebrewCharClass)(currShape & CharShapeInfo.ShaperClassMask);
 
                // add this character's glyph to the cluster
                if ( _clusterIsInvalid) 
                { 
                    // cluster was already rejected: this character gets its own
                    // inserted base and the loop stops after it
                    _reorderingIsSuppressed = true;
                    currShape |= CharShapeInfo.RequiresInsertedBase; 
                    isClusterInProgress = false;
                }
                else
                { 

                    // step 1. get the reordering state 
                    HebrewClusterState newState = clusterStateMap[ (int)currClass ]; 

 
                    // step 2. get the reordering array entry (clusterIx)

                    // The position (clusterIx) for a character in the reordering
                    // array depends on the reordering state for the character. 
                    // Several states can contain multiple entries.
                    HebrewReorderPosition clusterIx = clusterPositionMap[ (int)currClass ] ; 
                    switch (clusterIx) 
                    {
                         case HebrewReorderPosition.SinShinPosition: 
                         case HebrewReorderPosition.MasoraCirclePosition:
                            // verify that sin/shin dot or masora circle is legal on this
                            // base
                            _clusterIsInvalid = 
                                IsIllegalPair(_baseCharClass, currClass);
                            break; 
 
                         case HebrewReorderPosition.DageshRafePosition:
                            // diacritic is dagesh, rafe 
                            char firstDageshRafe = _clusterChars[ (ushort)clusterIx ];

                            // We've combined the dagesh and rafe into one reordering class.
                            // However we can have one dagesh and one rafe, but not two of 
                            // either class types.  If there is currently a dagesh/rafe class
                            // member already in this cluster, cluster is done 
                            // if it is the same char class or if there's actually already 
                            // 2 dagesh/rafe char's in array.  If there's not a dagesh in
                            // this cluster, cluster is done if the current base char doesn't 
                            // support dagesh.
                            if ( firstDageshRafe != '\u0000' )
                            {
                                HebrewCharClass prevDageshClass = (HebrewCharClass) 
                                    (currentRun.CharConverter.ToShapeInfo(firstDageshRafe) & CharShapeInfo.ShaperClassMask);
                                _clusterIsInvalid = (prevDageshClass == currClass); 
                                // use the second dagesh/rafe slot; step 3 rejects the
                                // character if that slot is occupied too
                                ++clusterIx; 
                            }
                            else 
                            {
                                _clusterIsInvalid = IsIllegalPair(_baseCharClass, currClass);
                            }
                            break; 

                         case HebrewReorderPosition.UnresolvedPosition: 
                            // all marks that need to be checked against the other 
                            // marks in the cluster will come here...
                            _clusterIsInvalid = IsIllegalMark(currChar, currClass); 

                            if ( !_clusterIsInvalid )
                            {
                                // need to resolve the cluster index (this character is of 
                                // a class that there can be more than one of per cluster)
                                clusterIx = ResolveClusterIx(newState); 
                                if (clusterIx == 0) 
                                {
                                    // ok, it is a puncta.  Punctas don't have their own 
                                    // reordering class.  They are part of the lower marks
                                    // or upper marks class.
                                    if (currChar == HebrewCharClassifier.BelowPuncta)
                                    { 
                                    // change the state, 'cause this is a lower puncta
                                        newState = HebrewClusterState.LowerMarkSeen; 
                                        clusterIx = (HebrewReorderPosition)((ushort)HebrewReorderPosition.LowerMarksStart 
                                                  + _lowerMarksCount++);
                                    } 
                                    else
                                    {
                                    // change the state, 'cause this is an upper puncta
                                        newState = HebrewClusterState.UpperMarkSeen; 
                                        clusterIx = (HebrewReorderPosition)((ushort)HebrewReorderPosition.UpperMarksStart
                                                  + _upperMarksCount++); 
                                    } 
                                }
                            } 
                            break;


                         case HebrewReorderPosition.CombiningMarkPosition: 
                            // combining mark.  Only one per base, won't "share" base.
                             isClusterInProgress = false; 
                            _reorderingIsSuppressed = true; // don't save in cluster array 

                            // We have mapped the CGJ character to the same class so check 
                            // for it...
                            if (currChar == UnicodeCharacter.CGJ)
                            {
                                // CGJ may only be applied to a nun base char, and 
                                // may not "share" any other mark with its base
                                // NOTE(review): as written the cluster is flagged INVALID
                                // exactly when the CGJ directly follows a nun (0x5e0) with
                                // no other marks, which looks inverted relative to the
                                // comment above -- confirm the intended behaviour.
                                _clusterIsInvalid = (_clusterSize == 0 && currentRun.PreviousChar == 0x5e0); 
                                _clusterHasCGJ = true; 
                            }
                            else 
                            {
                                // other combining marks are valid only on a bare base,
                                // or with exactly one stored mark when a CGJ has been seen
                                _clusterIsInvalid = (_clusterSize != (_clusterHasCGJ ? 1 : 0));
                            }
                            break; 

                         case HebrewReorderPosition.ZWControl: 
                            _clusterRequiresReordering = false; 
                            _textHasUnicodeControlChars = true;
                            _reorderingIsSuppressed = true;      // we don't want to confuse things by reordering 
                            currShape |= CharShapeInfo.IsUnicodeLayoutControl;
                            break;

                         default: 
                             // This marks the end of the cluster.  We'll have to re-process this character
                             currentRun.Reset(); 
                             return _clusterRequiresReordering;     // leave now. 
                        }
 
                        // step 3.  make sure any invalid marks get their very own dotted circle base.
                        // A mark is also rejected when its slot is already occupied or, once
                        // a CGJ has been seen, when it is not in the combining mark position.

                        if (_clusterIsInvalid || (_clusterHasCGJ ?
                                    clusterIx != HebrewReorderPosition.CombiningMarkPosition : 
                                    _clusterChars[ (ushort)clusterIx ] != 0))
                        { 
                            // if there's some glyphs to reorder, do it now before we move on... 
                            if (_clusterRequiresReordering)
                            { 
                                GetReorderedGlyphs(ref currentRun);
                            }

                             // for consistency with unmanaged Uniscribe suppress any further reordering 
                            _clusterIsInvalid = _reorderingIsSuppressed = true;
 
                            // add a dotted circle (except if the preceding character was a ZWJ) 
                            if (!_textHasUnicodeControlChars ||
                               (currentRun.PreviousChar != UnicodeCharacter.ZWJ)) 
                            {
                                currShape |= CharShapeInfo.RequiresInsertedBase;
                            }
 
                            isClusterInProgress = false;
                        } 
                        else 
                        {
 
                            // step 4. save this new character in the reordering array
                            _clusterChars[ (ushort)clusterIx ] = currChar;

                            // step 5.  As long as reordering isn't suppressed, save the current glyph in the 
                            // reordering array.  (We may not need to reorder, but we might)
                            if (_reorderingIsSuppressed == false) 
                            { 
                                _clusterGlyphs[ (ushort)clusterIx ] = currGlyph;
                                _clusterSize++; 

                                // step 6. update the reordering state
                                if ( _lastReorderedGlyphIx < (ushort)clusterIx )
                                { 
                                    // Keep _lastReorderedGlyphIx at the "highest" member of the
                                    // reordering array 
                                    _lastReorderedGlyphIx = (ushort)clusterIx; 
                                }
 
                                if (newState >= _clusterState)
                                {
                                    // no reordering needed thus far...
                                    // keep track of the "highest" cluster member 
                                    _clusterState = newState;
 
                                } 
                                else
                                { 
                                    // this current char is "lower" than a previous character
                                    // in the cluster so we're gonna need to reorder this cluster
                                    _clusterRequiresReordering = true;
                                } 
                            }
                        } 
 
                }
 
                // record the (possibly updated) shape flags and glyph for this character
                currentRun.SetGlyphPropertiesUsingGlyph(currShape,currGlyph);

            } while ( isClusterInProgress &&
                      currentRun.GetNextCharProperties (out currChar, out currGlyph, out currShape) ); 

            return _clusterRequiresReordering; 
 
        }
 

        /// <summary>
        ///  HebrewClusterCop.GetNextGlyph - get the next reordered glyph
        /// </summary>
        /// <remarks>
        /// Called once per member of a reordered cluster.  Scans the glyph
        /// reorder array upward from the slot after the one returned last time
        /// and returns the first non-zero glyph found, decrementing the cluster
        /// size.  When nothing is left to return the cluster size is forced to
        /// zero and 0 is returned.
        /// </remarks>
        /// <returns>next reordered glyph, or 0 if there is none</returns>
        /// <SecurityNote>
        /// Critical - calls critical code, uses pointers
        /// </SecurityNote>
        [SecurityCritical]
        unsafe private ushort GetNextGlyph()
        {
            bool hasReorderedGlyphs =
                _lastReorderedGlyphIx != 0 &&
                _clusterSize != 0 &&
                _lastReorderedGlyphIx < (ushort)HebrewReorderPosition.ReorderingArraySize;

            if (!hasReorderedGlyphs)
            {
                _clusterSize = 0;
                return 0;
            }

            ushort slot = _nextReorderedGlyphIx;
            ushort finalSlot = _lastReorderedGlyphIx;

            for (;;)
            {
                ++slot;
                if (slot > finalSlot)
                {
                    // ran off the end of the used part of the array
                    _clusterSize = 0;
                    return 0;
                }

                ushort glyph = _clusterGlyphs[ slot ];
                if (glyph != 0)
                {
                    // remember where to resume on the next call
                    _nextReorderedGlyphIx = slot;
                    --_clusterSize;
                    return glyph;
                }
            }
        }

        /// <summary>
        /// HebrewClusterCop.GetReorderedGlyphs - writes the cluster's glyphs back
        /// into the run in reordered sequence.
        /// </summary>
        /// <param name="currentRun">shaping workspace whose glyphs are rewritten</param>
        /// <returns>the value of ReorderedCount read on entry</returns>
        /// <remarks>
        /// Does nothing when ReorderedCount is zero.  Otherwise glyphs are pulled
        /// from GetNextGlyph and stored in consecutive glyph slots, starting at the
        /// slot of the cluster's first character, until the cluster size reaches
        /// zero; the "requires reordering" flag is then cleared.
        /// </remarks>
        /// <SecurityNote>
        /// Critical - calls critical code, uses pointers
        /// </SecurityNote>
        [SecurityCritical]
        unsafe internal ushort GetReorderedGlyphs(ref ShapingWorkspace currentRun)
        {
            ushort reorderedCount = ReorderedCount;

            if (reorderedCount == 0)
            {
                // nothing to reorder
                return reorderedCount;
            }

            // The glyph list is already filled; overwrite the cluster's slots
            // with the same glyphs in the right order.
            Debug.Assert (_firstCharIx + _clusterSize - (currentRun.IsFinished?1:0) <= currentRun.CurrentCharIx);

            ushort targetGlyphIx = currentRun.GetGlyphIx( _firstCharIx );
            while ( _clusterSize > 0 )
            {
                ushort reorderedGlyph = GetNextGlyph();
                currentRun.SetGlyph(targetGlyphIx, reorderedGlyph);
                targetGlyphIx++;
            }

            _clusterRequiresReordering = false;

            return reorderedCount;
        }

        /// <summary>
        /// HebrewClusterCop.GetPrecomposedBaseVowel - returns the vowel that is
        /// built into a precomposed base+vowel presentation form.
        /// </summary>
        /// <param name="baseChar">character to test</param>
        /// <returns>the vowel if the character is a precomposed base+vowel character,
        ///         otherwise returns '\u0000'</returns>
        /// <remarks>
        /// Recognised presentation forms:
        ///     0xfb1d == 0x05d9,0x05b4
        ///     0xfb1f == 0x05f2,0x05b7
        ///     0xfb2e == 0x05d0,0x05b7
        ///     0xfb2f == 0x05d0,0x05b8
        ///     0xfb4b == 0x05d5,0x05b9
        /// The previous implementation used chained XOR tests whose constant for
        /// 0xfb4b was miscalculated (0x4b ^ 0x2e is 0x65, not 0x67), so it matched
        /// 0xfb49 instead of 0xfb4b.  An explicit switch avoids that class of error.
        /// </remarks>
        private char GetPrecomposedBaseVowel(char baseChar)
        {
            switch (baseChar)
            {
                case '\ufb1d':          // yod with hiriq
                    return '\u05b4';

                case '\ufb1f':          // yiddish yod yod patah
                case '\ufb2e':          // alef with patah
                    return '\u05b7';

                case '\ufb2f':          // alef with qamats
                    return '\u05b8';

                case '\ufb4b':          // vav with holam
                    return '\u05b9';

                default:                // not a precomposed base+vowel form
                    return '\u0000';
            }
        }

        /// <summary>
        /// HebrewClusterCop.IsIllegalMark - validates a vowel, puncta or other mark
        /// against the marks of the same kind already seen in this cluster.
        /// </summary>
        /// <remarks>
        /// One validation entry is kept per kind (vowel, puncta, other mark) in
        /// the character reorder array.  An empty entry ('\u0000') means nothing
        /// of that kind has been seen; otherwise it holds the first character
        /// seen, or '\uffff' once no further member may be added.  A mark is
        /// illegal when it would be the third of its kind or a duplicate.  For
        /// vowels, a hataf additionally fills the entry immediately and is never
        /// accepted as a second vowel.
        /// </remarks>
        /// <param name="unicodeChar">the current char</param>
        /// <param name="charClass">the char's class</param>
        /// <returns>
        ///     false if can add accent.
        /// </returns>
        /// <SecurityNote>
        /// Critical - calls critical code, uses pointers
        /// </SecurityNote>
        [SecurityCritical]
        unsafe private bool IsIllegalMark(char unicodeChar, HebrewCharClass charClass)
        {
            const char noMoreAllowed = '\uffff';    // entry value meaning "this kind is full"

            bool isVowel = IsVowel(charClass);
            bool isHatafVowel = isVowel && charClass == HebrewCharClass.Hataf;

            // pick the validation entry for this kind of mark
            ushort entryIx;
            if (isVowel)
            {
                entryIx = (ushort)HebrewReorderPosition.VowelValidation;
            }
            else if (charClass == HebrewCharClass.Puncta)
            {
                entryIx = (ushort)HebrewReorderPosition.PunctaValidation;
            }
            else
            {
                entryIx = (ushort)HebrewReorderPosition.MarkValidation;
            }

            char alreadySeen = _clusterChars[ entryIx ];

            if (alreadySeen == '\u0000')
            {
                // first of its kind: always good.  Remember it for comparison
                // against the next mark (a hataf closes the entry right away).
                _clusterChars[entryIx] = isHatafVowel ? noMoreAllowed : unicodeChar;
                return false;
            }

            // third of its kind, a duplicate, or a hataf as second vowel
            if (alreadySeen == noMoreAllowed || alreadySeen == unicodeChar || isHatafVowel)
            {
                return true;
            }

            // second (and last permitted) member of its kind
            _clusterChars[entryIx] = noMoreAllowed;
            return false;
        }
 
        /// <summary>
        /// HebrewClusterCop.IsIllegalPair - Validates a base/mark combo
        /// </summary>
        /// <remarks>
        /// Uniscribe does not verify that a given mark is legal on
        /// a given base. Thus, this behavior is different than legacy
        /// implementation.  A base whose flag has no bit in
        /// HebrewCharClassFlags.AllBaseChars is rejected outright;
        /// BaseDiacriticValidationTable is only consulted for the other bases.
        /// NOTE(review): an earlier remark here said a space base yields "false".
        /// SpaceChar (0x0040) is not part of AllBaseChars (0x003E), so as written
        /// a base mapped to SpaceChar yields "true" - confirm the intent.
        /// </remarks>
        /// <param name="baseClass">class of the current base char</param>
        /// <param name="markClass">class of diacritic</param>
        /// <returns>false if diacritic can be attached to base</returns>
        private bool IsIllegalPair(HebrewCharClass baseClass, HebrewCharClass markClass)
        {
            HebrewCharClassFlags baseFlag = CharClassToFlag[(int)baseClass];

            // Not a base at all: nothing may attach.  Returning here also keeps
            // BaseDiacriticValidationTable from being indexed by a non-base class.
            if ((baseFlag & HebrewCharClassFlags.AllBaseChars) == 0)
            {
                return true;
            }

            HebrewCharClassFlags markFlag = CharClassToFlag[(int)markClass];
            HebrewCharClassFlags acceptedMarks = BaseDiacriticValidationTable[(int)baseClass];

            // Illegal when the mark's flag is not among those this base accepts.
            return (markFlag & acceptedMarks) == 0;
        }
 
        /// <summary>
        /// HebrewClusterCop.IsVowel
        /// </summary>
        /// <param name="charClass">classification to test</param>
        /// <returns>
        /// true if the flag for the classification has any bit of
        /// HebrewCharClassFlags.AllVowels set
        /// </returns>
        private bool IsVowel(HebrewCharClass charClass)
        {
            HebrewCharClassFlags vowelBits =
                    CharClassToFlag[(int)charClass] & HebrewCharClassFlags.AllVowels;

            return vowelBits != 0;
        }

        /// <summary>
        /// HebrewClusterCop.ReorderedCount - get size of cluster
        /// </summary>
        /// <remarks>
        /// this number doesn't include the base character.  Zero is reported
        /// whenever the cluster is not flagged as requiring reordering.
        /// </remarks>
        internal ushort ReorderedCount
        {
            get
            {
                if (!_clusterRequiresReordering)
                {
                    return 0;
                }

                return _clusterSize;
            }
        }

        /// <summary>
        /// HebrewClusterCop.ResetClusterCop - put all per-cluster state back to
        /// its idle values
        /// </summary>
        /// <SecurityNote>
        /// Critical - is unsafe code
        /// </SecurityNote>
        [SecurityCritical]
        unsafe private void ResetClusterCop ()
        {
            // Clear every slot of both working arrays (reorder slots as well as
            // the validation slots).  This is done unconditionally, whatever the
            // previous cluster looked like.
            for (int slot = 0; slot < (int)HebrewReorderPosition.ClusterCopArraySize; ++slot)
            {
                _clusterChars[slot] = '\u0000';
                _clusterGlyphs[slot] = 0;
            }

            // counters and glyph indices
            _clusterSize = 0;
            _lowerMarksCount = 0;
            _upperMarksCount = 0;
            _nextReorderedGlyphIx = 0;
            _lastReorderedGlyphIx = 0;

            // per-cluster flags
            _clusterRequiresReordering = false;
            _clusterHasCGJ = false;
            _reorderingIsSuppressed = false;
            _clusterIsInvalid = false;

            _clusterState = HebrewClusterState.Idle;
        }

        /// <summary>
        /// HebrewClusterCop.ResolveClusterIx - pick the reorder slot for a
        /// character whose class may occur more than once per cluster
        /// </summary>
        /// <remarks>
        /// For lower and upper marks the matching counter is advanced, so each
        /// call hands out the next slot of that group.
        /// </remarks>
        /// <param name="newState">cluster state associated with the character</param>
        /// <returns>
        /// the slot for the character; BaseCharPosition (0) for puncta
        /// </returns>
        private HebrewReorderPosition ResolveClusterIx (HebrewClusterState newState)
        {
            switch (newState)
            {
                case HebrewClusterState.HolamSeen:
                    return HebrewReorderPosition.HolamPosition;

                case HebrewClusterState.LowerMarkSeen:
                    // next free slot of the lower mark group
                    return (HebrewReorderPosition)
                                ((int)HebrewReorderPosition.LowerMarksStart + _lowerMarksCount++);

                case HebrewClusterState.LowerRightMarkSeen:
                    // diacritic is lower right
                    return HebrewReorderPosition.LowerRightMarkPosition;

                case HebrewClusterState.UpperMarkSeen:
                    // next free slot of the upper mark group
                    return (HebrewReorderPosition)
                                ((int)HebrewReorderPosition.UpperMarksStart + _upperMarksCount++);

                case HebrewClusterState.UpperLeftMarkSeen:
                    // diacritic is an upper left mark
                    return HebrewReorderPosition.UpperLeftMarkPosition;

                case HebrewClusterState.PunctaSeen:
                    // puncta are reported with index 0; presumably the caller
                    // places them itself - confirm against the caller
                    return HebrewReorderPosition.BaseCharPosition;

                default:
                    Invariant.Assert(false,"HebrewClusterCop.AddToCluster() - invalid newState");
                    return HebrewReorderPosition.BaseCharPosition;
            }
        }
 
        /// <summary>
        /// HebrewClusterCop.StartCluster - start of new cluster (potentially)
        /// </summary>
        /// <remarks>
        /// This routine is used at the start of each cluster.  It resets the
        /// cluster state and then checks whether the base (the previous
        /// character of the run) and this first non-base char are the valid
        /// start of a new cluster.  When either of the two is a Unicode layout
        /// control, reordering is suppressed and no validation is done.
        /// </remarks>
        /// <param name="currentRun">workspace; supplies the previous shape/char and the current char index</param>
        /// <param name="currShape">shape info of the first non-base char</param>
        /// <returns>true if more characters can be added to this cluster</returns>
        /// <SecurityNote>
        /// Critical - calls critical code, uses unsafe accessors
        /// </SecurityNote>
        [SecurityCritical]
        unsafe private bool StartCluster( ref ShapingWorkspace currentRun,
                                             CharShapeInfo currShape )
        {
            ResetClusterCop();

            // the character before this one is the (default) base
            CharShapeInfo previousShape = currentRun.PreviousShape;

            // Keep track of our current character index for checking
            // reordering later (when next base is detected)
            _firstCharIx = currentRun.CurrentCharIx;

            // note any ZWNJ/ZWJ chars
            bool currIsControl = (currShape & CharShapeInfo.IsUnicodeLayoutControl) != 0;
            _textHasUnicodeControlChars =
                    currIsControl ||
                    (previousShape & CharShapeInfo.IsUnicodeLayoutControl) != 0;

            if ( _textHasUnicodeControlChars )
            {
                // if this is a ZW joiner just allow whatever follows
                _reorderingIsSuppressed = true;
                return !_clusterIsInvalid;
            }

            _baseCharClass = (HebrewCharClass)(previousShape & CharShapeInfo.ShaperClassMask);
            HebrewCharClass markClass = (HebrewCharClass)(currShape & CharShapeInfo.ShaperClassMask);

            _clusterIsInvalid = IsIllegalPair(_baseCharClass, markClass);

            if (!_clusterIsInvalid)
            {
                // the base class is good, we can start to process a cluster - so prepare.
                // if the base character is a precomposed form that includes a vowel,
                // get the vowel...
                _clusterChars[ (int)HebrewReorderPosition.VowelValidation ] =
                                          GetPrecomposedBaseVowel(currentRun.PreviousChar);
                return true;
            }

            if (currentRun.PreviousChar == UnicodeCharacter.NoBreakSpace)
            {
                // if the base is a NBSP, this diacritic is acceptable (and we're done):
                // the cluster is not flagged invalid, but this is the only character
                // we'll allow in it.
                _clusterIsInvalid = false;
            }

            return false;
        }

 
        /// <summary>
        /// HebrewCharClassFlags - enumeration of Hebrew character classification flags
        /// </summary>
        /// <remarks>
        /// This enum is linked to the list of char classes in the HebrewCharClass
        /// enum; keep them in sync.
        /// Single-bit members stand for one character class each.  The "All..."
        /// members and DageshRafe are masks composed of those bits; they are used
        /// to test the char class of a unicode char.
        /// </remarks>
        [Flags()]
        private enum HebrewCharClassFlags: uint
        {
            UnknownCharClass   = 0,

            // ---- base classes -------------------------------------------------
            PunctuationMark    = 0x000001,
            AcceptsVowel       = 0x000002, // Thaana base glyph can have a vowel (only)
            AcceptsNotShin     = 0x000004, // Hebrew base can have cantillation | diacritic
            AcceptsRafe        = 0x000008, // Hebrew base can have rafe | cantillation | diacritic
            AcceptsDagesh      = 0x000010, // Hebrew base can have dagesh | cantillation | diacritic
            ShinBase           = 0x000020, // Hebrew shin can have shin/sin dot | dagesh | cantillation | diacritic
            SpaceChar          = 0x000040, // Space char may take one mark

            // mask for all possible bases (0x00003E); SpaceChar is not part of it
            AllBaseChars       = AcceptsVowel | AcceptsNotShin | AcceptsRafe | AcceptsDagesh | ShinBase,

            // ---- diacritics ---------------------------------------------------
            ShinSinDot         = 0x000080, // Hebrew shin/sin dot (05C1, 05C2)
            Dagesh             = 0x000100, // Hebrew dagesh/mapiq (05BC)
            Rafe               = 0x000200, // Hebrew Rafe (05BF)
            DageshRafe         = Dagesh | Rafe, // dagesh and rafe mask (0x000300)

            // ---- vowels -------------------------------------------------------
            Holam              = 0x000400, // Hebrew Holam (05B9)
            Hataf              = 0x000800, // Hebrew hataf -- only one per base
            ThaanaVowel        = Hataf,    // Thaana vowel -- only one per base (shares the hataf bit)
            Vowel              = 0x001000, // Hebrew vowel -- only two per base
            AllVowels          = Holam | Hataf | Vowel, // mask for vowels only (0x001C00)

            // ---- cantillation marks and the rest ------------------------------
            BelowMark          = 0x002000, // Hebrew lower accents
            LowerRightMark     = 0x004000, // Hebrew Dehi, Yetiv accent (always last in lower sequence)
            UpperMark          = 0x008000, // Hebrew upper marks
            UpperLeftMark      = 0x010000, // Hebrew zinor (upper left mark) and others
            Puncta             = 0x020000, // Hebrew upper dot (05C4)

            // mask for lower cantillation marks only (0x006000)
            AllLowerAccents    = BelowMark | LowerRightMark,

            // mask for cantillation marks only (0x01E000)
            AllAccents         = BelowMark | LowerRightMark | UpperMark | UpperLeftMark,

            MasoraCircle       = 0x040000, // Hebrew cantillation - MasoraCircle Circle
            CombiningMark      = 0x080000,
            ControlChar        = 0x100000,

            // every non-base class from ShinSinDot up to CombiningMark (0x0FFF80)
            AllDiacritics      = ShinSinDot | DageshRafe | AllVowels | AllAccents |
                                 Puncta | MasoraCircle | CombiningMark,

            // every class from Holam up to ControlChar (0x1FFC00) -- includes ZWJ, ZWNJ
            AllVowelsAndAccents = AllVowels | AllAccents | Puncta | MasoraCircle |
                                  CombiningMark | ControlChar
        };

        // BaseDiacriticValidationTable - table used for validating a given base/mark pair
        // Used to quickly determine whether a given diacritic/mark are
        // legal to add to a particular base type.
        //
        // The table is indexed by the (int) class of the base (see IsIllegalPair);
        // each entry is the set of mark flags that kind of base accepts.  Only the
        // base classes have entries, so it must not be indexed with a mark class.
        //
        // The field is readonly, like the other static lookup tables of this class:
        // the table reference is never meant to be replaced.  (readonly protects
        // the reference only; the elements must still be treated as constant.)
        private static readonly HebrewCharClassFlags[] BaseDiacriticValidationTable =
        {
           0,            // [0] unknown chars accept no marks
           0,            // [1] punctuation marks accept no marks
           HebrewCharClassFlags.ThaanaVowel, // [2] base accepts vowels only (thaana bases)
           HebrewCharClassFlags.AllVowelsAndAccents | HebrewCharClassFlags.DageshRafe, // [3] vowels/accents + dagesh + rafe
           HebrewCharClassFlags.AllVowelsAndAccents | HebrewCharClassFlags.Rafe,       // [4] vowels/accents + rafe
           HebrewCharClassFlags.AllVowelsAndAccents | HebrewCharClassFlags.Dagesh,     // [5] vowels/accents + dagesh
           HebrewCharClassFlags.AllVowelsAndAccents | HebrewCharClassFlags.DageshRafe | HebrewCharClassFlags.ShinSinDot, // [6] as [3] + shin/sin dot
           0             // [7] space char
        };

        // CharClassToFlag - converts a character class (used as the index, cast to
        // int) into its HebrewCharClassFlags bit, so that classes can be tested
        // against the "All..." masks.  The order of the entries has to follow the
        // order of the HebrewCharClass members.
        //
        // The field is readonly, like the other static lookup tables of this class:
        // the table reference is never meant to be replaced.  (readonly protects
        // the reference only; the elements must still be treated as constant.)
        private static readonly HebrewCharClassFlags[] CharClassToFlag =
        {
            HebrewCharClassFlags.UnknownCharClass,
            HebrewCharClassFlags.PunctuationMark,
            HebrewCharClassFlags.AcceptsVowel, // Thaana base glyph can have a vowel (only)
            HebrewCharClassFlags.AcceptsNotShin, // Hebrew base can have cantillation | diacritic
            HebrewCharClassFlags.AcceptsRafe, // Hebrew base can have rafe | cantillation | diacritic
            HebrewCharClassFlags.AcceptsDagesh, // Hebrew base can have dagesh | cantillation | diacritic
            HebrewCharClassFlags.ShinBase, // Hebrew shin can have shin/sin dot | dagesh | cantillation | diacritic
            HebrewCharClassFlags.SpaceChar, // Space char may take one mark
            HebrewCharClassFlags.ShinSinDot, // Hebrew shin/sin dot (05C1, 05C2)
            HebrewCharClassFlags.Dagesh,  // Hebrew Dagesh/mapiq (05BC)
            HebrewCharClassFlags.Rafe,      // Hebrew Rafe
            HebrewCharClassFlags.Holam,  // Hebrew Holam
            HebrewCharClassFlags.Hataf, // Hebrew hataf -- only one per base
            HebrewCharClassFlags.Vowel, // Hebrew vowel -- only two per base
            HebrewCharClassFlags.BelowMark, // Hebrew cantillation - Below
            HebrewCharClassFlags.LowerRightMark, // Hebrew cantillation - Below right
            HebrewCharClassFlags.UpperMark, // Hebrew cantillation - Above marks
            HebrewCharClassFlags.UpperLeftMark, // Hebrew cantillation - Above left
            HebrewCharClassFlags.Puncta, // Hebrew upper dot (05C4)
            HebrewCharClassFlags.MasoraCircle, // Hebrew cantillation - MasoraCircle Circle
            HebrewCharClassFlags.CombiningMark, // combining marks (0323,0307,0308)
            HebrewCharClassFlags.ControlChar // ZWJ, ZWNJ, CGJ
        };

        /// <summary>
        /// HebrewClusterCop.HebrewClusterState: state definitions
        /// </summary>
        /// <remarks>
        /// clusterStateMap translates a character class into one of these
        /// states; ResolveClusterIx uses some of them to pick a reorder slot.
        /// The per-member notes below name the character classes that
        /// clusterStateMap associates with each state.
        /// </remarks>
        private enum HebrewClusterState : byte
        { 
            Idle,                       // value after ResetClusterCop; also mapped from every base class
            SinShinSeen,                // shin/sin dot
            DageshRafeSeen,             // dagesh/mapiq or rafe
            HolamSeen,                  // holam
            PunctaSeen,                 // puncta
            LowerMarkSeen,              // hataf, vowel or cantillation below
            LowerRightMarkSeen,         // cantillation below right
            UpperMarkSeen,              // cantillation above
            UpperLeftMarkSeen,          // cantillation above left
            MasoraCircleSeen,           // masora circle
            CombiningMarkSeen,          // combining marks
            UnicodeLayoutControlSeen    // Unicode layout control chars
        }

        // reordering array positions...
        // These are slot indices into the cluster cop's working arrays.  The
        // numeric value of every member is given in its comment; the values
        // follow from the declaration order and the explicit offsets.
        private enum HebrewReorderPosition : byte 
        {
            BaseCharPosition,   // 0 - not used 
            SinShinPosition,    // 1
            DageshRafePosition, // 2
            HolamPosition = DageshRafePosition + 2, // 4 - leaves slot 3 unnamed (presumably a second dagesh/rafe slot - TODO confirm)
            LowerMarksStart,    // 5 - first of the lower mark slots (5..8)
            // lower right marks follow all other lower marks.
            LowerRightMarkPosition = LowerMarksStart + 4, // 9
            // upper marks follow lower marks.  There must be room for up to 
            // five lower marks (2 vowels, 2 accents, and 1 puncta )
            UpperMarksStart,    // 10 - first of the upper mark slots (10..12)
            // upper left marks follow all other upper marks.  There must be 
            // room for one upper mark and one puncta
            UpperLeftMarkPosition = UpperMarksStart + 3, // 13
            // masora circle follows upper marks.  There must be room for up to
            // four upper marks (2 accents, 1 vowel (holam) and 1 puncta)
            MasoraCirclePosition,   // 14
            CombiningMarkPosition,  // 15
            ReorderingArraySize,    // 16 - number of reorder slots (0..15)
            ZWControl = ReorderingArraySize,    // 16 - not added to the array 
            VowelValidation,              // 17 - for validating vowels 
            MarkValidation,               // 18 - for validating marks
            PunctaValidation,             // 19 - for validating puncta 
            ClusterCopArraySize,          // 20 - reorder slots plus validation slots
            UnresolvedPosition = ClusterCopArraySize        // 20 - not in array (index too big)
        }
 
        // Number of slots ResetClusterCop clears in each working array
        // (HebrewReorderPosition.ClusterCopArraySize: the reorder slots plus the
        // validation slots).  Presumably callers size those arrays with this
        // constant - confirm against the allocation site.
        public const int ReorderArraySize = (int)HebrewReorderPosition.ClusterCopArraySize;
 
        // this table is used to determine if reordering will be required. 
        // It has one entry per character class, in the same order as
        // CharClassToFlag, and gives the reorder slot for that class.
        private static readonly HebrewReorderPosition[] clusterPositionMap =
        { 
            // This maps all the Hebrew char classes to the corresponding reorder
            // position.  UnresolvedPosition marks the classes whose slot is not
            // fixed (presumably resolved through ResolveClusterIx - confirm
            // against the caller).
            HebrewReorderPosition.BaseCharPosition,    // Unknown class -- can take any diacritic
            HebrewReorderPosition.BaseCharPosition,    // PunctuationMark -- cannot take diacritics 
            HebrewReorderPosition.BaseCharPosition,    // Thaana base glyph can have single vowel
            HebrewReorderPosition.BaseCharPosition,    // Hebrew base can have cantillation | vowel 
            HebrewReorderPosition.BaseCharPosition,    // Hebrew base can have rafe | cantillation | vowel 
            HebrewReorderPosition.BaseCharPosition,    // Hebrew base can have dagesh | cantillation | vowel
            HebrewReorderPosition.BaseCharPosition,    // Hebrew shin can have shin/sin dot && dagesh && rafe && cantillation && vowel 
            HebrewReorderPosition.BaseCharPosition,    // space char
            HebrewReorderPosition.SinShinPosition,     // Hebrew shin/sin dot (05C1, 05C2)
            HebrewReorderPosition.DageshRafePosition,  // Hebrew dagesh/mapiq (05BC)
            HebrewReorderPosition.DageshRafePosition,  // Hebrew rafe 
            HebrewReorderPosition.UnresolvedPosition,  // Hebrew holam
            HebrewReorderPosition.UnresolvedPosition,  // Hebrew hataf -- only one per base 
            HebrewReorderPosition.UnresolvedPosition,  // Hebrew/Thaana vowel -- only two per base 
            HebrewReorderPosition.UnresolvedPosition,  // Hebrew cantillation - Below
            HebrewReorderPosition.UnresolvedPosition,  // Hebrew cantillation - Below right 
            HebrewReorderPosition.UnresolvedPosition,  // Hebrew cantillation - Above
            HebrewReorderPosition.UnresolvedPosition,  // Hebrew cantillation - Above left
            HebrewReorderPosition.UnresolvedPosition,  // puncta seen
            HebrewReorderPosition.MasoraCirclePosition,// Hebrew cantillation - MasoraCircle Circle 
            HebrewReorderPosition.CombiningMarkPosition,   // combining marks
            HebrewReorderPosition.ZWControl            // Unicode layout control chars -- not added to the array
            // extent of array; NumberOfHebrewCharClasses members 
        };
 
        // this table is used to determine if reordering will be required.
        // It has one entry per character class, in the same order as
        // CharClassToFlag, and gives the cluster state for that class.
        private static readonly HebrewClusterState[] clusterStateMap =
        {
            // This maps all the Hebrew char classes to the corresponding cluster 
            // state.  Every base class maps to Idle.
            HebrewClusterState.Idle,     // Unknown class -- can take any diacritic 
            HebrewClusterState.Idle,     // PunctuationMark -- cannot take diacritics 
            HebrewClusterState.Idle,     // Thaana base glyph can have single vowel
            HebrewClusterState.Idle,     // Hebrew base can have cantillation | vowel 
            HebrewClusterState.Idle,     // Hebrew base can have rafe | cantillation | vowel
            HebrewClusterState.Idle,     // Hebrew base can have dagesh | cantillation | vowel
            HebrewClusterState.Idle,     // Hebrew shin can have shin/sin dot | dagesh | cantillation | vowel
            HebrewClusterState.Idle,     // space char (eighth entry, matching SpaceChar in CharClassToFlag)
            HebrewClusterState.SinShinSeen, // Hebrew shin/sin dot (05C1, 05C2)
            HebrewClusterState.DageshRafeSeen,// Hebrew dagesh/mapiq (05BC) 
            HebrewClusterState.DageshRafeSeen,// Hebrew rafe 
            HebrewClusterState.HolamSeen,    // Hebrew holam
            HebrewClusterState.LowerMarkSeen,// Hebrew hataf -- only one per base 
            HebrewClusterState.LowerMarkSeen,// Hebrew/Thaana vowel -- only two per base
            HebrewClusterState.LowerMarkSeen,// Hebrew cantillation - Below
            HebrewClusterState.LowerRightMarkSeen, // Hebrew cantillation - Below right
            HebrewClusterState.UpperMarkSeen, // Hebrew cantillation - Above 
            HebrewClusterState.UpperLeftMarkSeen, // Hebrew cantillation - Above left
            HebrewClusterState.PunctaSeen,    // puncta seen 
            HebrewClusterState.MasoraCircleSeen, // Hebrew cantillation - MasoraCircle Circle 
            HebrewClusterState.CombiningMarkSeen, // combining marks
            HebrewClusterState.UnicodeLayoutControlSeen // Unicode layout control chars
            // extent of array; NumberOfHebrewCharClasses members
        };
    }
 

  /// <summary>
  /// HebrewCharClassifier - The char converter for Hebrew.
  /// </summary>
  /// <remarks>
  /// The same classifier serves the Thaana script; the constructor selects
  /// the character range and classification table from the script tag.
  /// </remarks>
  internal class HebrewCharClassifier : ShaperCharacterClassifier
  {

      /// <summary>
      /// Sets up the classes of the special characters and, for the Hebrew and
      /// Thaana script tags, the script's character range and class table.
      /// For any other script tag the range and table are left as the base
      /// class initialized them.
      /// </summary>
      /// <param name="scriptTag">script this classifier is created for</param>
      /// <param name="fontFace">font face, handed on to the base class</param>
      public HebrewCharClassifier(ScriptTags scriptTag, GlyphTypeface fontFace) : base(scriptTag, fontFace)
      {
          // ZWJ, ZWNJ and the other zero-width controls all share one class
          const byte LayoutControlClass = (byte)HebrewCharClass.UnicodeLayoutControl;

          _unknownClass   = UnknownCharClass;
          _shyClass       = UnknownCharClass;
          _spaceClass     = (byte)HebrewCharClass.SpaceChar;
          _zwControlClass = LayoutControlClass;
          _zwjClass       = LayoutControlClass;
          _zwnjClass      = LayoutControlClass;

          if (scriptTag == ScriptTags.Hebrew)
          {
              _charClassTable = _hebrewCharClasses;

              _firstChar = '\u0590';     // this is the first Hebrew Unicode char
              _lastChar  = '\u05FF';     // this is the last Hebrew Unicode char
              _xorMask   = 0x580;        // this mask is used in GetCharShapeInfo
              _xorRange  = 0x080;        // this is used in GetCharShapeInfo
          }
          else if (scriptTag == ScriptTags.Thaana)
          {
              _charClassTable = _thaanaCharClasses;

              _firstChar = '\u0780';     // first char covered by the Thaana table
              _lastChar  = '\u07BF';     // last char covered by the Thaana table
              _xorMask   = 0x780;
              _xorRange  = 0x040;
          }
      }

      // range covered by _alphabeticPresentationFormsCharClasses
      const char AlphabeticPresentationRangeStart = '\uFB1D';
      const char AlphabeticPresentationRangeEnd = '\uFB4F';

      const char UnicodeCombiningDotAbove  =   '\u0307'; // combining dot above
      const char UnicodeCombiningDiaresis  =   '\u0308'; // combining diaresis
      internal const char BelowPuncta               =   '\u0323'; // combining dot below

      /// <summary>
      /// HebrewCharClassifier.ToShapeInfo - returns CharShapeInfo for the Unicode character
      /// </summary>
      /// <remarks>
      /// The base classifier is asked first.  Only when it reports the unknown
      /// class are the extra characters handled here considered: the Hebrew
      /// alphabetic presentation forms (U+FB1D - U+FB4F), combining dot above
      /// and diaresis, combining dot below and CGJ.
      /// </remarks>
      /// <param name="unicodeChar">character to classify</param>
      /// <returns>shape info (class and flags) for the character</returns>
      public override CharShapeInfo ToShapeInfo(char unicodeChar)
      {
          CharShapeInfo baseInfo = base.ToShapeInfo(unicodeChar);

          HebrewCharClass baseClass = (HebrewCharClass)(baseInfo & CharShapeInfo.ShaperClassMask);
          if (baseClass != HebrewCharClass.UnknownCharClass)
          {
              // the base classifier knows this character; nothing to add
              return baseInfo;
          }

          if (unicodeChar >= AlphabeticPresentationRangeStart && unicodeChar <= AlphabeticPresentationRangeEnd)
          {
              return (CharShapeInfo)
                       _alphabeticPresentationFormsCharClasses[unicodeChar - AlphabeticPresentationRangeStart];
          }

          if (unicodeChar == UnicodeCombiningDotAbove ||
              unicodeChar == UnicodeCombiningDiaresis)
          {
              return (CharShapeInfo) HebrewCharClass.UnicodeCombiningMark;
          }

          if (unicodeChar == BelowPuncta)
          {
              return (CharShapeInfo) HebrewCharClass.Puncta;
          }

          if (unicodeChar == UnicodeCharacter.CGJ)
          {
              // CGJ gets its own class and is flagged as a layout control
              return (CharShapeInfo) HebrewCharClass.UnicodeCGJ | CharShapeInfo.IsUnicodeLayoutControl;
          }

          // still unknown: hand back what the base classifier said
          return baseInfo;
      }



#region Classification Tables

      // these consts are so the tables below will be more readable
      // (all the base classes also carry the start-of-cluster flag)
      private const HebrewCharClass StartOfCluster = (HebrewCharClass)CharShapeInfo.IsStartOfCluster;
      private const byte UnknownCharClass = (byte)(HebrewCharClass.UnknownCharClass | StartOfCluster);
      private const byte PunctuationMark = (byte)(HebrewCharClass.PunctuationMark | StartOfCluster);
      private const byte BaseAcceptsVowel = (byte)(HebrewCharClass.BaseAcceptsVowel | StartOfCluster);
      private const byte BaseAcceptsAll = (byte)(HebrewCharClass.BaseAcceptsNotShin | StartOfCluster);
      private const byte BaseAcceptsRafe = (byte)(HebrewCharClass.BaseAcceptsRafe | StartOfCluster);
      private const byte BaseAcceptsDagesh = (byte)(HebrewCharClass.BaseAcceptsDagesh | StartOfCluster);
      private const byte ShinBaseChar = (byte)(HebrewCharClass.ShinBaseChar | StartOfCluster);
      private const byte ShinSinDot = (byte)HebrewCharClass.ShinSinDot;
      private const byte Dagesh = (byte)HebrewCharClass.Dagesh;
      private const byte Rafe = (byte)HebrewCharClass.Rafe;
      private const byte Holam = (byte)HebrewCharClass.Holam;
      private const byte Hataf = (byte)HebrewCharClass.Hataf;
      private const byte ThaanaVowel = (byte)HebrewCharClass.Hataf;
      private const byte Vowel = (byte)HebrewCharClass.Vowel;
      private const byte Meteg = (byte)HebrewCharClass.LowerMark;
      private const byte BelowCenterRightMark = (byte)HebrewCharClass.LowerMark;
      private const byte BelowCenterLeftMark = (byte)HebrewCharClass.LowerMark;
      private const byte BelowLeftMark = (byte)HebrewCharClass.LowerMark;
      private const byte Puncta = (byte)HebrewCharClass.Puncta;
      private const byte LowerRightMark = (byte)HebrewCharClass.LowerRightMark;
      private const byte AboveRightMark = (byte)HebrewCharClass.UpperMark;
      private const byte AboveCenterRightMark = (byte)HebrewCharClass.UpperMark;
      private const byte AboveCenterMark = (byte)HebrewCharClass.UpperMark;
      private const byte AboveCenterLeftMark = (byte)HebrewCharClass.UpperMark;
      private const byte UpperLeftMark = (byte)HebrewCharClass.UpperLeftMark;
      private const byte MasoraCircle = (byte)HebrewCharClass.MasoraCircle;

      // All the Hebrew Unicode chars (U+590 - U+5FF) classified
      private static readonly byte[] _hebrewCharClasses  = //new HebrewCharClass[]
      {
          // U+590 (Hebrew Unicode)
          // 0, 4, 8, C       1, 5, 9, D        2, 6, A, E         3, 7, B, F
          UnknownCharClass,   BelowCenterLeftMark,AboveCenterMark, AboveCenterMark,
          AboveCenterMark,    AboveCenterMark,  BelowCenterRightMark, AboveCenterLeftMark,
          AboveCenterLeftMark,UpperLeftMark,    LowerRightMark,    BelowLeftMark,
          AboveCenterLeftMark,AboveRightMark,   AboveCenterLeftMark,AboveCenterLeftMark,

          // U+5A0 (Hebrew Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E        3, 7, B, F
          AboveRightMark,     AboveCenterLeftMark,UnknownCharClass,BelowCenterLeftMark,
          BelowCenterRightMark,BelowLeftMark,    BelowCenterLeftMark,BelowCenterRightMark,
          AboveCenterLeftMark,UpperLeftMark,     BelowCenterRightMark,AboveCenterMark,
          AboveCenterLeftMark,LowerRightMark,    UpperLeftMark,    MasoraCircle,

          // U+5B0 (Hebrew Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E        3, 7, B, F
          Vowel,              Hataf,             Hataf,            Hataf,
          Vowel,              Vowel,             Vowel,            Vowel,
          Vowel,              Holam,             UnknownCharClass, Vowel,
          Dagesh,             Meteg,             PunctuationMark,  Rafe,

          // U+5C0 (Hebrew Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E        3, 7, B, F
          PunctuationMark,    ShinSinDot,        ShinSinDot,       PunctuationMark,
          Puncta,             UnknownCharClass,  UnknownCharClass, UnknownCharClass,
          UnknownCharClass,   UnknownCharClass,  UnknownCharClass, UnknownCharClass,
          UnknownCharClass,   UnknownCharClass,  UnknownCharClass, UnknownCharClass,

          // U+5D0 (Hebrew Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
          BaseAcceptsAll,     BaseAcceptsAll,    BaseAcceptsAll,    BaseAcceptsAll,
          BaseAcceptsAll,     BaseAcceptsAll,    BaseAcceptsAll,    BaseAcceptsAll,
          BaseAcceptsAll,     BaseAcceptsAll,    BaseAcceptsAll,    BaseAcceptsAll,
          BaseAcceptsAll,     BaseAcceptsAll,    BaseAcceptsAll,    BaseAcceptsAll,

          // U+5E0 (Hebrew Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
          BaseAcceptsAll,     BaseAcceptsAll,    BaseAcceptsAll,    BaseAcceptsAll,
          BaseAcceptsAll,     BaseAcceptsAll,    BaseAcceptsAll,    BaseAcceptsAll,
          BaseAcceptsAll,     ShinBaseChar,      BaseAcceptsAll,    UnknownCharClass,
          UnknownCharClass,   UnknownCharClass,  UnknownCharClass,  UnknownCharClass,

          // U+5F0 (Hebrew Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
          BaseAcceptsRafe,    BaseAcceptsRafe,   BaseAcceptsRafe,   PunctuationMark,
          PunctuationMark,    UnknownCharClass,  UnknownCharClass,  UnknownCharClass,
          UnknownCharClass,   UnknownCharClass,  UnknownCharClass,  UnknownCharClass,
          UnknownCharClass,   UnknownCharClass,  UnknownCharClass,  UnknownCharClass,
      };

      // All the Thaana Unicode chars (U+780 - U+7BF) classifications
      private static readonly byte[] _thaanaCharClasses = // new HebrewCharClass[]
      {
          // U+780 (Thaana Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
          BaseAcceptsVowel,   BaseAcceptsVowel,  BaseAcceptsVowel,  BaseAcceptsVowel,
          BaseAcceptsVowel,   BaseAcceptsVowel,  BaseAcceptsVowel,  BaseAcceptsVowel,
          BaseAcceptsVowel,   BaseAcceptsVowel,  BaseAcceptsVowel,  BaseAcceptsVowel,
          BaseAcceptsVowel,   BaseAcceptsVowel,  BaseAcceptsVowel,  BaseAcceptsVowel,

          // U+790 (Thaana Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
          BaseAcceptsVowel,   BaseAcceptsVowel,  BaseAcceptsVowel,  BaseAcceptsVowel,
          BaseAcceptsVowel,   BaseAcceptsVowel,  BaseAcceptsVowel,  BaseAcceptsVowel,
          BaseAcceptsVowel,   BaseAcceptsVowel,  BaseAcceptsVowel,  BaseAcceptsVowel,
          BaseAcceptsVowel,   BaseAcceptsVowel,  BaseAcceptsVowel,  BaseAcceptsVowel,

          // U+7A0 (Thaana Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
          BaseAcceptsVowel,   BaseAcceptsVowel,  BaseAcceptsVowel,  BaseAcceptsVowel,
          BaseAcceptsVowel,   BaseAcceptsVowel,  ThaanaVowel,       ThaanaVowel,
          ThaanaVowel,        ThaanaVowel,       ThaanaVowel,       ThaanaVowel,
          ThaanaVowel,        ThaanaVowel,       ThaanaVowel,       ThaanaVowel,

          // U+7B0 (Thaana Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
          ThaanaVowel,        BaseAcceptsVowel,  UnknownCharClass,  UnknownCharClass,
          UnknownCharClass,   UnknownCharClass,  UnknownCharClass,  UnknownCharClass,
          UnknownCharClass,   UnknownCharClass,  UnknownCharClass,  UnknownCharClass,
          UnknownCharClass,   UnknownCharClass,  UnknownCharClass,  UnknownCharClass,
      };

      // All the Hebrew Alphabetic Presentation Forms Unicode chars (U+FB1D - U+FB4F);
      // the first entry is U+FB1D, so the first row has three entries only
      private static readonly byte[] _alphabeticPresentationFormsCharClasses = // new HebrewCharClass[]
      {
          // U+FB10 (Presentation Forms Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
                              BaseAcceptsAll,    Rafe,              BaseAcceptsRafe,

          // U+FB20 (Presentation Forms Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
          BaseAcceptsAll,     BaseAcceptsAll,    BaseAcceptsAll,    BaseAcceptsAll,
          BaseAcceptsAll,     BaseAcceptsAll,    BaseAcceptsAll,    BaseAcceptsAll,
          BaseAcceptsAll,     PunctuationMark,   BaseAcceptsAll,    BaseAcceptsAll,
          BaseAcceptsRafe,    BaseAcceptsRafe,   BaseAcceptsAll,    BaseAcceptsAll,

          // U+FB30 (Presentation Forms Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
          BaseAcceptsRafe,    BaseAcceptsRafe,   BaseAcceptsRafe,   BaseAcceptsRafe,
          BaseAcceptsRafe,    BaseAcceptsRafe,   BaseAcceptsRafe,   UnknownCharClass,
          BaseAcceptsRafe,    BaseAcceptsRafe,   BaseAcceptsRafe,   BaseAcceptsRafe,
          BaseAcceptsRafe,    UnknownCharClass,  BaseAcceptsRafe,   UnknownCharClass,

          // U+FB40 (Presentation Forms Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
          BaseAcceptsRafe,    BaseAcceptsRafe,   UnknownCharClass,  BaseAcceptsRafe,
          BaseAcceptsRafe,    UnknownCharClass,  BaseAcceptsRafe,   BaseAcceptsRafe,
          BaseAcceptsRafe,    BaseAcceptsRafe,   BaseAcceptsRafe,   BaseAcceptsAll,
          BaseAcceptsDagesh,  BaseAcceptsDagesh, BaseAcceptsDagesh, UnknownCharClass,

      };
#endregion  // end of Classification Tables
  }

} 

// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
// Copyright (c) Microsoft Corporation. All rights reserved.
//---------------------------------------------------------------------- 
//
//  Microsoft Windows Client Platform
//  Copyright (C) Microsoft Corporation, 2003
// 
//  File:      HebrewShape.cs
// 
//  Contents:  Implementation of Hebrew shaping engine and its factory 
//
//  Created:   08-05-2003 Nick Beal (nbeal) 
//
//-----------------------------------------------------------------------

// #define VALIDATE_CLUSTER_PARAMETERS 

using System; 
using System.Security; 
using System.Security.Permissions;
using System.Diagnostics; 
using System.Collections;
using System.Globalization;
using System.Windows;
using System.Windows.Media; 
using System.Windows.Media.TextFormatting;
using MS.Internal.FontCache; 
using MS.Internal.FontFace; 
using MS.Internal.PresentationCore;
 

namespace MS.Internal.Shaping
{
 

    /// <summary>
    /// HebrewCharClass - enumeration of Hebrew classification flags
    /// (the ordinal position of each member is significant).
    /// </summary>
    /// <remarks>
    /// To allow for all possible Biblical combinations of cantillation
    /// marks (teamin) and nikud, a number of cantillation classes
    /// are defined.
    /// Note that the order is important here.  OpenType
    /// fonts' rules expect that the base/diacritic glyph sequences
    /// are arranged canonically (i.e., a diacritic or cantillation mark whose
    /// HebrewCharClass value is small canonically precedes a character
    /// whose HebrewCharClass value is larger).  Note that our "canonical"
    /// order is not entirely consistent with the Unicode canonical weighting.
    /// Do not insert or reorder members without reviewing every table that is
    /// indexed by this enum.
    /// </remarks>
    internal enum HebrewCharClass : byte 
    {
        UnknownCharClass,     // Unknown class -- can take any diacritic 
        PunctuationMark,      // PunctuationMark -- cannot take diacritics 
        BaseAcceptsVowel,     // Thaana base glyph can have vowel
        FirstBaseChar = BaseAcceptsVowel, // alias: lowest-valued base-character class
        BaseAcceptsNotShin,   // Hebrew base can have dagesh && rafe && cantillation & vowel
        BaseAcceptsRafe,      // Hebrew base can have rafe && cantillation && vowel
        BaseAcceptsDagesh,    // Hebrew base can have dagesh && cantillation && vowel
        ShinBaseChar,         // Hebrew shin can have shin/sin dot && dagesh && rafe && cantillation && vowel 
        SpaceChar,            // space may be base of shin/sin dot | dagesh | cantillation | vowel (only one)
        LastBaseChar = SpaceChar, // alias: highest-valued base-character class
        ShinSinDot,           // Hebrew shin/sin dot (05C1, 05C2) 
        Dagesh,               // Hebrew dagesh/mapiq (05BC)
        Rafe,                 // Hebrew Rafe (05BF) 
        Holam,                // Hebrew Holam (05B9)
        Hataf,                // Hebrew hataf vowels -- only one per base
        Vowel,                // Hebrew vowels -- only two per base
        LowerMark,            // presumably a mark placed below the base -- TODO confirm against the classification tables
        LowerRightMark,       // Hebrew cantillation - Below right
        UpperMark,            // presumably a mark placed above the base -- TODO confirm against the classification tables
        UpperLeftMark,        // Hebrew cantillation - Above left 
        Puncta,               // Hebrew upper dot (05C4)
        MasoraCircle,         // Hebrew cantillation - MasoraCircle Circle 
        LastDiacritic = MasoraCircle, // alias: highest-valued Hebrew diacritic class
        UnicodeCombiningMark, // Combining marks
        UnicodeCGJ = UnicodeCombiningMark,// Unicode CGJ (deliberately shares the combining-mark class)
        UnicodeLayoutControl,   // ZWNJ, ZWJ 
        NumberOfHebrewCharClasses, // count of distinct class values; keep last
    }; 
 

    /// <summary>
    /// The Hebrew Shaping Engine - (shapes Hebrew text)
    /// </summary>
    /// <remarks>
    /// The IShaper and IShapingEngine interfaces are implemented to
    /// provide the shaping methods for Hebrew Scripts.
    /// There are four Hebrew private types defined/used in this class:
    /// 1.) HebrewShapeInfo - this class manages the shape information
    /// 2.) HebrewClusterCop - this class manages the canonical ordering
    /// 3.) HebrewFontInfo - this class manages the font interface
    /// 4.) HebrewCharClassInfo - contains the char classification tables
    /// </remarks>
    internal sealed class HebrewShape : BaseShape
    {

        public const char UnicodeCombiningDotBelow  =   '\u0323'; // combining dot below

        // The script tags this shaper handles; returned as-is by SupportedScripts.
        private static readonly ScriptTags[] _supportedScripts =
                        new ScriptTags[] { ScriptTags.Hebrew, ScriptTags.Thaana };

        //--------------------------------------
        //
        //  Constructors
        //
        //--------------------------------------

#region Constructors

        /// <summary>
        /// Creates the shaper and marks its text flow as right-to-left.
        /// </summary>
        internal HebrewShape()
        {
            _textFlowDirection = TextFlowDirection.RTL;
        }

#endregion

        //--------------------------------------
        //
        //  Internal Methods
        //
        //--------------------------------------

#region Internal methods

        /// <summary>
        /// HebrewShape.GetCharClassifier - supplies the character classifier
        /// used by this shaper.
        /// </summary>
        /// <param name="scriptTag">script the classifier is created for</param>
        /// <param name="fontFace">font face the classifier is created for</param>
        /// <returns>a new HebrewCharClassifier for the given script and font</returns>
        protected override ShaperCharacterClassifier GetCharClassifier(ScriptTags scriptTag, GlyphTypeface fontFace)
        {
            return new HebrewCharClassifier(scriptTag, fontFace);
        }

        /// <summary>
        ///     HebrewShape.GetGlyphs - Hebrew override of the GetGlyphs() helper function.
        /// </summary>
        /// <remarks>
        /// Works in two passes.  The first pass simply assigns glyph properties
        /// and runs until it meets a character that is neither a cluster start
        /// nor a Unicode layout control (i.e. a diacritic).  If that happens the
        /// run is rewound to the affected base character and a second pass builds
        /// and, where needed, reorders the diacritic clusters with HebrewClusterCop.
        /// </remarks>
        /// <param name="currentRun">shaping currentRun</param>
        /// <param name="item">Text item</param>
        /// <returns>number of glyphs</returns>
        /// <SecurityNote>
        /// Critical - calls critical code
        /// </SecurityNote>
        [SecurityCritical]
        unsafe protected override int GetGlyphs ( ref ShapingWorkspace currentRun, Item item )
        {
            CharShapeInfo shapeInfo;

            // Pass 1: plain text.  If there are no diacritics this loop
            // will process all the text.
            while (currentRun.SetNextGlyphProperties(out shapeInfo))
            {
                bool isBaseOrLayoutControl =
                    (shapeInfo & (CharShapeInfo.IsStartOfCluster | CharShapeInfo.IsUnicodeLayoutControl)) != 0;
                if (isBaseOrLayoutControl)
                {
                    continue;
                }

                // A diacritic was found: rewind and let the cluster pass below take over.
                if (currentRun.CurrentCharIx < 3)
                {
                    // if we've only done the first 3 glyphs just restart from the beginning
                    currentRun.Reset(0, 0, currentRun.CharsCount);
                }
                else
                {
                    // restart at the previous char (the base char)
                    ushort baseCharIx = currentRun.PreviousCharIx;
                    ushort baseGlyphIx = currentRun.PreviousGlyphIx;

                    bool previousIsLayoutControl =
                        (currentRun.GetShapeInfo(baseCharIx) & CharShapeInfo.IsUnicodeLayoutControl) != 0;
                    if (previousIsLayoutControl)
                    {
                        // if zero width unicode control, go back one more character
                        --baseCharIx;
                        baseGlyphIx = currentRun.GetGlyphIx(baseCharIx);
                    }

                    // the remaining length includes this char and the char(s) we backed up over
                    currentRun.Reset(baseCharIx,
                                     baseGlyphIx,
                                     (ushort)(currentRun.CharsCount - baseCharIx));
                }

                break;
            }

            // modern Hebrew has no diacritics so all the text will normally be done in the loop above.
            if (currentRun.IsFinished)
            {
                return currentRun.GlyphsCount;
            }

            // Pass 2: a diacritic has been found.  The reordering scratch buffers
            // live on the stack; fast to alloc, easy to throw away.
            char *reorderChars = stackalloc char [ HebrewClusterCop.ReorderArraySize ];
            ushort *reorderGlyphs = stackalloc ushort [ HebrewClusterCop.ReorderArraySize ];
            HebrewClusterCop clusterCop = new HebrewClusterCop( reorderChars, reorderGlyphs );

            bool reorderPending = false;

            // get shape info for every remaining character
            while (currentRun.GetNextShape(out shapeInfo))
            {
                if ((shapeInfo & CharShapeInfo.IsStartOfCluster) == 0)
                {
                    // not a base: hand the mark (and any that follow it) to the cluster cop
                    reorderPending = clusterCop.AddCluster( ref currentRun, shapeInfo );
                    continue;
                }

                // this is a base character.  If a just completed cluster
                // needs reordering, now is the time to change the glyphs.
                if (reorderPending)
                {
                    clusterCop.GetReorderedGlyphs( ref currentRun );
                    reorderPending = false;
                }

                currentRun.SetGlyphPropertiesUsingShapeInfo(shapeInfo);
            }

            // flush a cluster that ran up to the end of the text
            if (reorderPending)
            {
                clusterCop.GetReorderedGlyphs( ref currentRun );
            }

            return currentRun.GlyphsCount;
        }

        /// <summary>
        /// HebrewShape.SupportedScripts -
        ///  IShapingEngine member override
        /// </summary>
        /// <returns>Our supported scripts (Hebrew, Thaana).</returns>
        public override ScriptTags[] SupportedScripts
        {
            get { return _supportedScripts; }
        }

#endregion

    }
 
    /// <summary>
    /// Class HebrewClusterCop:
    ///  Manages the diacritic ordering requirement
    /// </summary>
    /// <remarks>
    /// This class is used to provide diacritic ordering and invalid
    /// diacritic enforcement for the diacritics and cantillation marks
    /// found in the Unicode stream.
    /// Diacritic ordering is done by maintaining an array whose extent
    /// is HebrewCharClassifier.NumberOfClasses.
    /// See <see cref="HebrewCharClass"/> for comments about the
    /// char class enum.
    /// For each base character in the Unicode run, the diacritics and
    /// other marks are added to the arrays at the index corresponding
    /// to the HebrewCharClass value.  Only one mark per HebrewCharClass
    /// value is allowed per base character.  Once the extent of the valid
    /// diacritics and cantillation marks has been determined (i.e., when an
    /// illegal mark is found, when a new base is found, or at the end of
    /// the Unicode run) the array is used to reorder the glyphs collected in
    /// the GlyphList object if reordering is needed.
    /// The diacritics reordering done here is not consistent with Unicode
    /// canonical ordering, but is instead consistent with the needs of
    /// current Hebrew OpenType fonts.
    /// </remarks>
    internal struct HebrewClusterCop 
    {
        // Caller-supplied scratch buffer: one character per reordering slot, plus
        // the validation slots read and written by IsIllegalMark().
        [SecurityCritical] 
        unsafe private char*       _clusterChars;  // our temporary array for reordering 
        // Caller-supplied scratch buffer: the glyph stored for each reordering slot
        // (0 means the slot is empty, see GetNextGlyph()).
        [SecurityCritical]
        unsafe private ushort*     _clusterGlyphs; // our temporary array for reordering 
        private HebrewClusterState _clusterState;  // current reordering state
        private bool               _textHasUnicodeControlChars; // set once AddCluster() meets a ZW control character
        private bool               _reorderingIsSuppressed; // when true AddCluster() no longer saves glyphs for reordering
        private bool               _clusterIsInvalid; // an illegal mark was seen; following marks get an inserted base
        private bool               _clusterRequiresReordering; // a mark arrived out of order; cleared by GetReorderedGlyphs()
        private bool               _clusterHasCGJ; // set once AddCluster() meets a CGJ character
 
        private HebrewCharClass     _baseCharClass;     // base char's class info
        private ushort             _clusterSize;   // number of diacritics currently in cluster 
        private ushort             _nextReorderedGlyphIx;  // for returning reordered glyphs (0 if no reordering)
        private ushort             _lastReorderedGlyphIx;  // for returning reordered glyphs (0 if no reordering)
        private ushort             _firstCharIx; // char index whose glyph slot receives the first reordered glyph
        private ushort             _lowerMarksCount; // bumped when a below puncta takes a lower-marks slot (other uses not visible here)
        private ushort             _upperMarksCount; // bumped when an upper puncta takes an upper-marks slot (other uses not visible here)
 
 
        /// <summary>
        /// HebrewClusterCop constructor
        /// </summary>
        /// <remarks>
        /// Stores the two caller-owned scratch buffers, puts every state field
        /// into its idle/zero value and clears the first
        /// HebrewReorderPosition.ClusterCopArraySize entries of both buffers.
        /// Both buffers must therefore hold at least that many elements.
        /// </remarks>
        /// <param name="clusterArray">scratch buffer for the cluster characters</param>
        /// <param name="clusterGlyphs">scratch buffer for the cluster glyphs</param>
        /// <SecurityNote>
        /// Critical - accepts a pointer input, uses a raw buffer
        /// </SecurityNote>
        [SecurityCritical]
        unsafe public HebrewClusterCop ( char *clusterArray, ushort *clusterGlyphs )
        {
            // scratch buffers
            _clusterChars = clusterArray;
            _clusterGlyphs = clusterGlyphs;

            // classification / state machine
            _baseCharClass = HebrewCharClass.UnknownCharClass;
            _clusterState = HebrewClusterState.Idle;

            // flags
            _textHasUnicodeControlChars = false;
            _clusterRequiresReordering = false;
            _clusterHasCGJ = false;
            _reorderingIsSuppressed = false;
            _clusterIsInvalid = false;

            // indices and counters
            _nextReorderedGlyphIx = 0;
            _lastReorderedGlyphIx = 0;
            _firstCharIx = 0;
            _clusterSize = 0;
            _upperMarksCount = 0;
            _lowerMarksCount = 0;

            // initialize the diacritics class arrays (they are on the stack)
            int slotCount = (int)HebrewReorderPosition.ClusterCopArraySize;
            for (int slot = 0; slot < slotCount; ++slot)
            {
                _clusterChars[slot] = '\u0000';
                _clusterGlyphs[slot] = 0;
            }
        }
 
        /// <summary>
        /// HebrewClusterCop.AddCluster - adds as many characters as possible to the
        ///                               current cluster
        /// </summary>
        /// <remarks>
        /// This routine is used to create a diacritics cluster on the current
        /// base char.  It continues to add characters till it notes a non-legal
        /// character.  Enforces member count restrictions for the various diacritic
        /// classes and keeps track of reordering needs.
        /// Every character consumed here has its glyph properties written to
        /// currentRun; illegal marks are flagged with RequiresInsertedBase so they
        /// get their own dotted circle.
        /// </remarks>
        /// <param name="currentRun">the run being shaped; advanced past every character consumed</param>
        /// <param name="currShape">shape info of the current (first) character of the cluster</param>
        /// <returns>
        ///     the value of _clusterRequiresReordering, i.e. true if the marks
        ///     collected so far still need to be reordered by GetReorderedGlyphs()
        /// </returns>
        /// <SecurityNote>
        /// Critical - calls critical code, uses unsafe accessors
        /// </SecurityNote>
        [SecurityCritical]
        unsafe internal bool AddCluster ( ref ShapingWorkspace currentRun, 
                                             CharShapeInfo currShape )
        { 
            bool isClusterInProgress = StartCluster( ref currentRun, currShape ); 
            char currChar = currentRun.CurrentChar;
            ushort currGlyph = currentRun.CharConverter.ToGlyph(currChar); 

            // One iteration per character; the loop condition at the bottom fetches
            // the next character for as long as the cluster is still in progress.
            do {
                // the shaper class is carried in the masked bits of the shape info
                HebrewCharClass currClass = (HebrewCharClass)(currShape & CharShapeInfo.ShaperClassMask);
 
                // add this character's glyph to the cluster
                if ( _clusterIsInvalid) 
                { 
                    // cluster already went bad: this mark gets its own inserted base
                    // and the cluster ends here
                    _reorderingIsSuppressed = true;
                    currShape |= CharShapeInfo.RequiresInsertedBase; 
                    isClusterInProgress = false;
                }
                else
                { 

                    // step 1. get the reordering state 
                    HebrewClusterState newState = clusterStateMap[ (int)currClass ]; 

 
                    // step 2. get the reordering array entry (clusterIx)

                    // The position (clusterIx) for a character in the reordering
                    // array depends on the reordering state for the character. 
                    // Several states can contain multiple entries.
                    HebrewReorderPosition clusterIx = clusterPositionMap[ (int)currClass ] ; 
                    switch (clusterIx) 
                    {
                         case HebrewReorderPosition.SinShinPosition: 
                         case HebrewReorderPosition.MasoraCirclePosition:
                            // verify that sin/shin dot or masora circle is legal on this
                            // base
                            _clusterIsInvalid = 
                                IsIllegalPair(_baseCharClass, currClass);
                            break; 
 
                         case HebrewReorderPosition.DageshRafePosition:
                            // diacritic is dagesh, rafe 
                            char firstDageshRafe = _clusterChars[ (ushort)clusterIx ];

                            // We've combined the dagesh and rafe into one reordering class.
                            // However we can have one dagesh and one rafe, but not two of 
                            // either class types.  If there is currently a dagesh/rafe class
                            // member already in this cluster, cluster is done 
                            // if it is the same char class or if there's actually already 
                            // 2 dagesh/rafe char's in array.  If there's not a dagesh in
                            // this cluster, cluster is done if the current base char doesn't 
                            // support dagesh.
                            if ( firstDageshRafe != '\u0000' )
                            {
                                HebrewCharClass prevDageshClass = (HebrewCharClass) 
                                    (currentRun.CharConverter.ToShapeInfo(firstDageshRafe) & CharShapeInfo.ShaperClassMask);
                                _clusterIsInvalid = (prevDageshClass == currClass); 
                                // move to the slot after the first dagesh/rafe; step 3 rejects
                                // the mark if that slot is occupied too
                                ++clusterIx; 
                            }
                            else 
                            {
                                _clusterIsInvalid = IsIllegalPair(_baseCharClass, currClass);
                            }
                            break; 

                         case HebrewReorderPosition.UnresolvedPosition: 
                            // all marks that need to be checked against the other 
                            // marks in the cluster will come here...
                            _clusterIsInvalid = IsIllegalMark(currChar, currClass); 

                            if ( !_clusterIsInvalid )
                            {
                                // need to resolve the cluster index (this character is of 
                                // a class that there can be more than one of per cluster)
                                clusterIx = ResolveClusterIx(newState); 
                                if (clusterIx == 0) 
                                {
                                    // ok, it is a puncta.  Punctas don't have their own 
                                    // reordering class.  They are part of the lower marks
                                    // or upper marks class.
                                    if (currChar == HebrewCharClassifier.BelowPuncta)
                                    { 
                                    // change the state, 'cause this is a lower puncta
                                        newState = HebrewClusterState.LowerMarkSeen; 
                                        clusterIx = (HebrewReorderPosition)((ushort)HebrewReorderPosition.LowerMarksStart 
                                                  + _lowerMarksCount++);
                                    } 
                                    else
                                    {
                                    // change the state, 'cause this is an upper puncta
                                        newState = HebrewClusterState.UpperMarkSeen; 
                                        clusterIx = (HebrewReorderPosition)((ushort)HebrewReorderPosition.UpperMarksStart
                                                  + _upperMarksCount++); 
                                    } 
                                }
                            } 
                            break;


                         case HebrewReorderPosition.CombiningMarkPosition: 
                            // combining mark.  Only one per base, won't "share" base.
                             isClusterInProgress = false; 
                            _reorderingIsSuppressed = true; // don't save in cluster array 

                            // We have mapped the CGJ character to the same class so check 
                            // for it...
                            if (currChar == UnicodeCharacter.CGJ)
                            {
                                // CGJ may only be applied to a nun base char, and 
                                // may not "share" any other mark with its base
                                // NOTE(review): as written, the cluster is marked INVALID exactly
                                // when the CGJ directly follows a nun (U+05E0) with no other marks,
                                // which is the one case the comment above calls legal.  The
                                // condition may be inverted -- confirm against StartCluster() and
                                // the expected rendering before changing it.
                                _clusterIsInvalid = (_clusterSize == 0 && currentRun.PreviousChar == 0x5e0); 
                                _clusterHasCGJ = true; 
                            }
                            else 
                            {
                                // any other combining mark is only accepted when no diacritic
                                // has been counted yet (one is tolerated after a CGJ)
                                _clusterIsInvalid = (_clusterSize != (_clusterHasCGJ ? 1 : 0));
                            }
                            break; 

                         case HebrewReorderPosition.ZWControl: 
                            _clusterRequiresReordering = false; 
                            _textHasUnicodeControlChars = true;
                            _reorderingIsSuppressed = true;      // we don't want to confuse things by reordering 
                            currShape |= CharShapeInfo.IsUnicodeLayoutControl;
                            break;

                         default: 
                             // This marks the end of the cluster.  We'll have to re-process this character
                             currentRun.Reset(); 
                             return _clusterRequiresReordering;     // leave now. 
                        }
 
                        // step 3.  make sure any invalid marks get their very own dotted circle base.
                        // A mark is also rejected when its slot is already occupied or, once a
                        // CGJ has been seen, when it is anything but a combining mark.

                        if (_clusterIsInvalid || (_clusterHasCGJ ?
                                    clusterIx != HebrewReorderPosition.CombiningMarkPosition : 
                                    _clusterChars[ (ushort)clusterIx ] != 0))
                        { 
                            // if there's some glyphs to reorder, do it now before we move on... 
                            if (_clusterRequiresReordering)
                            { 
                                GetReorderedGlyphs(ref currentRun);
                            }

                             // for consistency with unmanaged Uniscribe suppress any further reordering 
                            _clusterIsInvalid = _reorderingIsSuppressed = true;
 
                            // add a dotted circle (except if the preceding character was a ZWJ) 
                            if (!_textHasUnicodeControlChars ||
                               (currentRun.PreviousChar != UnicodeCharacter.ZWJ)) 
                            {
                                currShape |= CharShapeInfo.RequiresInsertedBase;
                            }
 
                            isClusterInProgress = false;
                        } 
                        else 
                        {
 
                            // step 4. save this new character in the reordering array
                            _clusterChars[ (ushort)clusterIx ] = currChar;

                            // step 5.  As long as reordering isn't suppressed, save the current glyph in the 
                            // reordering array.  (We may not need to reorder, but we might)
                            if (_reorderingIsSuppressed == false) 
                            { 
                                _clusterGlyphs[ (ushort)clusterIx ] = currGlyph;
                                _clusterSize++; 

                                // step 6. update the reordering state
                                if ( _lastReorderedGlyphIx < (ushort)clusterIx )
                                { 
                                    // Keep _lastReorderedGlyphIx at the "highest" member of the
                                    // reordering array 
                                    _lastReorderedGlyphIx = (ushort)clusterIx; 
                                }
 
                                if (newState >= _clusterState)
                                {
                                    // no reordering needed thus far...
                                    // keep track of the "highest" cluster member 
                                    _clusterState = newState;
 
                                } 
                                else
                                { 
                                    // this current char is "lower" than a previous character
                                    // in the cluster so we're gonna need to reorder this cluster
                                    _clusterRequiresReordering = true;
                                } 
                            }
                        } 
 
                }
 
                // record the (possibly updated) shape flags and glyph for this character
                currentRun.SetGlyphPropertiesUsingGlyph(currShape,currGlyph);

            } while ( isClusterInProgress &&
                      currentRun.GetNextCharProperties (out currChar, out currGlyph, out currShape) ); 

            return _clusterRequiresReordering; 
 
        }
 

        /// <summary>
        ///  HebrewClusterCop.GetNextGlyph - get the reordered glyph
        /// </summary>
        /// <remarks>
        /// This function is called to fetch each member of a reordered cluster.
        /// It scans the glyph array upwards from the slot after the one returned
        /// last time, skipping empty (0) slots, and stops at the highest slot
        /// that was filled.  The array slots themselves are left untouched.
        /// This function will return 0 (and zero the cluster size) if there are
        /// no valid glyphs left to return.
        /// </remarks>
        /// <returns>next reordered glyph, or 0 when the cluster is exhausted</returns>
        /// <SecurityNote>
        /// Critical - calls critical code, uses pointers
        /// </SecurityNote>
        [SecurityCritical]
        unsafe private ushort GetNextGlyph()
        {
            bool hasPendingGlyphs =
                _lastReorderedGlyphIx != 0 &&
                _clusterSize != 0 &&
                _lastReorderedGlyphIx < (ushort)HebrewReorderPosition.ReorderingArraySize;

            if (hasPendingGlyphs)
            {
                ushort highestSlot = _lastReorderedGlyphIx;

                for (ushort slot = (ushort)(_nextReorderedGlyphIx + 1); slot <= highestSlot; ++slot)
                {
                    ushort glyph = _clusterGlyphs[ slot ];
                    if (glyph != 0)
                    {
                        // remember where we stopped so the next call resumes after it
                        _nextReorderedGlyphIx = slot;
                        --_clusterSize;
                        return glyph;
                    }
                }
            }

            // nothing (more) to hand out
            _clusterSize = 0;
            return 0;
        }

        /// <summary>
        /// HebrewClusterCop.GetReorderedGlyphs - writes the reordered cluster
        /// glyphs back into the run.
        /// </summary>
        /// <remarks>
        /// When ReorderedCount is non-zero, the glyphs handed out by
        /// GetNextGlyph() are stored into consecutive glyph positions of
        /// currentRun, starting at the glyph index of _firstCharIx, until the
        /// cluster is drained.  The reordering request flag is then cleared.
        /// </remarks>
        /// <param name="currentRun">the run whose glyphs are overwritten</param>
        /// <returns>the value ReorderedCount had on entry</returns>
        /// <SecurityNote>
        /// Critical - calls critical code, uses pointers
        /// </SecurityNote>
        [SecurityCritical]
        unsafe internal ushort GetReorderedGlyphs(ref ShapingWorkspace currentRun)
        {
            ushort pendingCount = ReorderedCount;
            if (pendingCount == 0)
            {
                // nothing to reorder
                return pendingCount;
            }

            // we have a number of diacritics that need reordering.  So, put the
            // new glyphs in the right order in the already filled glyphlist.
            Debug.Assert (_firstCharIx + _clusterSize - (currentRun.IsFinished?1:0) <= currentRun.CurrentCharIx);

            ushort targetGlyphIx = currentRun.GetGlyphIx( _firstCharIx );
            while ( _clusterSize != 0 )
            {
                ushort glyph = GetNextGlyph();
                currentRun.SetGlyph(targetGlyphIx, glyph);
                targetGlyphIx++;
            }

            _clusterRequiresReordering = false;
            return pendingCount;
        }

        /// <summary>
        /// HebrewClusterCop.GetPrecomposedBaseVowel
        /// </summary>
        /// <remarks>
        /// Recognised precomposed base+vowel presentation forms:
        ///     0xfb1d == 0x05d9,0x05b4   (yod with hiriq)
        ///     0xfb1f == 0x05f2,0x05b7   (yiddish yod yod patah)
        ///     0xfb2e == 0x05d0,0x05b7   (alef with patah)
        ///     0xfb2f == 0x05d0,0x05b8   (alef with qamats)
        ///     0xfb4b == 0x05d5,0x05b9   (vav with holam)
        /// The previous XOR-based implementation tested for the value 0x54, which
        /// corresponds to 0xfb49 (shin with dagesh) rather than 0xfb4b, because
        /// ((0xfb00 ^ 0xfb4b) ^ 0x2e) ^ 0x33 is 0x56, not 0x54.  An explicit
        /// switch on the code points avoids that class of mistake.
        /// </remarks>
        /// <param name="baseChar">character to test</param>
        /// <returns>the vowel if classification is a precomposed base+vowel character,
        ///         otherwise returns '\u0000'</returns>
        private char GetPrecomposedBaseVowel(char baseChar)
        {
            switch (baseChar)
            {
                case '\ufb1d':              // yod with hiriq
                    return '\u05b4';

                case '\ufb1f':              // yiddish yod yod patah
                case '\ufb2e':              // alef with patah
                    return '\u05b7';

                case '\ufb2f':              // alef with qamats
                    return '\u05b8';

                case '\ufb4b':              // vav with holam
                    return '\u05b9';

                default:                    // not a precomposed base+vowel form
                    return '\u0000';
            }
        }

        /// <summary>
        /// HebrewClusterCop.IsIllegalMark - process mark
        /// </summary>
        /// <remarks>
        /// This routine is used to validate a mark.  It checks
        /// for number of marks in the class (2 maximum) and for
        /// duplicates.  Vowels, punctas and all other marks are each tracked
        /// in their own validation slot of the cluster character array:
        /// '\u0000' means nothing seen yet, a character value means exactly one
        /// mark seen, and '\uffff' means no further mark of that kind is allowed.
        /// A hataf vowel fills its slot immediately and is never accepted as a
        /// second vowel.
        /// </remarks>
        /// <param name="unicodeChar">the current char</param>
        /// <param name="charClass">the char's class</param>
        /// <returns>
        ///     false if can add accent.
        /// </returns>
        /// <SecurityNote>
        /// Critical - calls critical code, uses pointers
        /// </SecurityNote>
        [SecurityCritical]
        unsafe private bool IsIllegalMark(char unicodeChar, HebrewCharClass charClass)
        {
            const char SlotFull = '\uffff';
            const char SlotEmpty = '\u0000';

            bool isVowel = IsVowel(charClass);
            bool isHatafVowel = isVowel && charClass == HebrewCharClass.Hataf;

            // pick the validation slot for this kind of mark
            HebrewReorderPosition slotPosition;
            if (isVowel)
            {
                slotPosition = HebrewReorderPosition.VowelValidation;
            }
            else if (charClass == HebrewCharClass.Puncta)
            {
                slotPosition = HebrewReorderPosition.PunctaValidation;
            }
            else
            {
                slotPosition = HebrewReorderPosition.MarkValidation;
            }

            ushort slot = (ushort)slotPosition;
            char seen = _clusterChars[ slot ];
            char updated;

            if (seen == SlotEmpty)
            {
                // first one of its kind is always good; a hataf leaves no room for another vowel
                updated = isHatafVowel ? SlotFull : unicodeChar;
            }
            else
            {
                // a third mark (only two allowed), a duplicate, or a hataf as
                // second vowel is not valid
                if (seen == SlotFull || seen == unicodeChar || isHatafVowel)
                {
                    return true;
                }

                updated = SlotFull;
            }

            // save validation value for comparison against next mark
            _clusterChars[ slot ] = updated;
            return false;
        }
 
        /// <summary>
        /// HebrewClusterCop.IsIllegalPair - checks whether a mark class may
        /// attach to a base class
        /// </summary>
        /// <remarks>
        /// Uniscribe does not verify that a given mark is legal on a given
        /// base, so this behavior differs from the legacy implementation.
        /// A base whose flag is outside the AllBaseChars mask (which does not
        /// contain the SpaceChar flag) is always reported as illegal here;
        /// StartCluster separately accepts a no-break-space base.
        /// </remarks>
        /// <param name="baseClass">class of the current base char</param>
        /// <param name="markClass">class of the diacritic</param>
        /// <returns>false if the diacritic can be attached to the base</returns>
        private bool IsIllegalPair(HebrewCharClass baseClass, HebrewCharClass markClass)
        {
            int baseIx = (int)baseClass;

            // A class that is not one of the recognised base kinds takes no marks.
            if ((CharClassToFlag[baseIx] & HebrewCharClassFlags.AllBaseChars) == 0)
            {
                return true;
            }

            // The pair is legal only when the mark's flag is among the flags
            // this kind of base accepts.
            HebrewCharClassFlags markFlag = CharClassToFlag[(int)markClass];
            HebrewCharClassFlags acceptedByBase = BaseDiacriticValidationTable[baseIx];
            return (markFlag & acceptedByBase) == 0;
        }
 
        /// <summary>
        /// HebrewClusterCop.IsVowel - tests a classification against the
        /// AllVowels mask
        /// </summary>
        /// <param name="charClass">classification to test</param>
        /// <returns>
        /// true if the flag for the classification is part of
        /// HebrewCharClassFlags.AllVowels (the Holam, Hataf/ThaanaVowel and
        /// Vowel bits)
        /// </returns>
        private bool IsVowel(HebrewCharClass charClass)
        {
            HebrewCharClassFlags classFlag = CharClassToFlag[(int)charClass];
            HebrewCharClassFlags vowelBits = classFlag & HebrewCharClassFlags.AllVowels;
            return vowelBits != 0;
        }

        /// <summary>
        /// HebrewClusterCop.ReorderedCount - size of the cluster when it
        /// needs reordering, otherwise zero
        /// </summary>
        /// <remarks>
        /// Per the original note, this number doesn't include the base
        /// character.
        /// </remarks>
        internal ushort ReorderedCount
        {
            get
            {
                // Nothing is reported for clusters that are left in place.
                if (!_clusterRequiresReordering)
                {
                    return 0;
                }

                return _clusterSize;
            }
        }

        /// <summary>
        /// HebrewClusterCop.ResetClusterCop - returns the cluster cop to its
        /// idle, empty state
        /// </summary>
        /// <SecurityNote>
        /// Critical - is unsafe code
        /// </SecurityNote>
        [SecurityCritical]
        unsafe private void ResetClusterCop ()
        {
            // Clear every slot of both scratch arrays unconditionally,
            // including the validation slots at the end.
            int slotCount = (int)HebrewReorderPosition.ClusterCopArraySize;
            for (int slot = 0; slot < slotCount; ++slot)
            {
                _clusterChars[slot] = '\u0000';
                _clusterGlyphs[slot] = 0;
            }

            // counters
            _clusterSize = 0;
            _lowerMarksCount = 0;
            _upperMarksCount = 0;

            // per-cluster flags
            _clusterRequiresReordering = false;
            _clusterHasCGJ = false;
            _reorderingIsSuppressed = false;
            _clusterIsInvalid = false;

            // state machine and reordered-glyph cursors
            _clusterState = HebrewClusterState.Idle;
            _nextReorderedGlyphIx = 0;
            _lastReorderedGlyphIx = 0;
        }

        /// <summary>
        /// HebrewClusterCop.ResolveClusterIx - picks the reordering slot for
        /// a character whose class may occur more than once per cluster
        /// </summary>
        /// <remarks>
        /// Lower and upper marks take the next free slot of their run, which
        /// advances the matching counter as a side effect.  No bounds check
        /// is done here on those counters.
        /// </remarks>
        /// <param name="newState">cluster state produced by the character</param>
        /// <returns>
        /// the slot for the character; BaseCharPosition (zero) for PunctaSeen
        /// and for any state this routine does not handle
        /// </returns>
        private HebrewReorderPosition ResolveClusterIx (HebrewClusterState newState)
        {
            switch (newState)
            {
                case HebrewClusterState.HolamSeen:
                    return HebrewReorderPosition.HolamPosition;

                case HebrewClusterState.LowerMarkSeen:
                {
                    // next free slot in the lower-mark run
                    int lowerSlot = (int)HebrewReorderPosition.LowerMarksStart + _lowerMarksCount++;
                    return (HebrewReorderPosition)lowerSlot;
                }

                case HebrewClusterState.LowerRightMarkSeen:
                    // diacritic is lower right
                    return HebrewReorderPosition.LowerRightMarkPosition;

                case HebrewClusterState.UpperMarkSeen:
                {
                    // next free slot in the upper-mark run
                    int upperSlot = (int)HebrewReorderPosition.UpperMarksStart + _upperMarksCount++;
                    return (HebrewReorderPosition)upperSlot;
                }

                case HebrewClusterState.UpperLeftMarkSeen:
                    // diacritic is an upper left mark
                    return HebrewReorderPosition.UpperLeftMarkPosition;

                case HebrewClusterState.PunctaSeen:
                    // puncta is deliberately left at index zero
                    return HebrewReorderPosition.BaseCharPosition;

                default:
                    Invariant.Assert(false,"HebrewClusterCop.AddToCluster() - invalid newState");
                    return HebrewReorderPosition.BaseCharPosition;
            }
        }
 
        /// <summary>
        /// HebrewClusterCop.StartCluster - start of new cluster (potentially)
        /// </summary>
        /// <remarks>
        /// This routine is used at the start of each cluster.  It resets the
        /// cop, records where the cluster begins and decides whether the
        /// previous (base) character plus this first non-base character form
        /// a valid start of a cluster.
        /// </remarks>
        /// <param name="currentRun">workspace positioned on the current character</param>
        /// <param name="currShape">shape info of the current (non-base) character</param>
        /// <returns>true if more characters can be added to this cluster</returns>
        /// <SecurityNote>
        /// Critical - calls critical code, uses unsafe accessors
        /// </SecurityNote>
        [SecurityCritical]
        unsafe private bool StartCluster( ref ShapingWorkspace currentRun,
                                             CharShapeInfo currShape )
        {
            ResetClusterCop();

            // The previous character acts as the base of the cluster.
            CharShapeInfo baseShape = currentRun.PreviousShape;

            // Remember where the cluster starts so reordering can be checked
            // later (when the next base is detected).
            _firstCharIx = currentRun.CurrentCharIx;

            // Note any layout control (ZWJ/ZWNJ) on either side of the pair.
            bool currIsLayoutControl = (currShape & CharShapeInfo.IsUnicodeLayoutControl) != 0;
            bool baseIsLayoutControl = (baseShape & CharShapeInfo.IsUnicodeLayoutControl) != 0;
            _textHasUnicodeControlChars = currIsLayoutControl || baseIsLayoutControl;

            if (_textHasUnicodeControlChars)
            {
                // A joiner is involved: allow whatever follows, but never
                // reorder.  The base class is not evaluated on this path.
                _reorderingIsSuppressed = true;
                return !_clusterIsInvalid;
            }

            HebrewCharClass markClass = (HebrewCharClass)(currShape & CharShapeInfo.ShaperClassMask);
            _baseCharClass = (HebrewCharClass)(baseShape & CharShapeInfo.ShaperClassMask);
            _clusterIsInvalid = IsIllegalPair(_baseCharClass, markClass);

            if (!_clusterIsInvalid)
            {
                // The base is good.  If it is a precomposed form that already
                // includes a vowel, seed the vowel validation slot with it.
                _clusterChars[ (int)HebrewReorderPosition.VowelValidation ] =
                                          GetPrecomposedBaseVowel(currentRun.PreviousChar);
                return true;
            }

            if (currentRun.PreviousChar == UnicodeCharacter.NoBreakSpace)
            {
                // On a NBSP base this diacritic is acceptable, so the cluster
                // is not flagged invalid - but it is the only character
                // allowed in the cluster.
                _clusterIsInvalid = false;
            }

            // Either the pair is invalid or the NBSP cluster is already full.
            return false;
        }

 
        /// <summary>
        /// HebrewCharClassFlags - enumeration of Hebrew character classification flags
        /// </summary>
        /// <remarks>
        /// This enum is linked to the list of char classes in the HebrewCharClass
        /// enum; keep them in sync.
        /// Besides the single-bit class flags, several composite masks are
        /// defined: the "All..." masks are used to test the char class of a
        /// unicode char.
        /// </remarks>
        [Flags]
        private enum HebrewCharClassFlags : uint
        {
            UnknownCharClass    = 0,

            // --- base character kinds ---
            PunctuationMark     = 0x000001,
            AcceptsVowel        = 0x000002,   // Thaana base glyph can have a vowel (only)
            AcceptsNotShin      = 0x000004,   // Hebrew base can have cantillation | diacritic
            AcceptsRafe         = 0x000008,   // Hebrew base can have rafe | cantillation | diacritic
            AcceptsDagesh       = 0x000010,   // Hebrew base can have dagesh | cantillation | diacritic
            ShinBase            = 0x000020,   // Hebrew shin can have shin/sin dot | dagesh | cantillation | diacritic
            SpaceChar           = 0x000040,   // Space char may take one mark

            // mask for all possible bases (0x003E; SpaceChar is not included)
            AllBaseChars        = AcceptsVowel | AcceptsNotShin | AcceptsRafe | AcceptsDagesh | ShinBase,

            // --- dots ---
            ShinSinDot          = 0x000080,   // Hebrew shin/sin dot (05C1, 05C2)
            Dagesh              = 0x000100,   // Hebrew dagesh/mapiq (05BC)
            Rafe                = 0x000200,   // Hebrew Rafe (05BF)
            DageshRafe          = Dagesh | Rafe,                      // dagesh and rafe mask (0x0300)

            // --- vowels ---
            Holam               = 0x000400,   // Hebrew Holam (05B9)
            Hataf               = 0x000800,   // Hebrew hataf -- only one per base
            ThaanaVowel         = Hataf,      // Thaana vowel -- only one per base (shares the Hataf bit)
            Vowel               = 0x001000,   // Hebrew vowel -- only two per base
            AllVowels           = Holam | Hataf | Vowel,              // mask for vowels only (0x1c00)

            // --- cantillation marks ---
            BelowMark           = 0x002000,   // Hebrew lower accents
            LowerRightMark      = 0x004000,   // Hebrew Dehi, Yetiv accent (always last in lower sequence)
            UpperMark           = 0x008000,   // Hebrew upper marks
            UpperLeftMark       = 0x010000,   // Hebrew zinor (upper left mark) and others
            Puncta              = 0x020000,   // Hebrew upper dot (05C4)

            // mask for lower cantillation marks only (0x06000)
            AllLowerAccents     = BelowMark | LowerRightMark,
            // mask for cantillation marks only (0x1e000)
            AllAccents          = BelowMark | LowerRightMark | UpperMark | UpperLeftMark,

            MasoraCircle        = 0x040000,   // Hebrew cantillation - MasoraCircle Circle
            CombiningMark       = 0x080000,
            ControlChar         = 0x100000,

            // every bit from ShinSinDot up to CombiningMark (0x0FFF80)
            AllDiacritics       = ShinSinDot | DageshRafe | AllVowels | AllAccents
                                  | Puncta | MasoraCircle | CombiningMark,

            // every bit from Holam up to ControlChar (0x1FFC00) -- includes ZWJ, ZWNJ
            AllVowelsAndAccents = AllVowels | AllAccents
                                  | Puncta | MasoraCircle | CombiningMark | ControlChar
        };

        // BaseDiacriticValidationTable - table used for validating a given base/mark pair.
        // For each base class (indexed by the base's HebrewCharClass value, in
        // the same order as the first eight CharClassToFlag entries) it holds
        // the set of mark flags that may legally attach to that base.
        // IsIllegalPair only consults it for classes whose flag is part of
        // AllBaseChars, so the entries for unknown, punctuation and space are
        // never reached from there.
        // The table is never meant to be replaced, hence readonly (as for
        // clusterPositionMap and clusterStateMap).
        private static readonly HebrewCharClassFlags[] BaseDiacriticValidationTable =
        {
           0,            // unknown chars accept no marks
           0,            // punctuation marks accept no marks
           HebrewCharClassFlags.ThaanaVowel, // base accepts vowels only (thaana bases)
           HebrewCharClassFlags.AllVowelsAndAccents | HebrewCharClassFlags.DageshRafe,  // AcceptsNotShin base
           HebrewCharClassFlags.AllVowelsAndAccents | HebrewCharClassFlags.Rafe,        // AcceptsRafe base
           HebrewCharClassFlags.AllVowelsAndAccents | HebrewCharClassFlags.Dagesh,      // AcceptsDagesh base
           HebrewCharClassFlags.AllVowelsAndAccents | HebrewCharClassFlags.DageshRafe | HebrewCharClassFlags.ShinSinDot, // shin base
           0             // space char
        };

        // CharClassToFlag - maps a HebrewCharClass value (used as the index)
        // to its HebrewCharClassFlags bit, so that a class can be tested
        // against the composite "All..." masks.  The order must match the
        // HebrewCharClass enum.
        // The table is never meant to be replaced, hence readonly (as for
        // clusterPositionMap and clusterStateMap).
        private static readonly HebrewCharClassFlags[] CharClassToFlag =
        {
            HebrewCharClassFlags.UnknownCharClass,
            HebrewCharClassFlags.PunctuationMark,
            HebrewCharClassFlags.AcceptsVowel, // Thaana base glyph can have a vowel (only)
            HebrewCharClassFlags.AcceptsNotShin, // Hebrew base can have cantillation | diacritic
            HebrewCharClassFlags.AcceptsRafe, // Hebrew base can have rafe | cantillation | diacritic
            HebrewCharClassFlags.AcceptsDagesh, // Hebrew base can have dagesh | cantillation | diacritic
            HebrewCharClassFlags.ShinBase, // Hebrew shin can have shin/sin dot | dagesh | cantillation | diacritic
            HebrewCharClassFlags.SpaceChar, // Space char may take one mark
            HebrewCharClassFlags.ShinSinDot, // Hebrew shin/sin dot (05C1, 05C2)
            HebrewCharClassFlags.Dagesh,  // Hebrew Dagesh/mapiq (05BC)
            HebrewCharClassFlags.Rafe,      // Hebrew Rafe
            HebrewCharClassFlags.Holam,  // Hebrew Holam
            HebrewCharClassFlags.Hataf, // Hebrew hataf -- only one per base
            HebrewCharClassFlags.Vowel, // Hebrew vowel -- only two per base
            HebrewCharClassFlags.BelowMark, // Hebrew cantillation - Below
            HebrewCharClassFlags.LowerRightMark, // Hebrew cantillation - Below right
            HebrewCharClassFlags.UpperMark, // Hebrew cantillation - Above marks
            HebrewCharClassFlags.UpperLeftMark, // Hebrew cantillation - Above left
            HebrewCharClassFlags.Puncta, // Hebrew upper dot (05C4)
            HebrewCharClassFlags.MasoraCircle, // Hebrew cantillation - MasoraCircle Circle
            HebrewCharClassFlags.CombiningMark, // combining marks (0323,0307,0308)
            HebrewCharClassFlags.ControlChar // ZWJ, ZWNJ, CGJ
        };

        /// <summary>
        /// HebrewClusterCop.HebrewClusterState: state definitions
        /// </summary>
        /// <remarks>
        /// Each state names the kind of character most recently seen in the
        /// cluster; Idle is the state after a reset.  The ordinals are
        /// spelled out so the numbering is visible at a glance.
        /// </remarks>
        private enum HebrewClusterState : byte
        {
            Idle                     = 0,
            SinShinSeen              = 1,
            DageshRafeSeen           = 2,
            HolamSeen                = 3,
            PunctaSeen               = 4,
            LowerMarkSeen            = 5,
            LowerRightMarkSeen       = 6,
            UpperMarkSeen            = 7,
            UpperLeftMarkSeen        = 8,
            MasoraCircleSeen         = 9,
            CombiningMarkSeen        = 10,
            UnicodeLayoutControlSeen = 11
        }

        // Reordering array positions.  Each member is a slot index into the
        // cluster cop's scratch arrays; every slot is expressed relative to
        // the one before it, with the resolved value noted alongside.
        private enum HebrewReorderPosition : byte
        {
            BaseCharPosition       = 0,                           //  0 - not used
            SinShinPosition        = BaseCharPosition + 1,        //  1
            // one spare slot follows (3) - presumably so that a dagesh and a
            // rafe can both be stored
            DageshRafePosition     = SinShinPosition + 1,         //  2
            HolamPosition          = DageshRafePosition + 2,      //  4
            // run of four slots (5-8) that ResolveClusterIx hands out to lower marks
            LowerMarksStart        = HolamPosition + 1,           //  5
            // lower right marks follow all other lower marks
            LowerRightMarkPosition = LowerMarksStart + 4,         //  9
            // upper marks follow lower marks; run of three slots (10-12)
            UpperMarksStart        = LowerRightMarkPosition + 1,  // 10
            // upper left marks follow all other upper marks
            UpperLeftMarkPosition  = UpperMarksStart + 3,         // 13
            // masora circle follows upper marks
            MasoraCirclePosition   = UpperLeftMarkPosition + 1,   // 14
            CombiningMarkPosition  = MasoraCirclePosition + 1,    // 15
            ReorderingArraySize    = CombiningMarkPosition + 1,   // 16 - count of reorderable slots
            ZWControl              = ReorderingArraySize,         // 16 - not added to the array
            // bookkeeping slots used by IsIllegalMark
            VowelValidation        = ZWControl + 1,               // 17 - for validating vowels
            MarkValidation         = VowelValidation + 1,         // 18 - for validating marks
            PunctaValidation       = MarkValidation + 1,          // 19 - for validating puncta
            ClusterCopArraySize    = PunctaValidation + 1,        // 20 - total slots cleared on reset
            UnresolvedPosition     = ClusterCopArraySize          // 20 - not in array (index too big)
        }
 
        // Number of slots in the cluster cop's scratch arrays: the reordering
        // slots plus the validation slots (HebrewReorderPosition.ClusterCopArraySize).
        // This is the slot count that ResetClusterCop clears.
        public const int ReorderArraySize = (int)HebrewReorderPosition.ClusterCopArraySize;
 
        // clusterPositionMap - maps each Hebrew char class to its slot in the
        // reordering array.  The entries are listed in the same order as
        // CharClassToFlag (presumably the table is indexed by the
        // HebrewCharClass value -- confirm against the caller).
        private static readonly HebrewReorderPosition[] clusterPositionMap =
        { 
            // Base classes map to BaseCharPosition.  Classes that can occur
            // more than once per cluster map to UnresolvedPosition; their
            // actual slot is presumably obtained from ResolveClusterIx.
            HebrewReorderPosition.BaseCharPosition,    // Unknown class
            HebrewReorderPosition.BaseCharPosition,    // PunctuationMark -- cannot take diacritics 
            HebrewReorderPosition.BaseCharPosition,    // Thaana base glyph can have single vowel
            HebrewReorderPosition.BaseCharPosition,    // Hebrew base can have cantillation | vowel 
            HebrewReorderPosition.BaseCharPosition,    // Hebrew base can have rafe | cantillation | vowel 
            HebrewReorderPosition.BaseCharPosition,    // Hebrew base can have dagesh | cantillation | vowel
            HebrewReorderPosition.BaseCharPosition,    // Hebrew shin can have shin/sin dot && dagesh && rafe && cantillation && vowel 
            HebrewReorderPosition.BaseCharPosition,    // space char
            HebrewReorderPosition.SinShinPosition,     // Hebrew shin/sin dot (05C1, 05C2)
            HebrewReorderPosition.DageshRafePosition,  // Hebrew dagesh/mapiq (05BC)
            HebrewReorderPosition.DageshRafePosition,  // Hebrew rafe 
            HebrewReorderPosition.UnresolvedPosition,  // Hebrew holam
            HebrewReorderPosition.UnresolvedPosition,  // Hebrew hataf -- only one per base 
            HebrewReorderPosition.UnresolvedPosition,  // Hebrew/Thaana vowel -- only two per base 
            HebrewReorderPosition.UnresolvedPosition,  // Hebrew cantillation - Below
            HebrewReorderPosition.UnresolvedPosition,  // Hebrew cantillation - Below right 
            HebrewReorderPosition.UnresolvedPosition,  // Hebrew cantillation - Above
            HebrewReorderPosition.UnresolvedPosition,  // Hebrew cantillation - Above left
            HebrewReorderPosition.UnresolvedPosition,  // puncta
            HebrewReorderPosition.MasoraCirclePosition,// Hebrew cantillation - MasoraCircle Circle 
            HebrewReorderPosition.CombiningMarkPosition,   // combining marks
            HebrewReorderPosition.ZWControl 
            // ^ layout control chars (ZWControl is not a slot in the array)
            // extent of array; NumberOfHebrewCharClasses members 
        };
 
        // clusterStateMap - maps each Hebrew char class to the cluster state
        // that seeing a character of that class produces.  The entries are
        // listed in the same order as CharClassToFlag (presumably the table
        // is indexed by the HebrewCharClass value -- confirm against the caller).
        private static readonly HebrewClusterState[] clusterStateMap =
        {
            // All base classes map to Idle; hataf, vowel and below
            // cantillation marks all share LowerMarkSeen.
            HebrewClusterState.Idle,     // Unknown class
            HebrewClusterState.Idle,     // PunctuationMark -- cannot take diacritics 
            HebrewClusterState.Idle,     // Thaana base glyph can have single vowel
            HebrewClusterState.Idle,     // Hebrew base can have cantillation | vowel 
            HebrewClusterState.Idle,     // Hebrew base can have rafe | cantillation | vowel
            HebrewClusterState.Idle,     // Hebrew base can have dagesh | cantillation | vowel
            HebrewClusterState.Idle,     // Hebrew shin can have shin/sin dot | dagesh | cantillation | vowel
            HebrewClusterState.Idle,     // space char
            HebrewClusterState.SinShinSeen, // Hebrew shin/sin dot (05C1, 05C2)
            HebrewClusterState.DageshRafeSeen,// Hebrew dagesh/mapiq (05BC) 
            HebrewClusterState.DageshRafeSeen,// Hebrew rafe 
            HebrewClusterState.HolamSeen,    // Hebrew holam
            HebrewClusterState.LowerMarkSeen,// Hebrew hataf -- only one per base 
            HebrewClusterState.LowerMarkSeen,// Hebrew/Thaana vowel -- only two per base
            HebrewClusterState.LowerMarkSeen,// Hebrew cantillation - Below
            HebrewClusterState.LowerRightMarkSeen, // Hebrew cantillation - Below right
            HebrewClusterState.UpperMarkSeen, // Hebrew cantillation - Above 
            HebrewClusterState.UpperLeftMarkSeen, // Hebrew cantillation - Above left
            HebrewClusterState.PunctaSeen,    // puncta seen 
            HebrewClusterState.MasoraCircleSeen, // Hebrew cantillation - MasoraCircle Circle 
            HebrewClusterState.CombiningMarkSeen, // combining marks
            HebrewClusterState.UnicodeLayoutControlSeen 
            // ^ layout control chars
            // extent of array; NumberOfHebrewCharClasses members
        };
    }
 

  /// <summary>
  /// HebrewCharClassifier - The char converter for Hebrew (and Thaana).
  /// Supplies the per-script classification tables to the base classifier
  /// and classifies the few characters outside the script block that the
  /// Hebrew shaper cares about.
  /// </summary>
  internal class HebrewCharClassifier : ShaperCharacterClassifier
  {

      /// <summary>
      /// Sets up the classifier for the given script.
      /// </summary>
      /// <param name="scriptTag">script being shaped; Hebrew and Thaana select a table</param>
      /// <param name="fontFace">font face, passed through to the base classifier</param>
      public HebrewCharClassifier(ScriptTags scriptTag, GlyphTypeface fontFace) : base(scriptTag, fontFace)
      {
          // classes for the characters that are common to all scripts
          _unknownClass =   UnknownCharClass;
          _spaceClass =     (byte)HebrewCharClass.SpaceChar;
          _zwControlClass = (byte)HebrewCharClass.UnicodeLayoutControl;
          _zwjClass =       (byte)HebrewCharClass.UnicodeLayoutControl;
          _zwnjClass =      (byte)HebrewCharClass.UnicodeLayoutControl;
          _shyClass =       UnknownCharClass;

          // NOTE(review): for any other script tag the range and table
          // fields below are left as the base class set them -- confirm
          // that callers only pass Hebrew or Thaana.
          if (scriptTag == ScriptTags.Hebrew)
          {
              _firstChar     = '\u0590';     // this is the first Hebrew Unicode char
              _lastChar      = '\u05FF';     // this is the last Hebrew Unicode char
              _xorMask       = 0x580;     // this mask is used in GetCharShapeInfo
              _xorRange      = 0x080;     // this is used in GetCharShapeInfo

              _charClassTable = _hebrewCharClasses;
          }
          else if (scriptTag == ScriptTags.Thaana)
          {
              _firstChar     = '\u0780';
              _lastChar      = '\u07BF';
              _xorMask       = 0x780;
              _xorRange      = 0x040;
              _charClassTable = _thaanaCharClasses;
          }
      }

      // Range covered by _alphabeticPresentationFormsCharClasses.
      private const char AlphabeticPresentationRangeStart = '\uFB1D';
      private const char AlphabeticPresentationRangeEnd = '\uFB4F';

      private const char UnicodeCombiningDotAbove  =   '\u0307'; // combining dot above
      private const char UnicodeCombiningDiaresis  =   '\u0308'; // combining diaresis
      internal const char BelowPuncta               =   '\u0323'; // combining dot below

      /// <summary>
      /// HebrewCharClassifier.ToShapeInfo - returns CharShapeInfo for the Unicode character
      /// </summary>
      /// <remarks>
      /// The base classifier is asked first.  Only when it reports the
      /// unknown class are the extra characters checked: the alphabetic
      /// presentation forms, the combining dot above / diaresis, the
      /// combining dot below, and CGJ.
      /// </remarks>
      /// <param name="unicodeChar">character to classify</param>
      /// <returns>shape info for the character</returns>
      public override CharShapeInfo ToShapeInfo(char unicodeChar)
      {
          CharShapeInfo charShape = base.ToShapeInfo(unicodeChar);
          HebrewCharClass charClass = (HebrewCharClass)(charShape & CharShapeInfo.ShaperClassMask);

          // The base classifier already knows this character.
          if (charClass != HebrewCharClass.UnknownCharClass)
          {
              return charShape;
          }

          if (unicodeChar >= AlphabeticPresentationRangeStart && unicodeChar <= AlphabeticPresentationRangeEnd)
          {
              // presentation forms have their own table
              charShape = (CharShapeInfo)
                       _alphabeticPresentationFormsCharClasses[unicodeChar - AlphabeticPresentationRangeStart];
          }
          else if ( unicodeChar == UnicodeCombiningDotAbove ||
                    unicodeChar == UnicodeCombiningDiaresis)
          {
              charShape = (CharShapeInfo) HebrewCharClass.UnicodeCombiningMark;
          }
          else if (unicodeChar == BelowPuncta)
          {
              charShape = (CharShapeInfo) HebrewCharClass.Puncta;
          }
          else if (unicodeChar == UnicodeCharacter.CGJ)
          {
              // CGJ is additionally flagged as a layout control
              charShape = (CharShapeInfo) HebrewCharClass.UnicodeCGJ | CharShapeInfo.IsUnicodeLayoutControl;
          }

          return charShape;
      }



#region Classification Tables

      // these consts are so the tables below will be more readable.
      // Base character classes carry the StartOfCluster flag; mark classes
      // do not.
      private const HebrewCharClass StartOfCluster = (HebrewCharClass)CharShapeInfo.IsStartOfCluster;
      private const byte UnknownCharClass = (byte)(HebrewCharClass.UnknownCharClass | StartOfCluster);
      private const byte PunctuationMark = (byte)(HebrewCharClass.PunctuationMark | StartOfCluster);
      private const byte BaseAcceptsVowel = (byte)(HebrewCharClass.BaseAcceptsVowel | StartOfCluster);
      private const byte BaseAcceptsAll = (byte)(HebrewCharClass.BaseAcceptsNotShin | StartOfCluster);
      private const byte BaseAcceptsRafe = (byte)(HebrewCharClass.BaseAcceptsRafe | StartOfCluster);
      private const byte BaseAcceptsDagesh = (byte)(HebrewCharClass.BaseAcceptsDagesh | StartOfCluster);
      private const byte ShinBaseChar = (byte)(HebrewCharClass.ShinBaseChar | StartOfCluster);
      private const byte ShinSinDot = (byte)HebrewCharClass.ShinSinDot;
      private const byte Dagesh = (byte)HebrewCharClass.Dagesh;
      private const byte Rafe = (byte)HebrewCharClass.Rafe;
      private const byte Holam = (byte)HebrewCharClass.Holam;
      private const byte Hataf = (byte)HebrewCharClass.Hataf;
      private const byte ThaanaVowel = (byte)HebrewCharClass.Hataf;   // Thaana vowels share the Hataf class
      private const byte Vowel = (byte)HebrewCharClass.Vowel;
      // the next four names all resolve to the LowerMark class
      private const byte Meteg = (byte)HebrewCharClass.LowerMark;
      private const byte BelowCenterRightMark = (byte)HebrewCharClass.LowerMark;
      private const byte BelowCenterLeftMark = (byte)HebrewCharClass.LowerMark;
      private const byte BelowLeftMark = (byte)HebrewCharClass.LowerMark;
      private const byte Puncta = (byte)HebrewCharClass.Puncta;
      private const byte LowerRightMark = (byte)HebrewCharClass.LowerRightMark;
      // the next four names all resolve to the UpperMark class
      private const byte AboveRightMark = (byte)HebrewCharClass.UpperMark;
      private const byte AboveCenterRightMark = (byte)HebrewCharClass.UpperMark;
      private const byte AboveCenterMark = (byte)HebrewCharClass.UpperMark;
      private const byte AboveCenterLeftMark = (byte)HebrewCharClass.UpperMark;
      private const byte UpperLeftMark = (byte)HebrewCharClass.UpperLeftMark;
      private const byte MasoraCircle = (byte)HebrewCharClass.MasoraCircle;

      // All the Hebrew Unicode chars (U+590 - U+5FF) classified
      private static readonly byte[] _hebrewCharClasses  = //new HebrewCharClass[]
      {
          // U+590 (Hebrew Unicode)
          // 0, 4, 8, C       1, 5, 9, D        2, 6, A, E         3, 7, B, F
          UnknownCharClass,   BelowCenterLeftMark,AboveCenterMark, AboveCenterMark,
          AboveCenterMark,    AboveCenterMark,  BelowCenterRightMark, AboveCenterLeftMark,
          AboveCenterLeftMark,UpperLeftMark,    LowerRightMark,    BelowLeftMark,
          AboveCenterLeftMark,AboveRightMark,   AboveCenterLeftMark,AboveCenterLeftMark,

          // U+5A0 (Hebrew Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E        3, 7, B, F
          AboveRightMark,     AboveCenterLeftMark,UnknownCharClass,BelowCenterLeftMark,
          BelowCenterRightMark,BelowLeftMark,    BelowCenterLeftMark,BelowCenterRightMark,
          AboveCenterLeftMark,UpperLeftMark,     BelowCenterRightMark,AboveCenterMark,
          AboveCenterLeftMark,LowerRightMark,    UpperLeftMark,    MasoraCircle,

          // U+5B0 (Hebrew Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E        3, 7, B, F
          Vowel,              Hataf,             Hataf,            Hataf,
          Vowel,              Vowel,             Vowel,            Vowel,
          Vowel,              Holam,             UnknownCharClass, Vowel,
          Dagesh,             Meteg,             PunctuationMark,  Rafe,

          // U+5C0 (Hebrew Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E        3, 7, B, F
          PunctuationMark,    ShinSinDot,        ShinSinDot,       PunctuationMark,
          Puncta,             UnknownCharClass,  UnknownCharClass, UnknownCharClass,
          UnknownCharClass,   UnknownCharClass,  UnknownCharClass, UnknownCharClass,
          UnknownCharClass,   UnknownCharClass,  UnknownCharClass, UnknownCharClass,

          // U+5D0 (Hebrew Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
          BaseAcceptsAll,     BaseAcceptsAll,    BaseAcceptsAll,    BaseAcceptsAll,
          BaseAcceptsAll,     BaseAcceptsAll,    BaseAcceptsAll,    BaseAcceptsAll,
          BaseAcceptsAll,     BaseAcceptsAll,    BaseAcceptsAll,    BaseAcceptsAll,
          BaseAcceptsAll,     BaseAcceptsAll,    BaseAcceptsAll,    BaseAcceptsAll,

          // U+5E0 (Hebrew Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
          BaseAcceptsAll,     BaseAcceptsAll,    BaseAcceptsAll,    BaseAcceptsAll,
          BaseAcceptsAll,     BaseAcceptsAll,    BaseAcceptsAll,    BaseAcceptsAll,
          BaseAcceptsAll,     ShinBaseChar,      BaseAcceptsAll,    UnknownCharClass,
          UnknownCharClass,   UnknownCharClass,  UnknownCharClass,  UnknownCharClass,

          // U+5F0 (Hebrew Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
          BaseAcceptsRafe,    BaseAcceptsRafe,   BaseAcceptsRafe,   PunctuationMark,
          PunctuationMark,    UnknownCharClass,  UnknownCharClass,  UnknownCharClass,
          UnknownCharClass,   UnknownCharClass,  UnknownCharClass,  UnknownCharClass,
          UnknownCharClass,   UnknownCharClass,  UnknownCharClass,  UnknownCharClass,
      };

      // All the Thaana Unicode chars (U+780 - U+7BF) classifications
      private static readonly byte[] _thaanaCharClasses = // new HebrewCharClass[]
      {
          // U+780 (Thaana Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
          BaseAcceptsVowel,   BaseAcceptsVowel,  BaseAcceptsVowel,  BaseAcceptsVowel,
          BaseAcceptsVowel,   BaseAcceptsVowel,  BaseAcceptsVowel,  BaseAcceptsVowel,
          BaseAcceptsVowel,   BaseAcceptsVowel,  BaseAcceptsVowel,  BaseAcceptsVowel,
          BaseAcceptsVowel,   BaseAcceptsVowel,  BaseAcceptsVowel,  BaseAcceptsVowel,

          // U+790 (Thaana Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
          BaseAcceptsVowel,   BaseAcceptsVowel,  BaseAcceptsVowel,  BaseAcceptsVowel,
          BaseAcceptsVowel,   BaseAcceptsVowel,  BaseAcceptsVowel,  BaseAcceptsVowel,
          BaseAcceptsVowel,   BaseAcceptsVowel,  BaseAcceptsVowel,  BaseAcceptsVowel,
          BaseAcceptsVowel,   BaseAcceptsVowel,  BaseAcceptsVowel,  BaseAcceptsVowel,

          // U+7A0 (Thaana Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
          BaseAcceptsVowel,   BaseAcceptsVowel,  BaseAcceptsVowel,  BaseAcceptsVowel,
          BaseAcceptsVowel,   BaseAcceptsVowel,  ThaanaVowel,       ThaanaVowel,
          ThaanaVowel,        ThaanaVowel,       ThaanaVowel,       ThaanaVowel,
          ThaanaVowel,        ThaanaVowel,       ThaanaVowel,       ThaanaVowel,

          // U+7B0 (Thaana Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
          ThaanaVowel,        BaseAcceptsVowel,  UnknownCharClass,  UnknownCharClass,
          UnknownCharClass,   UnknownCharClass,  UnknownCharClass,  UnknownCharClass,
          UnknownCharClass,   UnknownCharClass,  UnknownCharClass,  UnknownCharClass,
          UnknownCharClass,   UnknownCharClass,  UnknownCharClass,  UnknownCharClass,
      };

      // All the Hebrew Alphabetic Presentation Forms Unicode chars (U+FB1D - U+FB4F).
      // The first row is short because the range starts at U+FB1D.
      private static readonly byte[] _alphabeticPresentationFormsCharClasses = // new HebrewCharClass[]
      {
          // U+FB10 (Presentation Forms Unicode) - entries for U+FB1D..U+FB1F only
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
                              BaseAcceptsAll,    Rafe,              BaseAcceptsRafe,

          // U+FB20 (Presentation Forms Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
          BaseAcceptsAll,     BaseAcceptsAll,    BaseAcceptsAll,    BaseAcceptsAll,
          BaseAcceptsAll,     BaseAcceptsAll,    BaseAcceptsAll,    BaseAcceptsAll,
          BaseAcceptsAll,     PunctuationMark,   BaseAcceptsAll,    BaseAcceptsAll,
          BaseAcceptsRafe,    BaseAcceptsRafe,   BaseAcceptsAll,    BaseAcceptsAll,

          // U+FB30 (Presentation Forms Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
          BaseAcceptsRafe,    BaseAcceptsRafe,   BaseAcceptsRafe,   BaseAcceptsRafe,
          BaseAcceptsRafe,    BaseAcceptsRafe,   BaseAcceptsRafe,   UnknownCharClass,
          BaseAcceptsRafe,    BaseAcceptsRafe,   BaseAcceptsRafe,   BaseAcceptsRafe,
          BaseAcceptsRafe,    UnknownCharClass,  BaseAcceptsRafe,   UnknownCharClass,

          // U+FB40 (Presentation Forms Unicode)
          // 0, 4, 8, C       1, 5, 9, D         2, 6, A, E         3, 7, B, F
          BaseAcceptsRafe,    BaseAcceptsRafe,   UnknownCharClass,  BaseAcceptsRafe,
          BaseAcceptsRafe,    UnknownCharClass,  BaseAcceptsRafe,   BaseAcceptsRafe,
          BaseAcceptsRafe,    BaseAcceptsRafe,   BaseAcceptsRafe,   BaseAcceptsAll,
          BaseAcceptsDagesh,  BaseAcceptsDagesh, BaseAcceptsDagesh, UnknownCharClass,

      };
#endregion  // end of Classification Tables
  }

} 

// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
// Copyright (c) Microsoft Corporation. All rights reserved.

                        

Link Menu

Network programming in C#, Network Programming in VB.NET, Network Programming in .NET
This book is available now!
Buy at Amazon US or
Buy at Amazon UK