Itemizer.cs source code in C# .NET

Source code for the .NET framework in C#

                        

Code:

/ Net / Net / 3.5.50727.3053 / DEVDIV / depot / DevDiv / releases / Orcas / SP / wpf / src / Core / CSharp / MS / Internal / Shaping / Itemizer.cs / 1 / Itemizer.cs

                            using System.Windows; 
using System;
using System.Security;
using System.Security.Permissions;
using System.Collections; 
using System.Globalization;
using System.Windows.Media.TextFormatting; 
using System.Diagnostics; 

namespace MS.Internal.Shaping 
{
    /// <summary>
    /// Itemizer is the class responsible for text itemization. It simply splits the text
    /// into runs with script ID, flags, and state.
    /// To use it, just call Itemizer.PrimaryItemization() with suitable parameters.
    /// Itemizer splits the text according to the class of each character in the text.
    ///
    /// We have the following rules:
    ///   - Every sequence of control characters (except joiner characters) is split
    ///     into a separate run.
    ///   - Any run that has a Unicode extended character is marked as HasExtendedCharacter.
    ///   - Every sequence of numbers is split into a separate run. Note this includes the number
    ///     separators and terminators according to the number analysis.
    ///   - Combining marks are included in the previous item and take the script ID
    ///     of that item.
    ///   - Weak characters are merged with the previous item. If the run starts with weak
    ///     characters, then those weak characters are merged with the following strong characters.
    ///   - Strong characters of different types go into different items; an item can only
    ///     additionally contain some weak characters.
    /// </summary>
    internal static class Itemizer
    { 
        /// <summary>
        /// State is the working structure used by the itemizer. PrimaryItemization creates
        /// its own instance per call and passes it to the helper methods by reference.
        /// </summary>
        private struct State
        {
            public int              _ch;                    // the character (code point) currently being parsed
            public int              _lastZWJ;               // position of the last ZWJ character (-1 if none)
            public int              _lastWeak;              // position of the last weak character (-1 if none)
            public int              _weakRunStart;          // start of the run when it begins with a weak character (-1 if none)
            public int              _charIndex;             // the current character index
            public int              _nextStart;             // start of the next item
            public int              _lastJoiner;            // position of the last joiner character (-1 if none)
            public int              _lastCharLength;        // previous character length (2 for a surrogate pair, 1 otherwise)
            public int              _previousStart;         // start of the previous item
            public int              _startOfCS;             // start of common separator characters
            public int              _startOfES;             // start of European separator characters
            public int              _startOfET;             // start of European terminator characters
            public int              _numberSignIndex;       // index of the number sign (-1 if none)
            public Item             _currentItem;           // the current item
            public Item             _previousItem;          // the previous item
            public ItemClass        _currentItemClass;      // class of the current character
            public ItemClass        _previousItemClass;     // class of the previous character
            public DigitFlags       _digitFlags;            // status of the last digit-related character
            public SpanVector       _items;                 // the resulting items vector
            public ScriptID         _digitSeperatorScript;  // script of the CS/ES characters
            public CharBreakingType _previousBreak;         // previous break type
            public CultureInfo      _digitCulture;          // culture to use for ScriptID.Digit items (number substitution)

            /// <summary>
            /// Resets the position markers and seeds the "previous" item and break type
            /// from the first character of <paramref name="text"/>.
            /// </summary>
            /// <param name="text">Text about to be itemized; its first character is read unconditionally.</param>
            /// <param name="mask">Bit mask applied to the break type of the first character.</param>
            /// <param name="digitCulture">Culture stored for later use on digit items.</param>
            public void Initialize(CharacterBufferRange text, byte mask, CultureInfo digitCulture)
            {
                _digitCulture    = digitCulture;

                // -1 means "no such position recorded yet".
                _numberSignIndex = -1;
                _lastJoiner      = -1;
                _weakRunStart    = -1;
                _lastWeak        = -1;
                _lastZWJ         = -1;

                // Decode the first code point, combining a leading surrogate pair if present.
                int leadCodePoint = text[0];
                if ((leadCodePoint & 0xFC00) == 0xD800 && text.Length > 1)
                {
                    int trailUnit = text[1];
                    if ((trailUnit & 0xFC00) == 0xDC00)
                    {
                        leadCodePoint = (((leadCodePoint & 0x03ff) << 10) | (trailUnit & 0x3ff)) + 0x10000;
                    }
                }

                short leadClass = Classification.GetUnicodeClass(leadCodePoint);

                _currentItem        = new Item();
                _currentItem.Script = ScriptID.Latin;

                // The "previous" item starts out with the script and (masked) break type
                // of the first character.
                _previousItem        = new Item();
                _previousItem.Script = (ScriptID) Classification.CharAttributeOf((int)leadClass).Script;
                _previousBreak       = (CharBreakingType)(Classification.CharAttributeOf((int)leadClass).BreakType & mask);

                _previousItemClass = ItemClass.StrongClass;
                _currentItemClass  = ItemClass.StrongClass;
                _items             = new SpanVector(null);
            }
        }
 
        /// <summary>
        /// PrimaryItemization is the main itemizer method. It walks the text once, code point
        /// by code point, and splits it into items that are stored in the returned vector.
        /// </summary>
        /// <param name="text">The characters to itemize. An empty range yields an empty vector.</param>
        /// <param name="rightToLeft">
        /// When false, the PairMirrorBreak and SingleMirrorBreak bits are masked out of every
        /// character's break type.
        /// </param>
        /// <param name="digitCulture">
        /// Culture attached to digit items for number substitution. When null, BreakRun stores
        /// digit items with the Latin script instead.
        /// </param>
        /// <returns>The span vector holding one item per run of the text.</returns>
        internal static SpanVector PrimaryItemization(
            CharacterBufferRange text,
            bool                 rightToLeft,
            CultureInfo          digitCulture
            )
        {
            int wordCount = 0;
            Debug.Assert(((int) ItemClass.DigitClass == 0 && (int) ItemClass.ETClass == 4),
                        ("Don't change the ItemClass members order"));

            // State.Initialize reads text[0] unconditionally, so an empty range must not
            // reach it. Nothing can be itemized anyway: return an empty vector.
            if (text.Length == 0)
            {
                return new SpanVector(null);
            }

            byte mirroringMask;
            if (rightToLeft)
            {
                mirroringMask = 0xFF;
            }
            else
            {
                // Left-to-right text: drop the mirror break bits.
                mirroringMask = 0xFF & (~((byte) CharBreakingType.PairMirrorBreak |
                (byte) CharBreakingType.SingleMirrorBreak));
            }

            State  state = new State(); // all fields initialized to zeros here
            state.Initialize(text, mirroringMask, digitCulture);
            CharBreakingType breakType = state._previousBreak;

            while (state._charIndex < text.Length)
            {
                // Decode the current code point; a surrogate pair occupies two UTF-16 units.
                wordCount = 1;
                state._ch = text[state._charIndex];
                if ((state._ch & 0xFC00) == 0xD800 && (state._charIndex < text.Length-1) && (text[state._charIndex+1] & 0xFC00) == 0xDC00)
                {
                    wordCount = 2;
                    state._ch = (((state._ch & 0x03ff) << 10) | (text[state._charIndex+1] & 0x3ff)) + 0x10000;
                }

                ItemFlags currentItemFlags = ItemFlags.Default;

                short unicodeClass = Classification.GetUnicodeClass(state._ch);
                state._currentItem.Script = (ScriptID)Classification.CharAttributeOf((int) unicodeClass).Script;
                if ((Classification.CharAttributeOf((int)unicodeClass).Flags & (byte)CharacterAttributeFlags.CharacterExtended) != 0)
                    currentItemFlags |= ItemFlags.HasExtendedCharacter;
                state._currentItemClass = (ItemClass)Classification.CharAttributeOf((int)unicodeClass).ItemClass;

                //
                // Extended characters can only be processed by surrogate shaper. However, the surrogate shaper is not suitable
                // for non-extended ones because it doesn't apply GSUB features. So in order to send purely the extended characters
                // to surrogate shaper, such characters are grouped into a separate item with the HasExtendedCharacter flag.
                //
                // Fast path: a strong character with the same script and the same "extended"
                // status as the item being built simply extends that item.
                if (state._currentItemClass == ItemClass.StrongClass &&
                    state._currentItem.Script == state._previousItem.Script &&
                    (state._previousItem.Flags & ItemFlags.HasExtendedCharacter) == (currentItemFlags & ItemFlags.HasExtendedCharacter)
                   )
                {
                    state._charIndex          += wordCount;
                    state._lastCharLength      = wordCount;
                    state._previousItemClass   = ItemClass.StrongClass;
                    state._previousItem.Flags |= currentItemFlags;
                    continue;
                }

                breakType = (CharBreakingType) (Classification.CharAttributeOf((int) unicodeClass).BreakType & mirroringMask);

                if (   state._previousBreak != breakType
                   ||  (state._previousItem.Flags & ItemFlags.HasExtendedCharacter) != (currentItemFlags & ItemFlags.HasExtendedCharacter)
                   ) // break item here
                {
                    // Tentatively start a new item at this character; the handlers below may move or cancel it.
                    state._nextStart = state._charIndex;
                    if (state._previousBreak == CharBreakingType.DigitBreak)
                    {
                        // We have numerics class followed by any other type.
                        NumbericfollowedByOther(ref state, (breakType & CharBreakingType.NoBreak) == 0);
                    }
                }

                // Dispatch to the handler for the class of the current character.
                (ItemizerState[ (int) state._currentItemClass])(ref state);

                // A start position beyond the current item's start means the item ends here.
                if (state._nextStart > state._previousStart)
                {
                    BreakRun(ref state, state._nextStart - state._previousStart);
                    state._previousBreak = breakType;
                }

                state._previousItemClass = state._currentItemClass;
                state._charIndex += wordCount;
                state._lastCharLength = wordCount;
                state._previousItem.Flags |= currentItemFlags;
            }

            if (text.Length > state._previousStart)
            {
                if (state._previousBreak == CharBreakingType.DigitBreak)
                {
                    // We have numerics class followed by any other type.
                    NumbericfollowedByOther(ref state, false);
                }
                else
                    CheckMirroring (ref state);

                // Create the last item
                BreakRun(ref state, text.Length - state._previousStart);
            }


            #if DEBUG
            int spanLength = 0;
            foreach (Span s in state._items)
            {
                spanLength += s.length;
            }

            Debug.Assert(spanLength == text.Length, ("total itemized text length not equal the original text length."));
            #endif

            return state._items;
        }


        /// <summary>
        /// When the previous break type carries a mirror bit, switches the previous item to
        /// ScriptID.Mirror and, if the break type is exactly one of the two mirror values,
        /// sets the matching mirror-form flag on it.
        /// </summary>
        private static void CheckMirroring (ref State state)
        {
            CharBreakingType previousBreak = state._previousBreak;

            // No mirror bit set: leave the item alone.
            if ((previousBreak & (CharBreakingType.PairMirrorBreak | CharBreakingType.SingleMirrorBreak)) == 0)
            {
                return;
            }

            state._previousItem.Script = ScriptID.Mirror;

            if (previousBreak == CharBreakingType.PairMirrorBreak)
            {
                state._previousItem.Flags |= ItemFlags.PairMirrorForm;
            }
            else if (previousBreak == CharBreakingType.SingleMirrorBreak)
            {
                state._previousItem.Flags |= ItemFlags.SingleMirrorForm;
            }
        }
 
        /// <summary>
        /// Helper that closes the item being built: it stores the previous item in the items
        /// vector for <paramref name="length"/> characters starting at state._previousStart,
        /// then creates and initializes the next item, which starts at state._nextStart.
        /// </summary>
        /// <param name="state">The itemizer state; updated in place.</param>
        /// <param name="length">Number of characters covered by the item being closed.</param>
        private static void BreakRun(ref State state, int length)
        {
            CheckMirroring (ref state);

            state._previousItem.DigitCulture = null;

            if (state._previousItem.Script == ScriptID.Digit)
            {
                // Digit item requires appropriate culture for number substitution.
                state._previousItem.DigitCulture = state._digitCulture;

                if (state._previousItem.DigitCulture == null)
                {
                    // Shape digits w/o number substitution in latin shaping engine, so that
                    // digit item will automatically merge with the surrounding latin item
                    // to form more complete context (e.g. for runs like "1", "/" and "2",
                    // stacked fraction works on all of them together).
                    state._previousItem.Script = ScriptID.Latin;
                }
            }

            state._items.SetValue(state._previousStart, length, state._previousItem);

            // Start the next item with the script of the current character.
            state._previousItem = new Item();
            state._previousItem.Script      = state._currentItem.Script;
            state._currentItem.Flags        = ItemFlags.Default;
            state._previousStart            = state._nextStart;

            // The new item gets LeadingJoin when the character just before it is a ZWJ.
            // _lastZWJ uses -1 for "no ZWJ seen", so index 0 is a valid position and must
            // be accepted here (same convention as the _numberSignIndex >= 0 checks).
            if (state._lastZWJ >= 0 && state._lastZWJ == state._nextStart - state._lastCharLength)
            {
                state._previousItem.Flags |= ItemFlags.LeadingJoin;
            }
            else
            {
                state._previousItem.Flags &= ~ItemFlags.LeadingJoin;
            }

            // if the new run starts with weak character then remember its start position
            // so we can merge it into strong class characters.
            if (state._currentItemClass == ItemClass.WeakClass &&
                state._charIndex == state._previousStart)
            {
                state._weakRunStart = state._previousStart;
            }
        }

        // Code point of the zero width joiner; JoinerClassState compares the current character against it.
        private const int U_ZWJ = 0x200D;
 
        // Signature shared by all state handlers: each receives the itemizer state by reference and updates it in place.
        private delegate void ItemizerStateDelegate(ref State state);
 
        /// <summary>
        /// DigitClassState handles the digit-related character classes by delegating to the
        /// number state machine. The transition is looked up in DigitFSM by the previous item
        /// class (every class above ETClass is clamped to ETClass + 1) and the current item class.
        /// A null entry means there is nothing to do.
        /// </summary>
        private static void DigitClassState(ref State state)
        {
            int fromClass = Math.Min((int) state._previousItemClass, (int)(ItemClass.ETClass + 1));
            int toClass   = (int) state._currentItemClass;

            ItemizerStateDelegate transition = DigitFSM[fromClass, toClass];
            if (transition == null)
            {
                // No handler for this pair of classes: the item is not broken here.
                return;
            }

            transition(ref state);
        }
 
        /// <summary>
        /// StrongClassState handles strong class characters whose script differs from the
        /// script of the item being built.
        /// </summary>
        private static void StrongClassState(ref State state)
        {
            if (state._previousItem.Script == state._currentItem.Script)
            {
                // Same script: nothing to decide.
                return;
            }

            if (state._weakRunStart == state._previousStart)
            {
                // The item being built started with weak characters: merge them with this strong
                // character and adopt its script, which also speeds up the following strong characters.
                state._previousItem.Script = state._currentItem.Script;
            }
            else
            {
                // The item did not start with weak characters, so it ends at this character.
                state._nextStart = state._charIndex;
            }

            // Either way the weak start has been consumed.
            state._weakRunStart = -1;
        }

        /// <summary>
        /// WeakClassState handles weak class characters: it records the index of the
        /// current character as the position of the last weak character.
        /// </summary>
        private static void WeakClassState(ref State state) 
        {
            state._lastWeak = state._charIndex; 
        }

        /// <summary>
        /// SimpleMarkClassState handles simple combining mark characters: the item being
        /// built is flagged as containing a combining mark.
        /// </summary>
        private static void SimpleMarkClassState(ref State state)
        {
            state._previousItem.Flags = state._previousItem.Flags | ItemFlags.HasCombiningMark;
        }

        /// <summary>
        /// ComplexMarkClassState handles complex mark characters, like the Arabic diacritics.
        /// A mark whose script differs from the item being built ends that item, except for
        /// an Arabic mark inside a Syriac item, which stays in the item.
        /// </summary>
        private static void ComplexMarkClassState(ref State state)
        {
            ScriptID runScript  = state._previousItem.Script;
            ScriptID markScript = state._currentItem.Script;

            if (runScript == markScript)
            {
                return;
            }

            if (runScript == ScriptID.Syriac && markScript == ScriptID.Arabic)
            {
                return;
            }

            bool rightAfterJoiner = state._lastJoiner == state._charIndex - state._lastCharLength;
            if (!rightAfterJoiner)
            {
                // Plain case: the next item starts at the mark.
                state._nextStart = state._charIndex;
                return;
            }

            // The mark directly follows a recorded joiner: start the next item at the last weak character.
            state._nextStart = state._lastWeak;
            if (state._nextStart <= state._previousStart)
            {
                // Nothing precedes that position in the current item, so no break will
                // happen; the item takes the script of the mark instead.
                state._previousItem.Script = markScript;
            }
        }
 
        /// <summary>
        /// ControlClassState handles control characters, like code point 0x200f.
        /// The handler body is empty: no itemizer state is changed here.
        /// </summary>
        private static void ControlClassState(ref State state)
        { 
        }
 
        /// <summary>
        /// JoinerClassState handles joiner characters, like code point 0x200D.
        /// </summary>
        private static void JoinerClassState(ref State state)
        {
            int position = state._charIndex;

            // Record the joiner only when it directly follows the last weak character.
            bool followsWeak = state._lastWeak == position - state._lastCharLength;
            if (followsWeak)
            {
                state._lastJoiner = position;
            }

            // Zero width joiner: remember where it is so BreakRun can detect a leading join.
            if (state._ch == U_ZWJ)
            {
                state._lastZWJ = position;
            }
        }

 
        /// <summary>
        /// NumberSignClassState handles the Arabic number signs (0x0600~0x0603, 0x06DD).
        /// The sign is treated like a strong character, and its index is remembered so that
        /// a number directly following it can be attached to it.
        /// </summary>
        private static void NumberSignClassState(ref State state)
        {
            // Independent of the strong-class handling below, which does not touch this field.
            state._numberSignIndex = state._charIndex;

            StrongClassState(ref state);
        }
 
        // Single delegate instance shared by all digit-related entries of the table below.
        private static ItemizerStateDelegate digitClassDelegate = DigitClassState;

        /// <summary>
        /// Delegate table holding the FSM of the main itemizer. PrimaryItemization indexes it
        /// with (int) ItemClass, so the entries must stay in the order of the ItemClass members.
        /// </summary>
        private static ItemizerStateDelegate [] ItemizerState = new ItemizerStateDelegate[(int) ItemClass.MaxClass]
        {
            // Indices 0..4: ItemClass.DigitClass (0) through ItemClass.ETClass (4).
            digitClassDelegate,
            digitClassDelegate,
            digitClassDelegate,
            digitClassDelegate,
            digitClassDelegate,
            // Remaining classes, one handler each.
            StrongClassState,
            WeakClassState,
            SimpleMarkClassState,
            ComplexMarkClassState,
            ControlClassState,
            JoinerClassState,
            NumberSignClassState
        };
 
        // The following types are used for handling the number analysis inside the itemizer.
        // We have a small finite state machine for numbers which works inside the itemizer's
        // finite state machine. It is done this way to keep the itemizer's finite state machine
        // as small as possible and to avoid having too many states depending on each other.

 
 
        /// <summary>
        /// DigitFlags describes the status of the number analysis after the last
        /// digit-related character.
        /// </summary>
        internal enum DigitFlags
        {
            /// <summary>Not a number.</summary>
            NONE = 0,

            /// <summary>Arabic number.</summary>
            AN = 1,

            /// <summary>European number.</summary>
            EN = 2,

            /// <summary>European terminator.</summary>
            ET = 3,

            /// <summary>European number followed by common separator.</summary>
            ENCS = 4,

            /// <summary>European number followed by European separator.</summary>
            ENES = 5,

            /// <summary>European number followed by European terminator.</summary>
            ENET = 6,

            /// <summary>Arabic number followed by common separator.</summary>
            ANCS = 7
        }

        /// <summary>
        /// EnAnStateDelegate handles the case En followed by An: the next item starts at
        /// the current character and the number status becomes AN.
        /// </summary>
        private static void EnAnStateDelegate(ref State state)
        {
            state._digitFlags = DigitFlags.AN;
            state._nextStart  = state._charIndex;
        }

        /// <summary>
        /// EnCsStateDelegate handles the case En followed by Cs: it remembers where the
        /// common separator starts and which script it has, and sets the status to ENCS.
        /// </summary>
        private static void EnCsStateDelegate(ref State state)
        {
            state._digitSeperatorScript = state._currentItem.Script;
            state._digitFlags           = DigitFlags.ENCS;
            state._startOfCS            = state._charIndex;
        }

        /// <summary>
        /// EnEsStateDelegate handles the case En followed by Es: it remembers where the
        /// European separator starts and which script it has, and sets the status to ENES.
        /// </summary>
        private static void EnEsStateDelegate(ref State state)
        {
            state._digitSeperatorScript = state._currentItem.Script;
            state._digitFlags           = DigitFlags.ENES;
            state._startOfES            = state._charIndex;
        }

        /// <summary>
        /// EnEtStateDelegate handles the case En followed by Et: the status becomes ENET and
        /// the current value of _nextStart is stored as the terminator start.
        /// </summary>
        private static void EnEtStateDelegate(ref State state)
        {
            state._digitFlags = DigitFlags.ENET;

            // NOTE(review): the other handlers store _charIndex in _startOfET; this one stores
            // _nextStart. EtEnStateDelegate does not read _startOfET in the ENET case - confirm
            // this is intended.
            state._startOfET = state._nextStart;
        }

        /// <summary>
        /// AnEnStateDelegate handles the case An followed by En: the next item starts at
        /// the current character and the number status becomes EN.
        /// </summary>
        private static void AnEnStateDelegate(ref State state)
        {
            state._digitFlags = DigitFlags.EN;
            state._nextStart  = state._charIndex;
        }

        /// <summary>
        /// AnCSStateDelegate handles the case An followed by Cs: it remembers where the
        /// common separator starts and which script it has, and sets the status to ANCS.
        /// </summary>
        private static void AnCSStateDelegate(ref State state)
        {
            state._digitSeperatorScript = state._currentItem.Script;
            state._digitFlags           = DigitFlags.ANCS;
            state._startOfCS            = state._charIndex;
        }
 
        /// <summary>
        /// AnEsStateDelegate handles the case An followed by Es: the next item starts at
        /// the current character and the number status is reset to NONE.
        /// </summary>
        private static void AnEsStateDelegate(ref State state)
        {
            state._digitFlags = DigitFlags.NONE;
            state._nextStart  = state._charIndex;
        }
 
        /// <summary>
        /// AnEtStateDelegate handles the case An followed by Et: the next item and the
        /// terminator sequence both start at the current character; the status becomes ET.
        /// </summary>
        private static void AnEtStateDelegate(ref State state)
        {
            int terminatorStart = state._charIndex;

            state._digitFlags = DigitFlags.ET;
            state._startOfET  = terminatorStart;
            state._nextStart  = terminatorStart;
        }

        /// <summary>
        /// CsEnStateDelegate handles the case Cs followed by En. When the separator came
        /// right after a European number (status ENCS) the item continues; otherwise the
        /// next item starts at the current character. The status becomes EN.
        /// </summary>
        private static void CsEnStateDelegate(ref State state)
        {
            bool separatorInsideNumber = state._digitFlags == DigitFlags.ENCS;

            state._digitFlags = DigitFlags.EN;

            if (!separatorInsideNumber)
            {
                state._nextStart = state._charIndex;
            }
        }
 
        /// <summary>
        /// CsAnStateDelegate handles the case Cs followed by An. When the separator came
        /// right after an Arabic number (status ANCS) the item continues; otherwise the
        /// next item starts at the current character. The status becomes AN.
        /// </summary>
        private static void CsAnStateDelegate(ref State state)
        {
            bool separatorInsideNumber = state._digitFlags == DigitFlags.ANCS;

            state._digitFlags = DigitFlags.AN;

            if (!separatorInsideNumber)
            {
                state._nextStart = state._charIndex;
            }
        }
 
        /// <summary>
        /// CsCstateDelegate handles the case Cs followed by Cs. If the first separator
        /// followed a number (status ENCS or ANCS), the next item starts at that separator.
        /// The status is reset to NONE.
        /// </summary>
        private static void CsCstateDelegate(ref State state)
        {
            DigitFlags before = state._digitFlags;

            state._digitFlags = DigitFlags.NONE;

            if (before == DigitFlags.ENCS || before == DigitFlags.ANCS)
            {
                state._nextStart = state._startOfCS;
            }
        }
 
        /// <summary>
        /// CsEsStateDelegate handles the case Cs followed by Es. If the common separator
        /// followed a number (status ENCS or ANCS), the next item starts at that separator.
        /// The status is reset to NONE.
        /// </summary>
        private static void CsEsStateDelegate(ref State state)
        {
            DigitFlags before = state._digitFlags;

            state._digitFlags = DigitFlags.NONE;

            if (before == DigitFlags.ENCS || before == DigitFlags.ANCS)
            {
                state._nextStart = state._startOfCS;
            }
        }

        /// <summary>
        /// CsEtStateDelegate handles the case Cs followed by Et. If the common separator
        /// followed a number (status ENCS or ANCS), the next item starts at that separator.
        /// The terminator start is the current character and the status becomes ET.
        /// </summary>
        private static void CsEtStateDelegate(ref State state)
        {
            DigitFlags before = state._digitFlags;

            state._startOfET  = state._charIndex;
            state._digitFlags = DigitFlags.ET;

            if (before == DigitFlags.ENCS || before == DigitFlags.ANCS)
            {
                state._nextStart = state._startOfCS;
            }
        }

        /// <summary>
        /// EsEnStateDelegate handles the case Es followed by En. When the separator came
        /// right after a European number (status ENES) the item continues; otherwise the
        /// next item starts at the current character. The status becomes EN.
        /// </summary>
        private static void EsEnStateDelegate(ref State state)
        {
            bool separatorInsideNumber = state._digitFlags == DigitFlags.ENES;

            state._digitFlags = DigitFlags.EN;

            if (!separatorInsideNumber)
            {
                state._nextStart = state._charIndex;
            }
        }

        /// <summary>
        /// EsAnStateDelegate handles the case Es followed by An. If the separator followed a
        /// European number (status ENES), the number is closed in front of the separator and
        /// the separator starts a Latin item. The next item then starts at the current
        /// character and the status becomes AN.
        /// </summary>
        private static void EsAnStateDelegate(ref State state)
        {
            bool separatorAfterEuropeanNumber = state._digitFlags == DigitFlags.ENES;

            if (separatorAfterEuropeanNumber)
            {
                int separatorStart = state._startOfES;

                // Close the number just before the separator ...
                BreakRun(ref state, separatorStart - state._previousStart);

                // ... and let the separator begin an item of its own.
                state._previousItem.Script = ScriptID.Latin;
                state._previousStart       = separatorStart;
            }

            state._digitFlags = DigitFlags.AN;
            state._nextStart  = state._charIndex;
        }
 
        /// <summary>
        /// EsCsStateDelegate handles the case Es followed by Cs. If the European separator
        /// followed a European number (status ENES), the number is closed in front of the
        /// separator and the new item starts at the separator with the current script.
        /// The status is reset to NONE.
        /// </summary>
        private static void EsCsStateDelegate(ref State state)
        {
            bool separatorAfterEuropeanNumber = state._digitFlags == DigitFlags.ENES;

            if (separatorAfterEuropeanNumber)
            {
                int separatorStart = state._startOfES;

                BreakRun(ref state, separatorStart - state._previousStart);
                state._previousItem.Script = state._currentItem.Script;
                state._previousStart       = separatorStart;
            }

            state._digitFlags = DigitFlags.NONE;
        }

        /// <summary>
        /// EsEsStateDelegate handles the case Es followed by Es. If the first separator
        /// followed a European number (status ENES), the number is closed in front of it and
        /// the new item starts at that separator with the current script. No further break
        /// is requested, and the status is reset to NONE.
        /// </summary>
        private static void EsEsStateDelegate(ref State state)
        {
            bool separatorAfterEuropeanNumber = state._digitFlags == DigitFlags.ENES;

            if (separatorAfterEuropeanNumber)
            {
                int separatorStart = state._startOfES;

                BreakRun(ref state, separatorStart - state._previousStart);
                state._previousItem.Script = state._currentItem.Script;
                state._previousStart       = separatorStart;
            }

            // we don't need to do any more run break here so reset state._nextStart
            state._nextStart  = state._previousStart;
            state._digitFlags = DigitFlags.NONE;
        }

        /// <summary>
        /// EsEtStateDelegate handles the case Es followed by Et. If the separator followed a
        /// European number (status ENES), the number is closed in front of the separator and
        /// the new item starts at the separator with the current script. The terminator
        /// start is the current character and the status becomes ET.
        /// </summary>
        private static void EsEtStateDelegate(ref State state)
        {
            bool separatorAfterEuropeanNumber = state._digitFlags == DigitFlags.ENES;

            if (separatorAfterEuropeanNumber)
            {
                int separatorStart = state._startOfES;

                BreakRun(ref state, separatorStart - state._previousStart);
                state._previousItem.Script = state._currentItem.Script;
                state._previousStart       = separatorStart;
            }

            state._digitFlags = DigitFlags.ET;
            state._startOfET  = state._charIndex;
        }
 
        /// <summary>
        /// EtEnStateDelegate handles the case Et followed by En. After a number-terminator
        /// pair (status ENET) the next item starts at the current character; otherwise it
        /// starts where the terminator sequence began. The status becomes EN.
        /// </summary>
        private static void EtEnStateDelegate(ref State state)
        {
            state._nextStart = (state._digitFlags == DigitFlags.ENET)
                ? state._charIndex
                : state._startOfET;

            state._digitFlags = DigitFlags.EN;

            // state._previousStart may point to a previous non-digit run, so the script of
            // the item is overwritten only when no break is going to happen, i.e. when
            // state._previousStart already is the start of the digit run.
            bool noBreakPending = state._nextStart <= state._previousStart;
            if (noBreakPending)
            {
                // Set the script to be Digit
                state._previousItem.Script = state._currentItem.Script;
            }
        }
 
        /// <summary>
        /// EtAnStateDelegate handles the case Et followed by An: the next item starts at
        /// the current character and the number status becomes AN.
        /// </summary>
        private static void EtAnStateDelegate(ref State state)
        {
            state._digitFlags = DigitFlags.AN;
            state._nextStart  = state._charIndex;
        }
 
        /// <summary>
        /// EtCsStateDelegate handles the case Et followed by Cs. After a number-terminator
        /// pair (status ENET) the next item starts at the current character. The status is
        /// reset to NONE.
        /// </summary>
        private static void EtCsStateDelegate(ref State state)
        {
            bool terminatorAfterNumber = state._digitFlags == DigitFlags.ENET;

            state._digitFlags = DigitFlags.NONE;

            if (terminatorAfterNumber)
            {
                state._nextStart = state._charIndex;
            }
        }
 
        /// <summary>
        /// EtEsStateDelegate handles the case Et followed by Es. After a number-terminator
        /// pair (status ENET) the next item starts at the current character. The status is
        /// reset to NONE.
        /// </summary>
        private static void EtEsStateDelegate(ref State state)
        {
            bool terminatorAfterNumber = state._digitFlags == DigitFlags.ENET;

            state._digitFlags = DigitFlags.NONE;

            if (terminatorAfterNumber)
            {
                state._nextStart = state._charIndex;
            }
        }

        /// <summary>
        /// OtherEnStateDelegate handles the case of a non-number class followed by En.
        /// When the previous character is a recorded number sign, the next item starts at
        /// that sign; otherwise it starts at the current character. The status becomes EN.
        /// </summary>
        private static void OtherEnStateDelegate(ref State state)
        {
            int signIndex = state._numberSignIndex;
            bool directlyAfterSign =
                signIndex >= 0 &&
                state._charIndex - state._lastCharLength == signIndex;

            state._digitFlags = DigitFlags.EN;

            if (!directlyAfterSign)
            {
                state._nextStart = state._charIndex;
                return;
            }

            state._nextStart     = signIndex;
            // Set digit breaking so we don't split subsequent numbers
            state._previousBreak = CharBreakingType.DigitBreak;
        }

        /// <summary>
        /// OtherAnStateDelegate handles the case of a non-number class followed by An.
        /// When the previous character is a recorded number sign, the next item starts at
        /// that sign; otherwise it starts at the current character. The status becomes AN.
        /// </summary>
        private static void OtherAnStateDelegate(ref State state)
        {
            int signIndex = state._numberSignIndex;
            bool directlyAfterSign =
                signIndex >= 0 &&
                state._charIndex - state._lastCharLength == signIndex;

            state._digitFlags = DigitFlags.AN;

            if (!directlyAfterSign)
            {
                state._nextStart = state._charIndex;
                return;
            }

            state._nextStart     = signIndex;
            // Set digit breaking so we don't split subsequent numbers
            state._previousBreak = CharBreakingType.DigitBreak;
        }

        /// <summary>
        /// OtherCsStateDelegate handles the case of a non-number class followed by Cs.
        /// When the previous break type is NoBreak, any pending break is cancelled.
        /// The status is reset to NONE.
        /// </summary>
        private static void OtherCsStateDelegate(ref State state)
        {
            state._digitFlags = DigitFlags.NONE;

            if (state._previousBreak == CharBreakingType.NoBreak)
            {
                // Next start equal to the current start means "do not break".
                state._nextStart = state._previousStart;
            }
        }
 
        /// <summary>
        /// OtherEsStateDelegate handles the case of a non-number class followed by Es.
        /// When the previous break type is NoBreak, any pending break is cancelled.
        /// The status is reset to NONE.
        /// </summary>
        private static void OtherEsStateDelegate(ref State state)
        {
            state._digitFlags = DigitFlags.NONE;

            if (state._previousBreak == CharBreakingType.NoBreak)
            {
                // Next start equal to the current start means "do not break".
                state._nextStart = state._previousStart;
            }
        }
 
        /// <summary>
        /// OtherEtStateDelegate handles the case of a non-number class followed by Et.
        /// When the previous break type is NoBreak, any pending break is cancelled.
        /// The terminator start is the current character and the status becomes ET.
        /// </summary>
        private static void OtherEtStateDelegate(ref State state)
        {
            state._digitFlags = DigitFlags.ET;
            state._startOfET  = state._charIndex;

            if (state._previousBreak == CharBreakingType.NoBreak)
            {
                // Next start equal to the current start means "do not break".
                state._nextStart = state._previousStart;
            }
        }
 

        /// <summary>
        /// NumbericfollowedByOther handles a number class followed by any non-number class.
        /// A number that ended cleanly (status AN, EN or ENET) is left untouched. A trailing
        /// separator (status ANCS, ENCS or ENES) is split off the number into its own item.
        /// </summary>
        /// <param name="state">The itemizer state; updated in place.</param>
        /// <param name="runBreak">
        /// When false, any pending break is cancelled by resetting state._nextStart to the
        /// start of the current item.
        /// </param>
        private static void NumbericfollowedByOther(ref State state, bool runBreak)
        {
            DigitFlags status = state._digitFlags;

            if (status == DigitFlags.AN || status == DigitFlags.EN || status == DigitFlags.ENET)
            {
                return;
            }

            bool hasTrailingSeparator = false;
            int  separatorStart       = 0;

            if (status == DigitFlags.ANCS || status == DigitFlags.ENCS)
            {
                hasTrailingSeparator = true;
                separatorStart       = state._startOfCS;
            }
            else if (status == DigitFlags.ENES)
            {
                hasTrailingSeparator = true;
                separatorStart       = state._startOfES;
            }

            if (hasTrailingSeparator)
            {
                // Close the number in front of the separator; the separator then starts
                // a new item carrying the script recorded for it.
                BreakRun(ref state, separatorStart - state._previousStart);
                state._previousItem.Script = state._digitSeperatorScript;
                state._previousStart       = separatorStart;
            }

            if (!runBreak)
            {
                state._nextStart = state._previousStart; // don't break
            }

            state._digitFlags = DigitFlags.NONE;
        }

        /// <summary>
        /// The following defines the finite state machine for numbers. Each handler field and the
        /// table itself are assigned exactly once, here, so they are declared readonly to prevent
        /// accidental reassignment of the shared state machine.
        /// </summary>
        private static readonly ItemizerStateDelegate EnAnState       = new ItemizerStateDelegate(EnAnStateDelegate);
        private static readonly ItemizerStateDelegate EnCsState       = new ItemizerStateDelegate(EnCsStateDelegate);
        private static readonly ItemizerStateDelegate EnEsState       = new ItemizerStateDelegate(EnEsStateDelegate);
        private static readonly ItemizerStateDelegate EnEtState       = new ItemizerStateDelegate(EnEtStateDelegate);
        private static readonly ItemizerStateDelegate AnEnState       = new ItemizerStateDelegate(AnEnStateDelegate);
        private static readonly ItemizerStateDelegate AnCSState       = new ItemizerStateDelegate(AnCSStateDelegate);
        private static readonly ItemizerStateDelegate AnEsState       = new ItemizerStateDelegate(AnEsStateDelegate);
        private static readonly ItemizerStateDelegate AnEtState       = new ItemizerStateDelegate(AnEtStateDelegate);
        private static readonly ItemizerStateDelegate CsEnState       = new ItemizerStateDelegate(CsEnStateDelegate);
        private static readonly ItemizerStateDelegate CsAnState       = new ItemizerStateDelegate(CsAnStateDelegate);
        private static readonly ItemizerStateDelegate CsCstate        = new ItemizerStateDelegate(CsCstateDelegate);
        private static readonly ItemizerStateDelegate CsEsState       = new ItemizerStateDelegate(CsEsStateDelegate);
        private static readonly ItemizerStateDelegate CsEtState       = new ItemizerStateDelegate(CsEtStateDelegate);
        private static readonly ItemizerStateDelegate EsEnState       = new ItemizerStateDelegate(EsEnStateDelegate);
        private static readonly ItemizerStateDelegate EsAnState       = new ItemizerStateDelegate(EsAnStateDelegate);
        private static readonly ItemizerStateDelegate EsCsState       = new ItemizerStateDelegate(EsCsStateDelegate);
        private static readonly ItemizerStateDelegate EsEsState       = new ItemizerStateDelegate(EsEsStateDelegate);
        private static readonly ItemizerStateDelegate EsEtState       = new ItemizerStateDelegate(EsEtStateDelegate);
        private static readonly ItemizerStateDelegate EtEnState       = new ItemizerStateDelegate(EtEnStateDelegate);
        private static readonly ItemizerStateDelegate EtAnState       = new ItemizerStateDelegate(EtAnStateDelegate);
        private static readonly ItemizerStateDelegate EtCsState       = new ItemizerStateDelegate(EtCsStateDelegate);
        private static readonly ItemizerStateDelegate EtEsState       = new ItemizerStateDelegate(EtEsStateDelegate);
        private static readonly ItemizerStateDelegate OtherEnState    = new ItemizerStateDelegate(OtherEnStateDelegate);
        private static readonly ItemizerStateDelegate OtherAnState    = new ItemizerStateDelegate(OtherAnStateDelegate);
        private static readonly ItemizerStateDelegate OtherCsState    = new ItemizerStateDelegate(OtherCsStateDelegate);
        private static readonly ItemizerStateDelegate OtherEsState    = new ItemizerStateDelegate(OtherEsStateDelegate);
        private static readonly ItemizerStateDelegate OtherEtState    = new ItemizerStateDelegate(OtherEtStateDelegate);

        // Transition table of the number state machine. The row is chosen by the item class of
        // the previous character (DigitClassState clamps every class past Et to the "Other" row)
        // and the column by the item class of the current character. A null entry means that no
        // handler runs for that transition. These fields must stay below the handler fields:
        // static initializers run in textual order and the table captures their values.
        private static readonly ItemizerStateDelegate [,] DigitFSM  = new ItemizerStateDelegate [6, 5]
        {
                        //   En            AN              CS              Es              Et
/* En     */            { null,         EnAnState,      EnCsState,      EnEsState,      EnEtState    },
/* An     */            { AnEnState,    null,           AnCSState,      AnEsState,      AnEtState    },
/* Cs     */            { CsEnState,    CsAnState,      CsCstate,       CsEsState,      CsEtState    },
/* Es     */            { EsEnState,    EsAnState,      EsCsState,      EsEsState,      EsEtState    },
/* Et     */            { EtEnState,    EtAnState,      EtCsState,      EtEsState,      null         },
/* Other  */            { OtherEnState, OtherAnState,   OtherCsState,   OtherEsState,   OtherEtState }
        };
 
    }
}


// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
// Copyright (c) Microsoft Corporation. All rights reserved.
using System.Windows; 
using System;
using System.Security;
using System.Security.Permissions;
using System.Collections; 
using System.Globalization;
using System.Windows.Media.TextFormatting; 
using System.Diagnostics; 

namespace MS.Internal.Shaping 
{
    /// <summary>
    /// Itemizer is the class responsible for text itemization. It simply splits the text
    /// into runs with Script ID, Flags, and state.
    /// To use it, just call Itemizer.PrimaryItemization() with the suitable parameters.
    /// Itemizer will split the text according to the classes of each character in the text.
    ///
    /// We have the following rules:
    ///   - Every sequence of control characters (except joiner characters) should be split
    ///     into a separate run.
    ///   - Any run that has any Unicode extended character will be marked as HasExtendedCharacter.
    ///   - Every sequence of numbers should be split into a separate run. Note that this includes
    ///     the number separators and terminators according to the numbers analysis.
    ///   - The combining marks will be included in the previous item and will take the script ID
    ///     of that item.
    ///   - The weak characters will be merged with the previous item. If the run starts with weak
    ///     characters then those weak characters will be merged with the following strong characters.
    ///   - The different strong type characters will be in different items. An item may only
    ///     additionally contain some weak characters.
    /// </summary>
    internal static class Itemizer
    { 
        /// <summary>
        /// State is the internal structure used by the itemizer. PrimaryItemization creates it as a
        /// local variable and hands it to the helpers by reference, so every itemizer call works on
        /// its own private copy.
        /// </summary>
        private struct State
        {
            /// <summary>
            /// Prepares the state for a new pass. The script and break type of the "previous" item
            /// are seeded from the first character of <paramref name="text"/>.
            /// </summary>
            /// <param name="text">Text to itemize; text[0] is read unconditionally, so it is assumed to be non-empty.</param>
            /// <param name="mask">Bit mask applied to the break type of the first character.</param>
            /// <param name="digitCulture">Culture remembered for digit items (number substitution).</param>
            public void Initialize(CharacterBufferRange text, byte mask, CultureInfo digitCulture)
            {
                // -1 means "no such position recorded yet".
                _numberSignIndex = -1;
                _lastJoiner      = -1;
                _weakRunStart    = -1;
                _lastWeak        = -1;
                _lastZWJ         = -1;

                _digitCulture = digitCulture;

                // Both class trackers start out as strong.
                _previousItemClass = ItemClass.StrongClass;
                _currentItemClass  = ItemClass.StrongClass;

                // Read the first code point, combining a leading surrogate pair into one value.
                int codePoint = text[0];
                if ((codePoint & 0xFC00) == 0xD800 && (text.Length > 1) && (text[1] & 0xFC00) == 0xDC00)
                {
                    int highBits = (codePoint & 0x03ff) << 10;
                    int lowBits  = text[1] & 0x3ff;
                    codePoint    = (highBits | lowBits) + 0x10000;
                }

                short charClass  = Classification.GetUnicodeClass(codePoint);
                int   classIndex = (int) charClass;

                _currentItem        = new Item();
                _currentItem.Script = ScriptID.Latin;

                // The previous item starts with the attributes of the first character.
                _previousItem        = new Item();
                _previousItem.Script = (ScriptID) Classification.CharAttributeOf(classIndex).Script;
                _previousBreak       = (CharBreakingType) (Classification.CharAttributeOf(classIndex).BreakType & mask);

                _items = new SpanVector(null);
            }

            public int              _ch;                    // the character (code point) currently being parsed
            public int              _lastZWJ;               // position of the last ZWJ character
            public int              _lastWeak;              // position of the last weak character
            public int              _weakRunStart;          // start of the run when that run begins with a weak character
            public int              _charIndex;             // index of the current character
            public int              _nextStart;             // start of the next item
            public int              _lastJoiner;            // position of the last joiner character
            public int              _lastCharLength;        // length of the previous character (2 for a surrogate pair, 1 otherwise)
            public int              _previousStart;         // start of the previous item
            public int              _startOfCS;             // start of the common separator characters
            public int              _startOfES;             // start of the European separator characters
            public int              _startOfET;             // start of the European terminator characters
            public int              _numberSignIndex;       // index of the number sign
            public Item             _currentItem;           // the current item
            public Item             _previousItem;          // the previous item
            public ItemClass        _currentItemClass;      // class of the current character
            public ItemClass        _previousItemClass;     // class of the previous character
            public DigitFlags       _digitFlags;            // status of the last digit seen
            public SpanVector       _items;                 // the vector of produced items
            public ScriptID         _digitSeperatorScript;  // script of the CS/ES characters
            public CharBreakingType _previousBreak;         // previous break type
            public CultureInfo      _digitCulture;          // culture to use for ScriptID.Digit items (number substitution)
        }
 
        /// <summary>
        /// PrimaryItemization is the main Itemizer method. It walks the text once, character by
        /// character, and records the resulting items in a SpanVector.
        /// </summary>
        /// <param name="text">
        /// The characters to itemize. State.Initialize reads text[0] unconditionally, so the
        /// range is presumably expected to be non-empty - TODO confirm against callers.
        /// </param>
        /// <param name="rightToLeft">
        /// When false, the PairMirrorBreak and SingleMirrorBreak bits are masked out of every
        /// character's break type.
        /// </param>
        /// <param name="digitCulture">
        /// Culture stored in the state; BreakRun attaches it to items whose script is ScriptID.Digit.
        /// </param>
        /// <returns>The span vector that holds the produced items.</returns>
        internal static SpanVector PrimaryItemization(
            CharacterBufferRange text,
            bool                 rightToLeft,
            CultureInfo          digitCulture
            )
        {
            // Number of UTF-16 code units of the current character (2 for a surrogate pair).
            int wordCount = 0;

            // DigitClassState indexes the DigitFSM columns directly with the ItemClass value,
            // which only works while the digit-related classes occupy values 0..4.
            Debug.Assert(((int) ItemClass.DigitClass == 0 && (int) ItemClass.ETClass == 4),
                        ("Don't change the ItemClass members order"));

            // Mask applied to every break type: all bits for right-to-left text, everything
            // except the two mirror bits otherwise.
            byte mirroringMask;
            if (rightToLeft)
            {
                mirroringMask = 0xFF;
            }
            else
            {
                mirroringMask = 0xFF & (~((byte) CharBreakingType.PairMirrorBreak |
                (byte) CharBreakingType.SingleMirrorBreak));
            }

            State  state = new State(); // all fields initialized to Zeros here
            state.Initialize(text, mirroringMask, digitCulture);
            CharBreakingType breakType = state._previousBreak;

            while (state._charIndex < text.Length)
            {
                // Fetch the current character; a high surrogate followed by a low surrogate
                // is combined into a single code point that spans two code units.
                wordCount = 1;
                state._ch = text[state._charIndex];
                if ((state._ch & 0xFC00) == 0xD800 && (state._charIndex < text.Length-1) && (text[state._charIndex+1] & 0xFC00) == 0xDC00)
                {
                    wordCount = 2;
                    state._ch = (((state._ch & 0x03ff) << 10) | (text[state._charIndex+1] & 0x3ff)) + 0x10000;
                }

                ItemFlags currentItemFlags = ItemFlags.Default;

                // Look up script, extended-character flag and item class of the character.
                short unicodeClass = Classification.GetUnicodeClass(state._ch);
                state._currentItem.Script = (ScriptID)Classification.CharAttributeOf((int) unicodeClass).Script;
                if ((Classification.CharAttributeOf((int)unicodeClass).Flags & (byte)CharacterAttributeFlags.CharacterExtended) != 0)
                    currentItemFlags |= ItemFlags.HasExtendedCharacter;
                state._currentItemClass = (ItemClass)Classification.CharAttributeOf((int)unicodeClass).ItemClass;

                //
                // Extended characters can only be processed by surrogate shaper. However, the surrogate shaper is not suitable
                // for non-extended ones because it doesn't apply GSUB features. So in order to send purely the extended characters
                // to surrogate shaper, such characters are grouped into a separate item with the HasExtendedCharacter flag.
                //
                // Fast path: a strong character with the same script and the same extended-character
                // status as the item being built simply extends that item; no state handler runs.
                if (state._currentItemClass == ItemClass.StrongClass &&
                    state._currentItem.Script == state._previousItem.Script &&
                    (state._previousItem.Flags & ItemFlags.HasExtendedCharacter) == (currentItemFlags & ItemFlags.HasExtendedCharacter)
                   )
                {
                    state._charIndex          += wordCount;
                    state._lastCharLength      = wordCount;
                    state._previousItemClass   = ItemClass.StrongClass;
                    state._previousItem.Flags |= currentItemFlags;
                    continue;
                }

                breakType = (CharBreakingType) (Classification.CharAttributeOf((int) unicodeClass).BreakType & mirroringMask);

                // A change of break type or of extended-character status proposes a break at the
                // current character; the handlers invoked below may still move or cancel it.
                if (   state._previousBreak != breakType
                   ||  (state._previousItem.Flags & ItemFlags.HasExtendedCharacter) != (currentItemFlags & ItemFlags.HasExtendedCharacter)
                   ) // break item here
                {
                    state._nextStart = state._charIndex;
                    if (state._previousBreak == CharBreakingType.DigitBreak)
                    {
                        // We have numerics class followed by any other type.
                        NumbericfollowedByOther(ref state, (breakType & CharBreakingType.NoBreak) == 0);
                    }
                }

                // Dispatch to the handler registered for the item class of the current character.
                (ItemizerState[ (int) state._currentItemClass])(ref state);

                // Emit the pending item only if the next start ended up beyond its start.
                if (state._nextStart > state._previousStart)
                {
                    BreakRun(ref state, state._nextStart - state._previousStart);
                    state._previousBreak = breakType;
                }

                // Advance to the next character and remember what was just processed.
                state._previousItemClass = state._currentItemClass;
                state._charIndex += wordCount;
                state._lastCharLength = wordCount;
                state._previousItem.Flags |= currentItemFlags;
            }

            // Flush whatever remains after the last break as the final item.
            if (text.Length > state._previousStart)
            {
                if (state._previousBreak == CharBreakingType.DigitBreak)
                {
                    // We have numerics class followed by any other type.
                    NumbericfollowedByOther(ref state, false);
                }
                else
                    CheckMirroring (ref state);

                // Create the last item
                BreakRun(ref state, text.Length - state._previousStart);
            }


            // Debug-only sanity check: the emitted spans must cover the text exactly.
            #if DEBUG
            int spanLength = 0;
            foreach (Span s in state._items)
            {
                spanLength += s.length;
            }

            Debug.Assert(spanLength == text.Length, ("total itemized text length not equal the original text length."));
            #endif

            return state._items;
        }


        /// <summary>
        /// Marks the pending item as a mirror item when the previous break type carries one of
        /// the mirror bits, and records which mirror form applies.
        /// </summary>
        private static void CheckMirroring (ref State state)
        {
            CharBreakingType mirrorBits =
                CharBreakingType.PairMirrorBreak | CharBreakingType.SingleMirrorBreak;

            // Nothing to do unless at least one mirror bit is set.
            if ((state._previousBreak & mirrorBits) == 0)
            {
                return;
            }

            state._previousItem.Script = ScriptID.Mirror;

            // The form flag is only added when the break type is exactly one of the two values.
            switch (state._previousBreak)
            {
                case CharBreakingType.PairMirrorBreak:
                    state._previousItem.Flags |= ItemFlags.PairMirrorForm;
                    break;

                case CharBreakingType.SingleMirrorBreak:
                    state._previousItem.Flags |= ItemFlags.SingleMirrorForm;
                    break;
            }
        }
 
        /// <summary>
        /// Helper that emits the pending item as a run of <paramref name="length"/> characters
        /// beginning at state._previousStart, and then creates and initializes the next item,
        /// which begins at state._nextStart.
        /// </summary>
        /// <param name="state">The itemizer state, updated in place.</param>
        /// <param name="length">Number of characters covered by the item being emitted.</param>
        private static void BreakRun(ref State state, int length)
        {
            // Give the pending item its mirror script/flags if its break type asks for it.
            CheckMirroring (ref state);

            state._previousItem.DigitCulture = null;

            if (state._previousItem.Script == ScriptID.Digit)
            {
                // Digit item requires appropriate culture for number substitution.
                state._previousItem.DigitCulture = state._digitCulture;

                if (state._previousItem.DigitCulture == null)
                {
                    // Shape digits w/o number substitution in latin shaping engine, so that
                    // digit item will automatically merge with the surrounding latin item
                    // to form more complete context (e.g. for runs like "1", "/" and "2",
                    // stacked fraction works on all of them together).
                    state._previousItem.Script = ScriptID.Latin;
                }
            }

            state._items.SetValue(state._previousStart, length, state._previousItem);

            // Start a fresh item that takes the script of the current character.
            state._previousItem = new Item();
            state._previousItem.Script      = state._currentItem.Script;
            state._currentItem.Flags        = ItemFlags.Default;
            state._previousStart            = state._nextStart;

            // The new item has a leading join when the character right before it is a ZWJ.
            // _lastZWJ uses -1 for "no ZWJ seen", so index 0 is a valid position and must be
            // accepted here (the previous "> 0" test skipped a ZWJ that was the first character).
            if (state._lastZWJ >= 0 && state._lastZWJ == state._nextStart - state._lastCharLength)
            {
                state._previousItem.Flags |= ItemFlags.LeadingJoin;
            }
            else
            {
                state._previousItem.Flags &= ~ItemFlags.LeadingJoin;
            }

            // If the new run starts with a weak character then remember its start position
            // so we can merge it into the strong class characters that follow.
            if (state._currentItemClass == ItemClass.WeakClass &&
                state._charIndex == state._previousStart)
            {
                state._weakRunStart = state._previousStart;
            }
        }

        // Code point of the zero width joiner; JoinerClassState compares the current
        // character against it to track the position of the last ZWJ.
        private const int U_ZWJ = 0x200D;

        // Signature shared by all state handlers of the itemizer: each one receives the
        // itemizer state by reference and updates it in place.
        private delegate void ItemizerStateDelegate(ref State state);
 
        /// <summary>
        /// DigitClassState handles the digit-related classes by forwarding to the handler that
        /// the number state machine (DigitFSM) holds for the previous/current class pair.
        /// </summary>
        private static void DigitClassState(ref State state)
        {
            // Every previous class beyond ETClass shares the single row that follows it.
            int row    = Math.Min((int) state._previousItemClass, (int)(ItemClass.ETClass + 1));
            int column = (int) state._currentItemClass;

            ItemizerStateDelegate transition = DigitFSM[row, column];

            // A null entry means this transition requires no action.
            if (transition == null)
            {
                return;
            }

            transition(ref state);
        }
 
        /// <summary>
        /// StrongClassState handles the strong class characters.
        /// </summary>
        private static void StrongClassState(ref State state)
        {
            // Same script as the item being built: nothing to decide.
            if (state._previousItem.Script == state._currentItem.Script)
            {
                return;
            }

            if (state._weakRunStart == state._previousStart)
            {
                // The previous run started with weak characters, so merge it with the strong
                // characters. Adopting the current script also speeds up the processing of
                // the strong characters that follow.
                state._previousItem.Script = state._currentItem.Script;
            }
            else
            {
                // The previous run did not start with weak characters, so break here.
                state._nextStart = state._charIndex;
            }

            // Either way the weak run start is consumed.
            state._weakRunStart = -1;
        }

        /// <summary>
        /// WeakClassState handles the weak class characters: it only records where the most
        /// recent weak character was seen.
        /// </summary>
        private static void WeakClassState(ref State state)
        {
            int weakPosition = state._charIndex;
            state._lastWeak  = weakPosition;
        }

        /// <summary>
        /// SimpleMarkClassState handles the combining mark characters by flagging the item
        /// being built as containing a combining mark.
        /// </summary>
        private static void SimpleMarkClassState(ref State state)
        {
            state._previousItem.Flags = state._previousItem.Flags | ItemFlags.HasCombiningMark;
        }

        /// <summary>
        /// ComplexMarkClassState handles the complex mark characters, like the Arabic diacritics.
        /// </summary>
        private static void ComplexMarkClassState(ref State state)
        {
            ScriptID baseScript = state._previousItem.Script;
            ScriptID markScript = state._currentItem.Script;

            // A mark of the same script as the item being built never changes anything.
            if (baseScript == markScript)
            {
                return;
            }

            // An Arabic mark on a Syriac item is accepted as is.
            if (baseScript == ScriptID.Syriac && markScript == ScriptID.Arabic)
            {
                return;
            }

            bool joinerJustBefore = state._lastJoiner == state._charIndex - state._lastCharLength;
            if (!joinerJustBefore)
            {
                // Different script and no joiner right before the mark: break at the mark.
                state._nextStart = state._charIndex;
                return;
            }

            // A joiner directly precedes the mark: break at the last weak character instead,
            // and if that does not lie beyond the start of the item, let the item take the
            // script of the mark.
            state._nextStart = state._lastWeak;
            if (state._nextStart <= state._previousStart)
            {
                state._previousItem.Script = markScript;
            }
        }
 
        /// <summary>
        /// ControlClassState handles the control characters, like code point 0x200f.
        /// The handler performs no work of its own; it exists so that the ItemizerState
        /// table has an entry for this class.
        /// </summary>
        private static void ControlClassState(ref State state)
        {
            // Intentionally empty.
        }
 
        /// <summary>
        /// JoinerClassState handles the joiner characters, like code point 0x200D.
        /// </summary>
        private static void JoinerClassState(ref State state)
        {
            int position = state._charIndex;

            // Track the zero width joiner separately from the other joiners.
            if (state._ch == U_ZWJ)
            {
                state._lastZWJ = position;
            }

            // Only a joiner that directly follows a weak character is remembered as a joiner.
            bool followsWeak = state._lastWeak == position - state._lastCharLength;
            if (followsWeak)
            {
                state._lastJoiner = position;
            }
        }

 
        /// <summary>
        /// NumberSignClassState handles the Arabic number signs (0x0600~0x0603, 0x06DD): the
        /// sign is treated like a strong character and its position is remembered.
        /// </summary>
        private static void NumberSignClassState(ref State state)
        {
            // StrongClassState neither reads nor writes _numberSignIndex, so the position
            // can be recorded before delegating to it.
            state._numberSignIndex = state._charIndex;
            StrongClassState(ref state);
        }
 
        // Single handler instance shared by the five digit-related table entries below.
        // It is assigned once, so it is readonly; it must stay above the table because
        // static initializers run in textual order.
        private static readonly ItemizerStateDelegate digitClassDelegate = new ItemizerStateDelegate(DigitClassState);

        /// <summary>
        /// This is the delegate table which holds the FSM for the main itemizer. It is indexed
        /// by the ItemClass value of the current character; the first five entries (values 0
        /// through 4, DigitClass through ETClass) all forward to the number state machine.
        /// The table is never reassigned, so it is declared readonly. The explicit array size
        /// makes the compiler reject an initializer whose entry count differs from
        /// ItemClass.MaxClass.
        /// </summary>
        private static readonly ItemizerStateDelegate [] ItemizerState = new ItemizerStateDelegate[(int) ItemClass.MaxClass]
        {
            digitClassDelegate,
            digitClassDelegate,
            digitClassDelegate,
            digitClassDelegate,
            digitClassDelegate,
            new ItemizerStateDelegate(StrongClassState),
            new ItemizerStateDelegate(WeakClassState),
            new ItemizerStateDelegate(SimpleMarkClassState),
            new ItemizerStateDelegate(ComplexMarkClassState),
            new ItemizerStateDelegate(ControlClassState),
            new ItemizerStateDelegate(JoinerClassState),
            new ItemizerStateDelegate(NumberSignClassState)
        };
 
        // The following types are used for handling the numbers analysis inside the itemizer.
        // We have a small finite state machine for numbers which works inside the itemizer finite
        // state machine. It was built this way to keep the itemizer finite state machine as small
        // as possible and to avoid having too many states depending on each other.

 
 
        /// <summary>
        /// DigitFlags describes the status of the number state machine, i.e. what kind of
        /// number-related sequence the most recent characters formed.
        /// </summary>
        internal enum DigitFlags
        {
            /// <summary>Not a number.</summary>
            NONE = 0,

            /// <summary>Arabic number.</summary>
            AN = 1,

            /// <summary>European number.</summary>
            EN = 2,

            /// <summary>European terminator.</summary>
            ET = 3,

            /// <summary>European number followed by a common separator.</summary>
            ENCS = 4,

            /// <summary>European number followed by a European separator.</summary>
            ENES = 5,

            /// <summary>European number followed by a European terminator.</summary>
            ENET = 6,

            /// <summary>Arabic number followed by a common separator.</summary>
            ANCS = 7
        }

        /// <summary>
        /// EnAnStateDelegate handles a European number (En) followed by an Arabic number (An).
        /// </summary>
        private static void EnAnStateDelegate(ref State state)
        {
            // An Arabic number is now in progress and the next item starts at its first character.
            state._digitFlags = DigitFlags.AN;
            state._nextStart  = state._charIndex;
        }

        /// <summary>
        /// EnCsStateDelegate handles a European number (En) followed by a common separator (Cs).
        /// </summary>
        private static void EnCsStateDelegate(ref State state)
        {
            // Remember the separator's script and position; whether it stays part of the
            // number is decided by the character that follows.
            state._digitSeperatorScript = state._currentItem.Script;
            state._digitFlags           = DigitFlags.ENCS;
            state._startOfCS            = state._charIndex;
        }

        /// <summary>
        /// EnEsStateDelegate handles a European number (En) followed by a European separator (Es).
        /// </summary>
        private static void EnEsStateDelegate(ref State state)
        {
            // Remember the separator's script and position; whether it stays part of the
            // number is decided by the character that follows.
            state._digitSeperatorScript = state._currentItem.Script;
            state._digitFlags           = DigitFlags.ENES;
            state._startOfES            = state._charIndex;
        }

        /// <summary>
        /// EnEtStateDelegate handles a European number (En) followed by a European terminator (Et).
        /// </summary>
        private static void EnEtStateDelegate(ref State state)
        {
            state._digitFlags = DigitFlags.ENET;

            // Unlike the other Et handlers, the terminator start is taken from _nextStart
            // rather than from the current character index.
            state._startOfET  = state._nextStart;
        }

        /// <summary>
        /// AnEnStateDelegate handles an Arabic number (An) followed by a European number (En).
        /// </summary>
        private static void AnEnStateDelegate(ref State state)
        {
            // A European number is now in progress and the next item starts at its first character.
            state._digitFlags = DigitFlags.EN;
            state._nextStart  = state._charIndex;
        }

        /// <summary>
        /// AnCSStateDelegate handles an Arabic number (An) followed by a common separator (Cs).
        /// </summary>
        private static void AnCSStateDelegate(ref State state)
        {
            // Remember the separator's script and position; whether it stays part of the
            // number is decided by the character that follows.
            state._digitSeperatorScript = state._currentItem.Script;
            state._digitFlags           = DigitFlags.ANCS;
            state._startOfCS            = state._charIndex;
        }
 
        /// <summary>
        /// AnEsStateDelegate handles an Arabic number (An) followed by a European separator (Es).
        /// </summary>
        private static void AnEsStateDelegate(ref State state)
        {
            // The separator ends the number: clear the digit status and start the next item here.
            state._digitFlags = DigitFlags.NONE;
            state._nextStart  = state._charIndex;
        }
 
        /// <summary>
        /// AnEtStateDelegate handles an Arabic number (An) followed by a European terminator (Et).
        /// </summary>
        private static void AnEtStateDelegate(ref State state)
        {
            int terminatorPosition = state._charIndex;

            // The terminator sequence starts here, and so does the next item.
            state._digitFlags = DigitFlags.ET;
            state._startOfET  = terminatorPosition;
            state._nextStart  = terminatorPosition;
        }

        /// <summary>
        /// CsEnStateDelegate handles a common separator (Cs) followed by a European number (En).
        /// </summary>
        private static void CsEnStateDelegate(ref State state)
        {
            // True when the separator itself followed a European number.
            bool separatorFollowedEn = state._digitFlags == DigitFlags.ENCS;

            state._digitFlags = DigitFlags.EN;

            if (!separatorFollowedEn)
            {
                // The separator was not attached to a European number: start the next item here.
                state._nextStart = state._charIndex;
            }
        }
 
        /// <summary>
        /// CsAnStateDelegate handles a common separator (Cs) followed by an Arabic number (An).
        /// </summary>
        private static void CsAnStateDelegate(ref State state)
        {
            // True when the separator itself followed an Arabic number.
            bool separatorFollowedAn = state._digitFlags == DigitFlags.ANCS;

            state._digitFlags = DigitFlags.AN;

            if (!separatorFollowedAn)
            {
                // The separator was not attached to an Arabic number: start the next item here.
                state._nextStart = state._charIndex;
            }
        }
 
        /// <summary>
        /// CsCstateDelegate handles a common separator (Cs) followed by another common separator.
        /// </summary>
        private static void CsCstateDelegate(ref State state)
        {
            DigitFlags before = state._digitFlags;
            state._digitFlags = DigitFlags.NONE;

            switch (before)
            {
                case DigitFlags.ENCS:
                case DigitFlags.ANCS:
                    // The first separator trailed a number: the next item starts at that separator.
                    state._nextStart = state._startOfCS;
                    break;
            }
        }
 
        /// <summary>
        /// CsEsStateDelegate handles a common separator (Cs) followed by a European separator (Es).
        /// </summary>
        private static void CsEsStateDelegate(ref State state)
        {
            bool separatorTrailedNumber =
                state._digitFlags == DigitFlags.ENCS ||
                state._digitFlags == DigitFlags.ANCS;

            state._digitFlags = DigitFlags.NONE;

            if (separatorTrailedNumber)
            {
                // The next item starts at the common separator that followed the number.
                state._nextStart = state._startOfCS;
            }
        }

        /// <summary>
        /// CsEtStateDelegate handles a common separator (Cs) followed by a European terminator (Et).
        /// </summary>
        private static void CsEtStateDelegate(ref State state)
        {
            DigitFlags before = state._digitFlags;

            // A terminator sequence begins at the current character.
            state._startOfET  = state._charIndex;
            state._digitFlags = DigitFlags.ET;

            if (before == DigitFlags.ENCS || before == DigitFlags.ANCS)
            {
                // The separator trailed a number: the next item starts at that separator.
                state._nextStart = state._startOfCS;
            }
        }

        /// <summary>
        /// EsEnStateDelegate handles a European separator (Es) followed by a European number (En).
        /// </summary>
        private static void EsEnStateDelegate(ref State state)
        {
            // True when the separator itself followed a European number.
            bool separatorFollowedEn = state._digitFlags == DigitFlags.ENES;

            state._digitFlags = DigitFlags.EN;

            if (!separatorFollowedEn)
            {
                // The separator was not attached to a European number: start the next item here.
                state._nextStart = state._charIndex;
            }
        }

        /// <summary>
        /// EsAnStateDelegate handles a European separator (Es) followed by an Arabic number (An).
        /// </summary>
        private static void EsAnStateDelegate(ref State state)
        {
            if (state._digitFlags == DigitFlags.ENES)
            {
                // The separator trailed a European number: emit the digits in front of it as
                // their own run, then begin a Latin run at the separator. BreakRun does not
                // modify _startOfES, so the position can be captured before the call.
                int separatorStart = state._startOfES;

                BreakRun(ref state, separatorStart - state._previousStart);
                state._previousStart       = separatorStart;
                state._previousItem.Script = ScriptID.Latin;
            }

            // The Arabic number begins at the current character.
            state._digitFlags = DigitFlags.AN;
            state._nextStart  = state._charIndex;
        }
 
        /// <summary>
        /// EsCsStateDelegate handles a European separator (Es) followed by a common separator (Cs).
        /// </summary>
        private static void EsCsStateDelegate(ref State state)
        {
            bool separatorTrailedEn = state._digitFlags == DigitFlags.ENES;

            if (separatorTrailedEn)
            {
                // Emit the digits in front of the European separator as their own run and begin
                // the following run at that separator, using the script of the current character.
                int separatorStart = state._startOfES;

                BreakRun(ref state, separatorStart - state._previousStart);
                state._previousStart       = separatorStart;
                state._previousItem.Script = state._currentItem.Script;
            }

            state._digitFlags = DigitFlags.NONE;
        }

        /// <summary>
        /// EsEsStateDelegate handles a European separator (Es) followed by another European separator.
        /// </summary>
        private static void EsEsStateDelegate(ref State state)
        {
            if (state._digitFlags == DigitFlags.ENES)
            {
                // Emit the digits in front of the first separator as their own run and begin
                // the following run at that separator, using the script of the current character.
                int separatorStart = state._startOfES;

                BreakRun(ref state, separatorStart - state._previousStart);
                state._previousStart       = separatorStart;
                state._previousItem.Script = state._currentItem.Script;
            }

            state._digitFlags = DigitFlags.NONE;

            // We don't need to do any more run breaking here, so reset _nextStart.
            state._nextStart = state._previousStart;
        }

        /// <summary>
        /// EsEtStateDelegate handles a European separator (Es) followed by a European terminator (Et).
        /// </summary>
        private static void EsEtStateDelegate(ref State state)
        {
            if (state._digitFlags == DigitFlags.ENES)
            {
                // Emit the digits in front of the separator as their own run and begin the
                // following run at that separator, using the script of the current character.
                int separatorStart = state._startOfES;

                BreakRun(ref state, separatorStart - state._previousStart);
                state._previousStart       = separatorStart;
                state._previousItem.Script = state._currentItem.Script;
            }

            // A terminator sequence begins at the current character.
            state._digitFlags = DigitFlags.ET;
            state._startOfET  = state._charIndex;
        }
 
        /// 
        /// EtEnStateDelegate handle the case Et followed by En.
        /// 
        private static void EtEnStateDelegate(ref State state)
        {
            // After "En Et" the next start is the current character; otherwise it is the
            // recorded start of the Et sequence.
            state._nextStart = (state._digitFlags == DigitFlags.ENET)
                                    ? state._charIndex
                                    : state._startOfET;

            state._digitFlags = DigitFlags.EN;

            // _previousStart may still point at an earlier non-digit run, so the previous
            // item's script is overwritten only when the next start does not lie beyond
            // _previousStart.
            if (state._previousStart >= state._nextStart)
            {
                // Use the current item's script for the previous item.
                state._previousItem.Script = state._currentItem.Script;
            }
        }
 
        /// 
        /// EtAnStateDelegate handle the case Et followed by An. 
        ///  
        private static void EtAnStateDelegate(ref State state)
        {
            // Switch to the An state; the current character index becomes the next start.
            state._digitFlags = DigitFlags.AN;
            state._nextStart  = state._charIndex;
        }
 
        /// 
        /// EtCsStateDelegate handle the case Et followed by Cs. 
        ///  
        private static void EtCsStateDelegate(ref State state)
        {
            // Capture the test before the flags are cleared below.
            bool followsEnEt = (DigitFlags.ENET == state._digitFlags);

            state._digitFlags = DigitFlags.NONE;

            // Only after an "En Et" sequence does the current character become the next start.
            if (followsEnEt)
            {
                state._nextStart = state._charIndex;
            }
        }
 
        /// 
        /// EtEsStateDelegate handle the case Et followed by Es. 
        /// 
        private static void EtEsStateDelegate(ref State state)
        {
            // Without a preceding "En Et" sequence the only effect is clearing the digit flags.
            if (state._digitFlags != DigitFlags.ENET)
            {
                state._digitFlags = DigitFlags.NONE;
                return;
            }

            // After "En Et" the current character becomes the next start, then the flags
            // are cleared.
            state._nextStart  = state._charIndex;
            state._digitFlags = DigitFlags.NONE;
        }

        /// 
        /// OtherEnStateDelegate handles the case of a non-number class followed by En.
        ///  
        private static void OtherEnStateDelegate(ref State state)
        {
            // True when a number sign index has been recorded and it equals
            // _charIndex - _lastCharLength (presumably the position of the character
            // immediately before the current one).
            bool signDirectlyBefore =
                state._numberSignIndex >= 0 &&
                state._numberSignIndex == state._charIndex - state._lastCharLength;

            state._digitFlags = DigitFlags.EN;

            if (!signDirectlyBefore)
            {
                // No adjacent sign: the next start is the current character.
                state._nextStart = state._charIndex;
                return;
            }

            // Adjacent sign: the next start is the sign itself.
            state._nextStart = state._numberSignIndex;

            // Set digit breaking so subsequent numbers are not split.
            state._previousBreak = CharBreakingType.DigitBreak;
        }

        /// 
        /// OtherAnStateDelegate handles the case of a non-number class followed by An.
        /// 
        private static void OtherAnStateDelegate(ref State state)
        {
            if (state._numberSignIndex < 0 ||
                state._charIndex - state._lastCharLength != state._numberSignIndex)
            {
                // No number sign recorded, or it is not at _charIndex - _lastCharLength:
                // the next start is the current character.
                state._nextStart = state._charIndex;
            }
            else
            {
                // A number sign sits at _charIndex - _lastCharLength: the next start is the sign.
                state._nextStart = state._numberSignIndex;

                // Set digit breaking so subsequent numbers are not split.
                state._previousBreak = CharBreakingType.DigitBreak;
            }

            state._digitFlags = DigitFlags.AN;
        }

        ///  
        /// OtherCsStateDelegate handles the case of a non-number class followed by Cs.
        ///  
        private static void OtherCsStateDelegate(ref State state)
        {
            // A Cs after a non-number class leaves the digit flags cleared.
            state._digitFlags = DigitFlags.NONE;

            // Any previous break type other than NoBreak leaves _nextStart as it is.
            if (state._previousBreak != CharBreakingType.NoBreak)
            {
                return;
            }

            // With NoBreak, _nextStart is reset to _previousStart.
            state._nextStart = state._previousStart;
        }
 
        ///  
        /// OtherEsStateDelegate handles the case of a non-number class followed by Es.
        ///  
        private static void OtherEsStateDelegate(ref State state)
        {
            bool noPreviousBreak = (CharBreakingType.NoBreak == state._previousBreak);

            // An Es after a non-number class leaves the digit flags cleared.
            state._digitFlags = DigitFlags.NONE;

            // With NoBreak, _nextStart is reset to _previousStart; otherwise it is untouched.
            if (noPreviousBreak)
            {
                state._nextStart = state._previousStart;
            }
        }
 
        /// 
        /// OtherEtStateDelegate handles the case of a non-number class followed by Et.
        /// 
        private static void OtherEtStateDelegate(ref State state)
        {
            // Enter the Et state and remember where this Et sequence begins.
            state._digitFlags = DigitFlags.ET;
            state._startOfET  = state._charIndex;

            // With NoBreak as the previous break type, _nextStart is reset to _previousStart;
            // otherwise it is left untouched.
            if (CharBreakingType.NoBreak == state._previousBreak)
            {
                state._nextStart = state._previousStart;
            }
        }
 

        ///  
        /// NumbericfollowedByOther handles a number class followed by any non-number class.
        /// Note that, unlike the state delegates above, it takes an extra runBreak argument and is not an entry in the DigitFSM table.
        ///  
        private static void NumbericfollowedByOther(ref State state, bool runBreak)
        {
            // For AN, EN and ENET the method returns immediately without modifying the state.
            if (state._digitFlags == DigitFlags.AN ||
                state._digitFlags == DigitFlags.EN ||
                state._digitFlags == DigitFlags.ENET)
            {
                return;
            }

            if (state._digitFlags == DigitFlags.ANCS || state._digitFlags == DigitFlags.ENCS)
            {
                // Number followed by Cs: BreakRun gets the distance from _previousStart to
                // _startOfCS, the previous item takes the digit separator script and the
                // previous start moves to the Cs.
                BreakRun(ref state, state._startOfCS - state._previousStart);
                state._previousItem.Script = state._digitSeperatorScript;
                state._previousStart = state._startOfCS;
            }
            else if (state._digitFlags == DigitFlags.ENES)
            {
                // En followed by Es: same treatment, measured from / moved to _startOfES.
                BreakRun(ref state, state._startOfES - state._previousStart);
                state._previousItem.Script = state._digitSeperatorScript;
                state._previousStart = state._startOfES;
            }

            // When the caller does not want a run break, _nextStart is reset to _previousStart.
            if (!runBreak)
            {
                state._nextStart = state._previousStart;
            }

            // In every case that reaches this point the digit flags are cleared.
            state._digitFlags = DigitFlags.NONE;
        }

        ///  
        /// The following will define the finite state machine for numbers.
        ///  
        // One delegate instance per transition handler. They are created once and never need
        // to be reassigned, so they are declared readonly.
        private static readonly ItemizerStateDelegate EnAnState       = new ItemizerStateDelegate(EnAnStateDelegate);
        private static readonly ItemizerStateDelegate EnCsState       = new ItemizerStateDelegate(EnCsStateDelegate);
        private static readonly ItemizerStateDelegate EnEsState       = new ItemizerStateDelegate(EnEsStateDelegate);
        private static readonly ItemizerStateDelegate EnEtState       = new ItemizerStateDelegate(EnEtStateDelegate);
        private static readonly ItemizerStateDelegate AnEnState       = new ItemizerStateDelegate(AnEnStateDelegate);
        private static readonly ItemizerStateDelegate AnCSState       = new ItemizerStateDelegate(AnCSStateDelegate);
        private static readonly ItemizerStateDelegate AnEsState       = new ItemizerStateDelegate(AnEsStateDelegate);
        private static readonly ItemizerStateDelegate AnEtState       = new ItemizerStateDelegate(AnEtStateDelegate);
        private static readonly ItemizerStateDelegate CsEnState       = new ItemizerStateDelegate(CsEnStateDelegate);
        private static readonly ItemizerStateDelegate CsAnState       = new ItemizerStateDelegate(CsAnStateDelegate);
        private static readonly ItemizerStateDelegate CsCstate        = new ItemizerStateDelegate(CsCstateDelegate);
        private static readonly ItemizerStateDelegate CsEsState       = new ItemizerStateDelegate(CsEsStateDelegate);
        private static readonly ItemizerStateDelegate CsEtState       = new ItemizerStateDelegate(CsEtStateDelegate);
        private static readonly ItemizerStateDelegate EsEnState       = new ItemizerStateDelegate(EsEnStateDelegate);
        private static readonly ItemizerStateDelegate EsAnState       = new ItemizerStateDelegate(EsAnStateDelegate);
        private static readonly ItemizerStateDelegate EsCsState       = new ItemizerStateDelegate(EsCsStateDelegate);
        private static readonly ItemizerStateDelegate EsEsState       = new ItemizerStateDelegate(EsEsStateDelegate);
        private static readonly ItemizerStateDelegate EsEtState       = new ItemizerStateDelegate(EsEtStateDelegate);
        private static readonly ItemizerStateDelegate EtEnState       = new ItemizerStateDelegate(EtEnStateDelegate);
        private static readonly ItemizerStateDelegate EtAnState       = new ItemizerStateDelegate(EtAnStateDelegate);
        private static readonly ItemizerStateDelegate EtCsState       = new ItemizerStateDelegate(EtCsStateDelegate);
        private static readonly ItemizerStateDelegate EtEsState       = new ItemizerStateDelegate(EtEsStateDelegate);
        private static readonly ItemizerStateDelegate OtherEnState    = new ItemizerStateDelegate(OtherEnStateDelegate);
        private static readonly ItemizerStateDelegate OtherAnState    = new ItemizerStateDelegate(OtherAnStateDelegate);
        private static readonly ItemizerStateDelegate OtherCsState    = new ItemizerStateDelegate(OtherCsStateDelegate);
        private static readonly ItemizerStateDelegate OtherEsState    = new ItemizerStateDelegate(OtherEsStateDelegate);
        private static readonly ItemizerStateDelegate OtherEtState    = new ItemizerStateDelegate(OtherEtStateDelegate);

        // Transition table of the number state machine. Each row is the class that comes
        // first and each column is the class that follows it, matching the handler names
        // (e.g. row Es, column An holds EsAnState). The En/En, An/An and Et/Et cells are null,
        // i.e. no handler is registered for them; presumably callers check for null before
        // invoking an entry (the lookup code is not in this part of the file).
        // This declaration must stay after the delegate fields above: static field
        // initializers run in textual order and the table reads those fields.
        private static readonly ItemizerStateDelegate [,] DigitFSM  = new ItemizerStateDelegate [6, 5]
        {
                        //   En            AN              CS              Es              Et
/* En     */            { null,         EnAnState,      EnCsState,      EnEsState,      EnEtState    },
/* An     */            { AnEnState,    null,           AnCSState,      AnEsState,      AnEtState    },
/* Cs     */            { CsEnState,    CsAnState,      CsCstate,       CsEsState,      CsEtState    },
/* Es     */            { EsEnState,    EsAnState,      EsCsState,      EsEsState,      EsEtState    },
/* Et     */            { EtEnState,    EtAnState,      EtCsState,      EtEsState,      null         },
/* Other  */            { OtherEnState, OtherAnState,   OtherCsState,   OtherEsState,   OtherEtState }
        };
 
    }
}


// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
// Copyright (c) Microsoft Corporation. All rights reserved.

                        

Link Menu

Network programming in C#, Network Programming in VB.NET, Network Programming in .NET
This book is available now!
Buy at Amazon US or
Buy at Amazon UK