Code:
/ Net / Net / 3.5.50727.3053 / DEVDIV / depot / DevDiv / releases / Orcas / SP / wpf / src / Core / CSharp / MS / Internal / Shaping / hebrewshape.cs / 1 / hebrewshape.cs
//---------------------------------------------------------------------- // // Microsoft Windows Client Platform // Copyright (C) Microsoft Corporation, 2003 // // File: HebrewShape.cs // // Contents: Implementation of Hebrew shaping engine and its factory // // Created: 08-05-2003 Nick Beal (nbeal) // //----------------------------------------------------------------------- // #define VALIDATE_CLUSTER_PARAMETERS using System; using System.Security; using System.Security.Permissions; using System.Diagnostics; using System.Collections; using System.Globalization; using System.Windows; using System.Windows.Media; using System.Windows.Media.TextFormatting; using MS.Internal.FontCache; using MS.Internal.FontFace; using MS.Internal.PresentationCore; namespace MS.Internal.Shaping { ////// HebrewCharClass - enumeration of Hebrew classification flags /// ordinal position /// ////// To allow for all possible Biblical combinations of cantillation /// marks (teamin) and nikud, a number of cantillation classes /// are defined. /// Note that the order is important here. OpenType /// fonts' rules expect that the base/diacritic glyph sequences /// are arranged canonically (ie, a diacritic or cantillation mark whose /// HebrewCharClass value is small canonically preceeds a character /// whose HebrewCharClass value is larger). 
/// Note that our "canonical"
/// order is not entirely consistent with the Unicode canonical weighting
/// (the cross-reference that originally followed here was lost when this file was extracted).
///
/// The numeric order of the members matters: FirstBaseChar..LastBaseChar brackets the
/// base classes and everything from ShinSinDot up to LastDiacritic is a mark class.
internal enum HebrewCharClass : byte
{
    UnknownCharClass,                   // Unknown class -- can take any diacritic
    PunctuationMark,                    // PunctuationMark -- cannot take diacritics
    BaseAcceptsVowel,                   // Thaana base glyph can have vowel
    FirstBaseChar = BaseAcceptsVowel,   // alias: lowest base class value
    BaseAcceptsNotShin,                 // Hebrew base can have dagesh && rafe && cantillation & vowel
    BaseAcceptsRafe,                    // Hebrew base can have rafe && cantillation && vowel
    BaseAcceptsDagesh,                  // Hebrew base can have dagesh && cantillation && vowel
    ShinBaseChar,                       // Hebrew shin can have shin/sin dot && dagesh && rafe && cantillation && vowel
    SpaceChar,                          // space may be base of shin/sin dot | dagesh | cantillation | vowel (only one)
    LastBaseChar = SpaceChar,           // alias: highest base class value
    ShinSinDot,                         // Hebrew shin/sin dot (05C1, 05C2)
    Dagesh,                             // Hebrew dagesh/mapiq (05BC)
    Rafe,                               // Hebrew Rafe (05BF)
    Holam,                              // Hebrew Holam (05B9)
    Hataf,                              // Hebrew hataf vowels -- only one per base
    Vowel,                              // Hebrew vowels -- only two per base
    LowerMark,                          // Hebrew cantillation - Below (cf. HebrewCharClassFlags.BelowMark)
    LowerRightMark,                     // Hebrew cantillation - Below right
    UpperMark,                          // Hebrew cantillation - Above (cf. HebrewCharClassFlags.UpperMark)
    UpperLeftMark,                      // Hebrew cantillation - Above left
    Puncta,                             // Hebrew upper dot (05C4)
    MasoraCircle,                       // Hebrew cantillation - MasoraCircle Circle
    LastDiacritic = MasoraCircle,       // alias: highest Hebrew-specific mark class value
    UnicodeCombiningMark,               // Combining marks
    UnicodeCGJ = UnicodeCombiningMark,  // Unicode CGJ (shares the combining mark class)
    UnicodeLayoutControl,               // ZWNJ, ZWJ
    NumberOfHebrewCharClasses,          // count of distinct class values (not itself a class)
};
///
/// The Hebrew Shaping Engine - (shapes Hebrew text)
///
/// The IShaper and IShapingEngine interfaces are implemented to
/// provide the shaping methods for Hebrew Scripts.
/// There are four Hebrew private types defined/used in this class:
///     1.) HebrewShapeInfo - this class manages the shape information
///     2.) HebrewClusterCop - this class manages the canonical ordering
///     3.) HebrewFontInfo - this class manages the font interface
///     4.)
///     HebrewCharClassInfo - contains the char classification tables
///
internal sealed class HebrewShape : BaseShape
{
    /// <summary>U+0323, combining dot below.</summary>
    public const char UnicodeCombiningDotBelow = '\u0323';

    // Script tags handled by this engine; returned as-is from SupportedScripts.
    private static readonly ScriptTags[] _supportedScripts = { ScriptTags.Hebrew, ScriptTags.Thaana };

    //--------------------------------------
    //
    //  Constructors
    //
    //--------------------------------------

    #region Constructors

    /// <summary>
    /// Creates the shaping engine and sets its text flow direction to right-to-left.
    /// </summary>
    internal HebrewShape()
    {
        _textFlowDirection = TextFlowDirection.RTL;
    }

    #endregion

    //--------------------------------------
    //
    //  Internal Methods
    //
    //--------------------------------------

    #region Internal methods

    /// <summary>
    /// HebrewShape.GetCharClassifier - supplies the Hebrew character classifier.
    /// </summary>
    /// <param name="scriptTag">script tag forwarded to the classifier</param>
    /// <param name="fontFace">font face forwarded to the classifier</param>
    /// <returns>a new HebrewCharClassifier built from the two arguments</returns>
    protected override ShaperCharacterClassifier GetCharClassifier(ScriptTags scriptTag, GlyphTypeface fontFace)
    {
        ShaperCharacterClassifier classifier = new HebrewCharClassifier(scriptTag, fontFace);
        return classifier;
    }

    /// <summary>
    /// HebrewShape.GetGlyphs - Hebrew override of the GetGlyphs() helper function.
    /// </summary>
    /// <remarks>
    /// Runs in two phases. The first phase sets glyph properties character by character
    /// for as long as every character is either a cluster start or a layout control.
    /// As soon as some other character (a diacritic) shows up, the run is rewound to the
    /// base of that cluster and the second phase takes over, routing every non-base
    /// character through a stack-backed HebrewClusterCop that validates and reorders marks.
    /// </remarks>
    /// <param name="currentRun">shaping currentRun</param>
    /// <param name="item">Text item</param>
    /// <returns>number of glyphs</returns>
    /// <SecurityNote>
    /// Critical - calls critical code
    /// </SecurityNote>
    [SecurityCritical]
    protected override unsafe int GetGlyphs(ref ShapingWorkspace currentRun, Item item)
    {
        CharShapeInfo shape;
        CharShapeInfo fastPathMask = CharShapeInfo.IsStartOfCluster | CharShapeInfo.IsUnicodeLayoutControl;

        // Phase 1: text without diacritics is handled completely by this loop.
        while (currentRun.SetNextGlyphProperties(out shape))
        {
            if ((shape & fastPathMask) != 0)
            {
                continue;
            }

            // Neither a cluster start nor a layout control: back up and let the
            // cluster-aware loop below handle the remainder of the run.
            RewindToClusterBase(ref currentRun);
            break;
        }

        // Modern Hebrew carries no diacritics, so usually there is nothing left to do.
        if (currentRun.IsFinished)
        {
            return currentRun.GlyphsCount;
        }

        // Phase 2: scratch buffers for diacritic reordering live on the stack;
        // cheap to allocate and discarded automatically on return.
        char* reorderChars = stackalloc char[HebrewClusterCop.ReorderArraySize];
        ushort* reorderGlyphs = stackalloc ushort[HebrewClusterCop.ReorderArraySize];
        HebrewClusterCop clusterCop = new HebrewClusterCop(reorderChars, reorderGlyphs);
        bool pendingReorder = false;

        while (currentRun.GetNextShape(out shape))
        {
            if ((shape & CharShapeInfo.IsStartOfCluster) == 0)
            {
                // a mark: let the cluster cop consume as much of the cluster as it can
                pendingReorder = clusterCop.AddCluster(ref currentRun, shape);
                continue;
            }

            // A new base character. If the cluster that just ended needs
            // reordering, rewrite its glyphs before moving on.
            if (pendingReorder)
            {
                clusterCop.GetReorderedGlyphs(ref currentRun);
                pendingReorder = false;
            }

            currentRun.SetGlyphPropertiesUsingShapeInfo(shape);
        }

        // flush the final cluster, if it still needs reordering
        if (pendingReorder)
        {
            clusterCop.GetReorderedGlyphs(ref currentRun);
        }

        return currentRun.GlyphsCount;
    }

    /// <summary>
    /// Rewinds the workspace so that processing resumes at the base character of the
    /// cluster whose first mark was just encountered.
    /// </summary>
    /// <SecurityNote>
    /// Critical - calls the same workspace members as GetGlyphs
    /// </SecurityNote>
    [SecurityCritical]
    private static void RewindToClusterBase(ref ShapingWorkspace currentRun)
    {
        // Within the first three characters it is simplest to start over.
        if (currentRun.CurrentCharIx < 3)
        {
            currentRun.Reset(0,0,currentRun.CharsCount);
            return;
        }

        // Otherwise restart at the previous character (the base char)...
        ushort resumeCharIx = currentRun.PreviousCharIx;
        ushort resumeGlyphIx = currentRun.PreviousGlyphIx;

        // ...unless that is a zero width layout control, in which case go back one more.
        if ((currentRun.GetShapeInfo(resumeCharIx) & CharShapeInfo.IsUnicodeLayoutControl) != 0)
        {
            resumeGlyphIx = currentRun.GetGlyphIx( --resumeCharIx );
        }

        // remaining length covers the restart character through the end of the run
        currentRun.Reset(resumeCharIx,
                         resumeGlyphIx,
                         (ushort)(currentRun.CharsCount - resumeCharIx));
    }

    /// <summary>
    /// HebrewShape.SupportedScripts - IShapingEngine member override
    /// </summary>
    /// <returns>Our supported scripts (Hebrew, Thaana).</returns>
    public override ScriptTags[] SupportedScripts
    {
        get
        {
            return _supportedScripts;
        }
    }

    #endregion
}

/// Class HebrewClusterCop:
/// Manages the diacritic ordering requirement
///
/// This class is used to provide diacritic ordering and invalid
/// diacritic enforcement for the diacritics and cantillation marks
/// found in the unicode stream.
/// Diacritic ordering is done by maintaining an array whose extent
/// is HebrewCharClassifier.NumberOfClasses.
///
internal struct HebrewClusterCop
{
    [SecurityCritical]
    unsafe private char* _clusterChars;             // our temporary array for reordering (also holds the validation slots)
    [SecurityCritical]
    unsafe private ushort* _clusterGlyphs;          // our temporary array for reordering
    private HebrewClusterState _clusterState;       // current reordering state
    private bool _textHasUnicodeControlChars;       // a layout control was seen on the current or previous char (StartCluster / AddCluster)
    private bool _reorderingIsSuppressed;           // when true, AddCluster stops saving glyphs in _clusterGlyphs
    private bool _clusterIsInvalid;                 // the current mark failed validation
    private bool _clusterRequiresReordering;        // a mark arrived whose state is lower than _clusterState
    private bool _clusterHasCGJ;                    // a CGJ was seen in this cluster
    private HebrewCharClass _baseCharClass;         // base char's class info
    private ushort _clusterSize;                    // number of diacritics currently in cluster
    private ushort _nextReorderedGlyphIx;           // for returning reordered glyphs (0 if no reordering)
    private ushort _lastReorderedGlyphIx;           // for returning reordered glyphs (0 if no reordering)
    private ushort _firstCharIx;                    // currentRun.CurrentCharIx captured by StartCluster
    private ushort _lowerMarksCount;                // lower-mark slots handed out so far (offset from LowerMarksStart)
    private ushort _upperMarksCount;                // upper-mark slots handed out so far (offset from UpperMarksStart)

    /// (see - presumably - HebrewCharClass) for comments about the
    /// char class enum.
    /// For each base character in the unicode run, the diacritics and
    /// other marks are added to the arrays at the index corresponding
    /// to the HebrewCharClass value. Only one mark per HebrewCharClass
    /// value is allowed per base character. Once the extent of the valid
    /// diacritics and cantillation marks has been determined (ie, when an
    /// illegal mark is found, when a new base is found, or at the end of
    /// the unicode run) the array is used to reorder the glyphs collected in
    /// the GlyphList object if reordering is needed.
    /// The diacritics reordering done here is not consistent with Unicode
    /// canonical ordering, but is instead consistent with the needs of
    /// current Hebrew OpenType fonts.
///
/// HebrewClusterCop constructor
///
/// Stores the two caller-supplied scratch buffers, puts every state field into
/// its idle value and zeroes the first ClusterCopArraySize entries of both buffers.
///
/// clusterArray:  buffer for the reordered characters and validation slots;
///                must hold at least HebrewReorderPosition.ClusterCopArraySize entries
/// clusterGlyphs: buffer for the reordered glyphs; same minimum size
///
/// Critical - accepts a pointer input, uses a raw buffer
///
[SecurityCritical]
unsafe public HebrewClusterCop ( char *clusterArray, ushort *clusterGlyphs )
{
    _baseCharClass = HebrewCharClass.UnknownCharClass;
    _clusterChars = clusterArray;
    _clusterGlyphs = clusterGlyphs;
    _clusterState = HebrewClusterState.Idle;
    _clusterIsInvalid = _reorderingIsSuppressed = _clusterHasCGJ = _clusterRequiresReordering = _textHasUnicodeControlChars = false;
    _firstCharIx = _lastReorderedGlyphIx =_nextReorderedGlyphIx = 0;
    _clusterSize = 0;
    _lowerMarksCount = _upperMarksCount = 0;

    // initialize the diacritics class array (its on the stack)
    int i = (int)HebrewReorderPosition.ClusterCopArraySize;
    while (--i >= 0)
    {
        _clusterChars[i] = '\u0000';
        _clusterGlyphs[i] = 0;
    }
}

///
/// HebrewClusterCop.AddCluster - adds as many characters as possible to the
/// current cluster
///
/// This routine is used to create a diacritics cluster on the current
/// base char. It continues to add characters till it notes a non-legal
/// character. Enforces member count restrictions for the various diacritic
/// classes and keeps track of reordering needs.
///
/// currentRun: the shaping workspace; advanced one character per loop iteration
/// currShape:  shape info of the first non-base character of the cluster
///
/// Returns the value of _clusterRequiresReordering, i.e. true when the caller
/// must call GetReorderedGlyphs for this cluster.
///
/// Critical - calls critical code, uses unsafe accessors
///
[SecurityCritical]
unsafe internal bool AddCluster ( ref ShapingWorkspace currentRun, CharShapeInfo currShape )
{
    // StartCluster resets the per-cluster state and validates the base/first-mark pair.
    bool isClusterInProgress = StartCluster( ref currentRun, currShape );
    char currChar = currentRun.CurrentChar;
    ushort currGlyph = currentRun.CharConverter.ToGlyph(currChar);

    do
    {
        HebrewCharClass currClass = (HebrewCharClass)(currShape & CharShapeInfo.ShaperClassMask);

        // add this character's glyph to the cluster
        if ( _clusterIsInvalid)
        {
            // the pair was already rejected (by StartCluster): the mark gets an
            // inserted base and the cluster ends here
            _reorderingIsSuppressed = true;
            currShape |= CharShapeInfo.RequiresInsertedBase;
            isClusterInProgress = false;
        }
        else
        {
            // step 1. get the reordering state
            HebrewClusterState newState = clusterStateMap[ (int)currClass ];

            // step 2. get the reordering array entry (clusterIx)
            //      The position (clusterIx) for a character in the reordering
            //      array depends on the reordering state for the character.
            //      Several states can contain multiple entries.
            HebrewReorderPosition clusterIx = clusterPositionMap[ (int)currClass ] ;

            switch (clusterIx)
            {
                case HebrewReorderPosition.SinShinPosition:
                case HebrewReorderPosition.MasoraCirclePosition:
                    // verify that sin/shin dot or masora circle is legal on this
                    // base
                    _clusterIsInvalid = IsIllegalPair(_baseCharClass, currClass);
                    break;

                case HebrewReorderPosition.DageshRafePosition:
                    // diacritic is dagesh, rafe
                    char firstDageshRafe = _clusterChars[ (ushort)clusterIx ];

                    // We've combined the dagesh and rafe into one reordering class.
                    // However we can have one dagesh and one rafe, but not two of
                    // either class types.  If there is currently a dagesh/rafe class
                    // member already in this cluster, cluster is done
                    // if it is the same char class or if there's actually already
                    // 2 dagesh/rafe char's in array.  If there's not a dagesh in
                    // this cluster, cluster is done if the current base char doesn't
                    // support dagesh.
                    if ( firstDageshRafe != '\u0000' )
                    {
                        HebrewCharClass prevDageshClass =
                            (HebrewCharClass) (currentRun.CharConverter.ToShapeInfo(firstDageshRafe) & CharShapeInfo.ShaperClassMask);
                        _clusterIsInvalid = (prevDageshClass == currClass);
                        // second member of the pair goes in the slot right after the first;
                        // if that slot is taken too, step 3 below rejects the mark
                        ++clusterIx;
                    }
                    else
                    {
                        _clusterIsInvalid = IsIllegalPair(_baseCharClass, currClass);
                    }
                    break;

                case HebrewReorderPosition.UnresolvedPosition:
                    // all marks that need to be checked against the other
                    // marks in the cluster will come here...
                    _clusterIsInvalid = IsIllegalMark(currChar, currClass);
                    if ( !_clusterIsInvalid )
                    {
                        // need to resolve the cluster index (this character is of
                        // a class that there can be more than one of per cluster)
                        clusterIx = ResolveClusterIx(newState);
                        if (clusterIx == 0)
                        {
                            // ok, it is a puncta.  Punctas don't have their own
                            // reordering class.  They are part of the lower marks
                            // or upper marks class.
                            if (currChar == HebrewCharClassifier.BelowPuncta)
                            {
                                // change the state, 'cause this is a lower puncta
                                newState = HebrewClusterState.LowerMarkSeen;
                                clusterIx = (HebrewReorderPosition)((ushort)HebrewReorderPosition.LowerMarksStart + _lowerMarksCount++);
                            }
                            else
                            {
                                // change the state, 'cause this is an upper puncta
                                newState = HebrewClusterState.UpperMarkSeen;
                                clusterIx = (HebrewReorderPosition)((ushort)HebrewReorderPosition.UpperMarksStart + _upperMarksCount++);
                            }
                        }
                    }
                    // when the mark is illegal clusterIx is still UnresolvedPosition; step 3
                    // tests _clusterIsInvalid first, so that index is never dereferenced
                    break;

                case HebrewReorderPosition.CombiningMarkPosition:
                    // combining mark.  Only one per base, won't "share" base.
                    isClusterInProgress = false;
                    _reorderingIsSuppressed = true;  // don't save in cluster array

                    // We have mapped the CGJ character to the same class so check
                    // for it...
                    if (currChar == UnicodeCharacter.CGJ)
                    {
                        // CGJ may only be applied to a nun base char, and
                        // may not "share" any other mark with its base
                        // NOTE(review): as written, the cluster is flagged INVALID exactly when
                        // the CGJ directly follows a nun (U+05E0) with no other marks, which reads
                        // as the opposite of the comment above - confirm the intended polarity
                        // (and Uniscribe parity) before changing it.
                        _clusterIsInvalid = (_clusterSize == 0 && currentRun.PreviousChar == 0x5e0);
                        _clusterHasCGJ = true;
                    }
                    else
                    {
                        _clusterIsInvalid = (_clusterSize != (_clusterHasCGJ ? 1 : 0));
                    }
                    break;

                case HebrewReorderPosition.ZWControl:
                    _clusterRequiresReordering = false;
                    _textHasUnicodeControlChars = true;
                    _reorderingIsSuppressed = true;  // we don't want to confuse things by reordering
                    currShape |= CharShapeInfo.IsUnicodeLayoutControl;
                    break;

                default:
                    // This marks the end of the cluster. We'll have to re-process this character
                    currentRun.Reset();
                    return _clusterRequiresReordering;       // leave now.
            }

            // step 3. make sure any invalid marks get their very own dotted circle base.
            //      A mark is also rejected when its slot is already occupied or, once a
            //      CGJ has been seen, when it is anything other than the combining mark slot.
            if (_clusterIsInvalid ||
                (_clusterHasCGJ ? clusterIx != HebrewReorderPosition.CombiningMarkPosition :
                                  _clusterChars[ (ushort)clusterIx ] != 0))
            {
                // if there's some glyphs to reorder, do it now before we move on...
                if (_clusterRequiresReordering)
                {
                    GetReorderedGlyphs(ref currentRun);
                }

                // for consistency with unmanaged Uniscribe suppress any further reordering
                _clusterIsInvalid = _reorderingIsSuppressed = true;

                // add a dotted circle (except if the preceding character was a ZWJ)
                if (!_textHasUnicodeControlChars ||
                    (currentRun.PreviousChar != UnicodeCharacter.ZWJ))
                {
                    currShape |= CharShapeInfo.RequiresInsertedBase;
                }

                isClusterInProgress = false;
            }
            else
            {
                // step 4. save this new character in the reordering array
                _clusterChars[ (ushort)clusterIx ] = currChar;

                // step 5. As long as reordering isn't suppressed, save the current glyph in the
                // reordering array.  (We may not need to reorder, but we might)
                if (_reorderingIsSuppressed == false)
                {
                    _clusterGlyphs[ (ushort)clusterIx ] = currGlyph;
                    _clusterSize++;

                    // step 6. update the reordering state
                    if ( _lastReorderedGlyphIx < (ushort)clusterIx )
                    {
                        // Keep _lastReorderedGlyphIx at the "highest" member of the
                        // reordering array
                        _lastReorderedGlyphIx = (ushort)clusterIx;
                    }

                    if (newState >= _clusterState)
                    {
                        // no reordering needed thus far...
                        // keep track of the "highest" cluster member
                        _clusterState = newState;
                    }
                    else
                    {
                        // this current char is "lower" than a previous character
                        // in the cluster so we're gonna need to reorder this cluster
                        _clusterRequiresReordering = true;
                    }
                }
            }
        }

        // record the (possibly amended) shape flags and glyph for this character
        currentRun.SetGlyphPropertiesUsingGlyph(currShape,currGlyph);

    } while ( isClusterInProgress &&
              currentRun.GetNextCharProperties (out currChar, out currGlyph, out currShape) );

    return _clusterRequiresReordering;
}

///
/// HebrewClusterCop.GetNextGlyph - get the reordered glyph
///
/// This function is called to fetch each member of a reordered cluster.
/// It clears the array position as it fetchs each char.
/// This function will return '\u0000' if there are no valid chars
/// to return.
///
/// Returns: next reordered glyph, or 0 when no glyph is left to return
///
/// Critical - calls critical code, uses pointers
///
[SecurityCritical]
unsafe private ushort GetNextGlyph()
{
    // Scans _clusterGlyphs upward from the slot after _nextReorderedGlyphIx and
    // returns the first non-zero entry at or below _lastReorderedGlyphIx.
    // Slots are only read here; ResetClusterCop is what zeroes them.
    ushort nextGlyph = 0;

    if ( _lastReorderedGlyphIx != 0 &&_clusterSize != 0 &&
         _lastReorderedGlyphIx < (ushort)HebrewReorderPosition.ReorderingArraySize)
    {
        ushort nextGlyphIx = _nextReorderedGlyphIx;
        ushort lastGlyphIx = _lastReorderedGlyphIx;

        while (nextGlyph == 0)
        {
            if (++nextGlyphIx > lastGlyphIx)
            {
                // ran off the end of the populated range: nothing more to hand out
                _clusterSize = 0;
                return nextGlyph;
            }

            nextGlyph = _clusterGlyphs[ nextGlyphIx ];
        }

        // remember where to resume and count this glyph as delivered
        _nextReorderedGlyphIx = nextGlyphIx;
        --_clusterSize;
    }
    else
    {
        _clusterSize = 0;
    }

    return nextGlyph;
}

///
/// HebrewClusterCop.GetReorderedGlyphs - writes the cluster's glyphs back in reordered sequence
///
/// When ReorderedCount is non-zero, overwrites the glyphs of the run starting at the
/// glyph index of _firstCharIx with the values delivered by GetNextGlyph, then clears
/// _clusterRequiresReordering.
///
/// currentRun: the shaping workspace whose glyphs are rewritten
/// Returns: ReorderedCount as sampled on entry (0 when nothing was reordered)
///
/// Critical - calls critical code, uses pointers
///
[SecurityCritical]
unsafe internal ushort GetReorderedGlyphs(ref ShapingWorkspace currentRun)
{
    ushort reorderedCount = ReorderedCount;

    // get reordered glyphs
    if (reorderedCount > 0)
    {
        // we have a number of diacritics that need reordering.  So, put the
        // new glyphs in the right order in the already filled glyphlist.
        Debug.Assert (_firstCharIx + _clusterSize - (currentRun.IsFinished?1:0) <= currentRun.CurrentCharIx);

        ushort nextGlyphIx = currentRun.GetGlyphIx( _firstCharIx );
        while ( _clusterSize > 0 )
        {
            // GetNextGlyph decrements _clusterSize (or zeroes it when it runs dry)
            currentRun.SetGlyph(nextGlyphIx++, GetNextGlyph());
        }

        _clusterRequiresReordering = false;
    }

    return reorderedCount;
}

///
/// HebrewClusterCop.GetPrecomposedBaseVowel
///
/// Maps a precomposed base+vowel presentation form to the vowel it contains.
/// The result is stored in the vowel validation slot by StartCluster, so that a
/// vowel already built into the base counts against the vowel rules.
///
///     0xfb1d == 0x05d9,0x05b4
///     0xfb1f == 0x05f2,0x05b7
///     0xfb2e == 0x05d0,0x05b7
///     0xfb2f == 0x05d0,0x05b8
///     0xfb4b == 0x05d5,0x05b9
///
/// baseChar: character to test
/// Returns: the vowel if classification is a precomposed base+vowel character,
///          otherwise returns '\u0000'
///
private char GetPrecomposedBaseVowel(char baseChar)
{
    // The previous implementation derived these cases from chained XORs and tested the
    // holam case against 0x54; ((0x4b ^ 0x2e) ^ 0x33) is actually 0x56, so U+FB49 (shin
    // with dagesh, which has no vowel) matched and U+FB4B (vav with holam) never did.
    // Spelling the code points out removes the arithmetic entirely.
    switch (baseChar)
    {
        case '\ufb1d':
            return '\u05b4';    // yod with hiriq
        case '\ufb1f':
            return '\u05b7';    // yiddish yod yod patah
        case '\ufb2e':
            return '\u05b7';    // alef with patah
        case '\ufb2f':
            return '\u05b8';    // alef with qamats
        case '\ufb4b':
            return '\u05b9';    // vav with holam
        default:
            return '\u0000';    // not a precomposed base+vowel form
    }
}

///
/// HebrewClusterCop.IsIllegalMark - process mark
///
/// This routine is used to validate a mark.  It checks
/// for number of marks in the class (2 maximum) and for
/// duplicates.
///
/// the current char
/// the char's class
///
/// false if can add accent.
///
/// Critical - calls critical code, uses pointers
///
[SecurityCritical]
private unsafe bool IsIllegalMark(char unicodeChar, HebrewCharClass charClass)
{
    // Each validation slot in _clusterChars is a tiny state machine:
    //   '\u0000'  - no mark of this kind seen yet
    //   a char    - exactly one mark seen (the char itself)
    //   '\uffff'  - the quota is used up (two marks seen, or a hataf was first)
    const char quotaExhausted = '\uffff';

    bool markIsVowel = IsVowel(charClass);

    // a hataf vowel never shares its base with another vowel
    bool markIsHataf = markIsVowel && charClass == HebrewCharClass.Hataf;

    // pick the slot: vowels, punctas and all other marks are counted separately
    ushort slot;
    if (markIsVowel)
    {
        slot = (ushort)HebrewReorderPosition.VowelValidation;
    }
    else if (charClass == HebrewCharClass.Puncta)
    {
        slot = (ushort)HebrewReorderPosition.PunctaValidation;
    }
    else
    {
        slot = (ushort)HebrewReorderPosition.MarkValidation;
    }

    char seenSoFar = _clusterChars[ slot ];

    if (seenSoFar == '\u0000')
    {
        // first of its kind is always acceptable; remember it for the next check
        _clusterChars[ slot ] = markIsHataf ? quotaExhausted : unicodeChar;
        return false;
    }

    // a third mark, a duplicate, or a hataf joining an existing vowel is illegal
    if (seenSoFar == quotaExhausted || seenSoFar == unicodeChar || markIsHataf)
    {
        return true;
    }

    // second distinct mark: accept it and close the slot
    _clusterChars[ slot ] = quotaExhausted;
    return false;
}

///
/// HebrewClusterCop.IsIllegalPair - Validates a base/mark combo
///
/// Uniscribe does not verify that a given mark is legal on
/// a given base.  Thus, this behavior is different than legacy
/// implementation.
/// Note that a space character is a valid
/// base, but it is reported as an illegal pair here (SpaceChar is not part
/// of the AllBaseChars mask) so that a space char base can
/// be differentiated in the ClusterControl.AddToCluster()
///
/// baseClass: current base char
/// markClass: class of diacritic
/// Returns: false if diacritic can be attached to base
private bool IsIllegalPair(HebrewCharClass baseClass, HebrewCharClass markClass)
{
    // Anything that is not one of the AllBaseChars classes cannot carry the mark.
    // This test must come first: it also keeps non-base classes from being used
    // as an index into the (shorter) validation table below.
    HebrewCharClassFlags baseFlag = CharClassToFlag[(int)baseClass];
    if ((baseFlag & HebrewCharClassFlags.AllBaseChars) == 0)
    {
        return true;
    }

    // legal only when the mark's flag is among those this base class accepts
    HebrewCharClassFlags markFlag = CharClassToFlag[(int)markClass];
    HebrewCharClassFlags acceptedMarks = BaseDiacriticValidationTable[ (int)baseClass ];
    return (markFlag & acceptedMarks) == 0;
}

/// <summary>
/// HebrewClusterCop.IsVowel
/// </summary>
/// <param name="charClass">classification to test</param>
/// <returns>true if the class's flag is part of the AllVowels mask</returns>
private bool IsVowel(HebrewCharClass charClass)
{
    HebrewCharClassFlags classFlag = CharClassToFlag[(int)charClass];
    return (classFlag & HebrewCharClassFlags.AllVowels) != 0;
}

/// <summary>
/// HebrewClusterCop.ReorderedCount - get size of cluster
/// </summary>
/// <remarks>
/// this number doesn't include the base character. It is zero whenever the
/// cluster does not require reordering.
/// </remarks>
internal ushort ReorderedCount
{
    get
    {
        if (_clusterRequiresReordering)
        {
            return _clusterSize;
        }

        return (ushort)0;
    }
}

/// <summary>
/// Returns the per-cluster state to idle and zeroes both scratch arrays.
/// </summary>
/// <SecurityNote>
/// Critical - is unsafe code
/// </SecurityNote>
[SecurityCritical]
private unsafe void ResetClusterCop ()
{
    // Wipe every slot (reorder positions and validation slots alike); the
    // arrays live on the caller's stack.
    for (int slot = (int)HebrewReorderPosition.ClusterCopArraySize - 1; slot >= 0; slot--)
    {
        _clusterChars[slot] = '\u0000';
        _clusterGlyphs[slot] = 0;
    }

    // counters
    _clusterSize = 0;
    _lowerMarksCount = 0;
    _upperMarksCount = 0;

    // flags
    _clusterRequiresReordering = false;
    _clusterHasCGJ = false;
    _reorderingIsSuppressed = false;
    _clusterIsInvalid = false;

    // reordering progress
    _clusterState = HebrewClusterState.Idle;
    _nextReorderedGlyphIx = 0;
    _lastReorderedGlyphIx = 0;
}

/// <summary>
/// Turns a cluster state into a concrete slot of the reordering array for the
/// mark classes that may occur more than once per cluster.
/// </summary>
/// <param name="newState">state mapped from the current mark's class</param>
/// <returns>
/// the slot to use; 0 for a puncta, which the caller files under the lower or
/// upper marks itself
/// </returns>
private HebrewReorderPosition ResolveClusterIx (HebrewClusterState newState)
{
    switch (newState)
    {
        case HebrewClusterState.HolamSeen:
            return HebrewReorderPosition.HolamPosition;

        case HebrewClusterState.LowerMarkSeen:
            // next free lower-mark slot
            return (HebrewReorderPosition)((int)HebrewReorderPosition.LowerMarksStart + _lowerMarksCount++);

        case HebrewClusterState.LowerRightMarkSeen:
            // diacritic is lower right
            return HebrewReorderPosition.LowerRightMarkPosition;

        case HebrewClusterState.UpperMarkSeen:
            // next free upper-mark slot
            return (HebrewReorderPosition)((int)HebrewReorderPosition.UpperMarksStart + _upperMarksCount++);

        case HebrewClusterState.UpperLeftMarkSeen:
            // diacritic is an upper left mark
            return HebrewReorderPosition.UpperLeftMarkPosition;

        case HebrewClusterState.PunctaSeen:
            // resolved by the caller; signalled with index 0
            return (HebrewReorderPosition)0;

        default:
            Invariant.Assert(false,"HebrewClusterCop.AddToCluster() - invalid newState");
            return (HebrewReorderPosition)0;
    }
}

///
/// HebrewClusterCop.StartCluster - start of new cluster (potentially)
///
/// This routine is used at the start of each cluster.
/// If the base + this
/// first non-base char are the valid start of a new cluster, return
///
/// Returns: true if more characters can be added to this cluster
///
/// Critical - calls critical code, uses unsafe accessors
///
[SecurityCritical]
private unsafe bool StartCluster( ref ShapingWorkspace currentRun, CharShapeInfo currShape )
{
    ResetClusterCop();

    // the character before the current one is taken to be the base
    CharShapeInfo precedingShape = currentRun.PreviousShape;

    // Remember where this cluster starts; needed when the glyphs are
    // reordered later (once the next base is detected).
    _firstCharIx = currentRun.CurrentCharIx;

    // note any ZWNJ/ZWJ chars, on either the current or the preceding character
    CharShapeInfo eitherShape = currShape | precedingShape;
    _textHasUnicodeControlChars = (eitherShape & CharShapeInfo.IsUnicodeLayoutControl) != 0;

    if ( _textHasUnicodeControlChars )
    {
        // a ZW joiner is involved: just allow whatever follows, without reordering
        _reorderingIsSuppressed = true;
        return true;
    }

    _baseCharClass = (HebrewCharClass)(precedingShape & CharShapeInfo.ShaperClassMask);
    HebrewCharClass firstMarkClass = (HebrewCharClass)(currShape & CharShapeInfo.ShaperClassMask);
    _clusterIsInvalid = IsIllegalPair (_baseCharClass, firstMarkClass);

    if (!_clusterIsInvalid)
    {
        // The base class is good, so a cluster can be processed. If the base is a
        // precomposed form that already includes a vowel, seed the vowel validation
        // slot with that vowel.
        _clusterChars[ (int)HebrewReorderPosition.VowelValidation ] = GetPrecomposedBaseVowel(currentRun.PreviousChar);
        return true;
    }

    if (currentRun.PreviousChar == UnicodeCharacter.NoBreakSpace)
    {
        // On a NBSP base this diacritic is acceptable, but it is the only
        // character allowed in the cluster - hence the false result below.
        _clusterIsInvalid = false;
    }

    return false;
}

///
/// HebrewCharClassFlags - enumeration of Hebrew character classification flags
///
/// This enum is linked to the list of char classes in the HebrewCharClass
/// enum; keep them in sync.
/// (
// One bit per HebrewCharClass value; CharClassToFlag maps a class to its bit.
// The multi-bit members are masks over those single bits.
[Flags()]
private enum HebrewCharClassFlags: uint
{
    UnknownCharClass,               // no bits set (value 0)
    PunctuationMark     = 0x0001,
    AcceptsVowel        = 0x0002,   // Thaana base glyph can have a vowel (only)
    AcceptsNotShin      = 0x0004,   // Hebrew base can have cantillation | diacritic
    AcceptsRafe         = 0x0008,   // Hebrew base can have rafe | cantillation | diacritic
    AcceptsDagesh       = 0x0010,   // Hebrew base can have dagesh | cantillation | diacritic
    ShinBase            = 0x0020,   // Hebrew shin can have shin/sin dot | dagesh | cantillation | diacritic
    SpaceChar           = 0x0040,   // Space char may take one mark
    AllBaseChars        = 0x003E,   // mask for all possible bases (AcceptsVowel..ShinBase; excludes PunctuationMark and SpaceChar)
    ShinSinDot          = 0x0080,   // Hebrew shin/sin dot (05C1, 05C2)
    Dagesh              = 0x0100,   // Hebrew dagesh/mapiq (05BC)
    Rafe                = 0x0200,   // Hebrew Rafe (05BF)
    DageshRafe          = 0x0300,   // dagesh and rafe mask
    Holam               = 0x0400,   // Hebrew Holam (05B9)
    Hataf               = 0x0800,   // Hebrew hataf -- only one per base
    ThaanaVowel         = 0x0800,   // Thaana vowel -- only one per base (same bit as Hataf)
    Vowel               = 0x1000,   // Hebrew vowel -- only two per base
    AllVowels           = 0x1c00,   // mask for vowels only (Holam | Hataf | Vowel)
    BelowMark           = 0x2000,   // Hebrew lower accents
    LowerRightMark      = 0x4000,   // Hebrew Dehi, Yetiv accent (always last in lower sequence)
    UpperMark           = 0x8000,   // Hebrew upper marks
    UpperLeftMark       = 0x10000,  // Hebrew zinor (upper left mark) and others
    Puncta              = 0x20000,  // Hebrew upper dot (05C4)
    AllLowerAccents     = 0x06000,  // mask for lower cantillation marks only (BelowMark | LowerRightMark)
    AllAccents          = 0x1e000,  // mask for cantillation marks only (BelowMark..UpperLeftMark)
    MasoraCircle        = 0x40000,  // Hebrew cantillation - MasoraCircle Circle
    CombiningMark       = 0x80000,  // Unicode combining marks
    ControlChar         = 0x100000, // Unicode layout controls
    AllDiacritics       = 0x0FFF80, // mask covering ShinSinDot through CombiningMark
    AllVowelsAndAccents = 0x1FFC00  // mask covering Holam through ControlChar - includes ZWJ, ZWNJ
};

// BaseDiacriticValidationTable - table used for validating a given base/mark pair
//      Used to quickly determine whether a given diacritic/mark are
//      legal to add to a particular base type.
// Indexed by the HebrewCharClass ordinal of the base character (0 .. SpaceChar).
// Each entry is the set of mark flags that the base class accepts.
private static HebrewCharClassFlags[] BaseDiacriticValidationTable =
{
    0,                                        // [0] unknown chars accept no marks
    0,                                        // [1] punctuation marks accept no marks
    HebrewCharClassFlags.ThaanaVowel,         // [2] base accepts vowels only (thaana bases)
    HebrewCharClassFlags.AllVowelsAndAccents | HebrewCharClassFlags.DageshRafe,   // [3] BaseAcceptsNotShin
    HebrewCharClassFlags.AllVowelsAndAccents | HebrewCharClassFlags.Rafe,         // [4] BaseAcceptsRafe
    HebrewCharClassFlags.AllVowelsAndAccents | HebrewCharClassFlags.Dagesh,       // [5] BaseAcceptsDagesh
    HebrewCharClassFlags.AllVowelsAndAccents | HebrewCharClassFlags.DageshRafe | HebrewCharClassFlags.ShinSinDot, // [6] ShinBaseChar
    0                                         // [7] space char
};

// Indexed by HebrewCharClass ordinal (22 entries, one per class); yields the
// single-bit HebrewCharClassFlags value for that class.
private static HebrewCharClassFlags[] CharClassToFlag =
{
    HebrewCharClassFlags.UnknownCharClass,
    HebrewCharClassFlags.PunctuationMark,
    HebrewCharClassFlags.AcceptsVowel,      // Thaana base glyph can have a vowel (only)
    HebrewCharClassFlags.AcceptsNotShin,    // Hebrew base can have cantillation | diacritic
    HebrewCharClassFlags.AcceptsRafe,       // Hebrew base can have rafe | cantillation | diacritic
    HebrewCharClassFlags.AcceptsDagesh,     // Hebrew base can have dagesh | cantillation | diacritic
    HebrewCharClassFlags.ShinBase,          // Hebrew shin can have shin/sin dot | dagesh | cantillation | diacritic
    HebrewCharClassFlags.SpaceChar,         // Space char may take one mark
    HebrewCharClassFlags.ShinSinDot,        // Hebrew shin/sin dot (05C1, 05C2)
    HebrewCharClassFlags.Dagesh,            // Hebrew Dagesh/mapiq (05BC)
    HebrewCharClassFlags.Rafe,              // Hebrew Rafe
    HebrewCharClassFlags.Holam,             // Hebrew Holam
    HebrewCharClassFlags.Hataf,             // Hebrew hataf -- only one per base
    HebrewCharClassFlags.Vowel,             // Hebrew vowel -- only two per base
    HebrewCharClassFlags.BelowMark,         // Hebrew cantillation - Below
    HebrewCharClassFlags.LowerRightMark,    // Hebrew cantillation - Below right
    HebrewCharClassFlags.UpperMark,         // Hebrew cantillation - Above marks
    HebrewCharClassFlags.UpperLeftMark,     // Hebrew cantillation - Above left
    HebrewCharClassFlags.Puncta,            // Hebrew upper dot (05C4)
    HebrewCharClassFlags.MasoraCircle,      // Hebrew cantillation - MasoraCircle Circle
    // NOTE(review): HebrewCharClassifier.ToShapeInfo classifies U+0323 as Puncta
    // and CGJ as UnicodeCGJ (== UnicodeCombiningMark), so the code points named
    // in the next two comments look out of date -- confirm.
    HebrewCharClassFlags.CombiningMark,     // combining marks (0323,0307,0308)
    HebrewCharClassFlags.ControlChar        // ZWJ, ZWNJ, CGJ
};

///)
/// There are several masks that are defined here:
/// the "All..." masks are used to test the char class of a unicode char
///
/// HebrewClusterCop.HebrewClusterState: state definitions
///
// AddCluster compares these values with >= to decide whether a mark arrives
// out of canonical order, so the declaration order is significant.
private enum HebrewClusterState : byte
{
    Idle,                       // 0
    SinShinSeen,                // 1
    DageshRafeSeen,             // 2
    HolamSeen,                  // 3
    PunctaSeen,                 // 4
    LowerMarkSeen,              // 5
    LowerRightMarkSeen,         // 6
    UpperMarkSeen,              // 7
    UpperLeftMarkSeen,          // 8
    MasoraCircleSeen,           // 9
    CombiningMarkSeen,          // 10
    UnicodeLayoutControlSeen    // 11
}

// reordering array positions...
// The numeric value of each member is the slot index used with the
// _clusterChars / _clusterGlyphs arrays; the values are noted on the right.
private enum HebrewReorderPosition : byte
{
    BaseCharPosition,                               // 0 -- not used
    SinShinPosition,                                // 1
    DageshRafePosition,                             // 2 (slot 3 holds the second dagesh/rafe)
    HolamPosition = DageshRafePosition + 2,         // 4
    LowerMarksStart,                                // 5 (slots 5..8)
    // lower right marks follow all other lower marks.
    LowerRightMarkPosition = LowerMarksStart + 4,   // 9
    // upper marks follow lower marks. There must be room for up to
    // five lower marks (2 vowels, 2 accents, and 1 puncta )
    UpperMarksStart,                                // 10 (slots 10..12)
    // upper left marks follow all other upper marks. There must be
    // room for one upper mark and one puncta
    UpperLeftMarkPosition = UpperMarksStart + 3,    // 13
    // masora circle follows upper marks. There must be room for up to
    // four upper marks (2 accents, 1 vowel (holam) and 1 puncta)
    MasoraCirclePosition,                           // 14
    CombiningMarkPosition,                          // 15
    ReorderingArraySize,                            // 16 -- number of slots that take part in reordering
    ZWControl = ReorderingArraySize,                // 16 -- not added to the array
    VowelValidation,                                // 17 -- for validating vowels
    MarkValidation,                                 // 18 -- for validating marks
    PunctaValidation,                               // 19 -- for validating puncta
    ClusterCopArraySize,                            // 20 -- total number of slots in the work arrays
    UnresolvedPosition = ClusterCopArraySize        // 20 -- not in array (index too big)
}

// Number of elements callers must allocate for each of the two work arrays
// handed to the HebrewClusterCop constructor.
public const int ReorderArraySize = (int)HebrewReorderPosition.ClusterCopArraySize;

// this table is used to determine if reordering will be required.
// Indexed by HebrewCharClass ordinal.  Base classes map to BaseCharPosition;
// classes that can occur more than once per cluster map to UnresolvedPosition
// and get their slot assigned while the cluster is being built.
private static readonly HebrewReorderPosition[] clusterPositionMap =
{
    // This maps all the Hebrew char classes to the corresponding cluster
    // state.
    HebrewReorderPosition.BaseCharPosition,     // Unknown class -- can take any diacritic
    HebrewReorderPosition.BaseCharPosition,     // PunctuationMark -- cannot take diacritics
    HebrewReorderPosition.BaseCharPosition,     // Thaana base glyph can have single vowel
    HebrewReorderPosition.BaseCharPosition,     // Hebrew base can have cantillation | vowel
    HebrewReorderPosition.BaseCharPosition,     // Hebrew base can have rafe | cantillation | vowel
    HebrewReorderPosition.BaseCharPosition,     // Hebrew base can have dagesh | cantillation | vowel
    HebrewReorderPosition.BaseCharPosition,     // Hebrew shin can have shin/sin dot && dagesh && rafe && cantillation && vowel
    HebrewReorderPosition.BaseCharPosition,     // space char
    HebrewReorderPosition.SinShinPosition,      // Hebrew shin/sin dot (05C1, 05C2)
    HebrewReorderPosition.DageshRafePosition,   // Hebrew dagesh/mapiq (05BC)
    HebrewReorderPosition.DageshRafePosition,   // Hebrew rafe
    HebrewReorderPosition.UnresolvedPosition,   // Hebrew holam
    HebrewReorderPosition.UnresolvedPosition,   // Hebrew hataf -- only one per base
    HebrewReorderPosition.UnresolvedPosition,   // Hebrew/Thaana vowel -- only two per base
    HebrewReorderPosition.UnresolvedPosition,   // Hebrew cantillation - Below
    HebrewReorderPosition.UnresolvedPosition,   // Hebrew cantillation - Below right
    HebrewReorderPosition.UnresolvedPosition,   // Hebrew cantillation - Above
    HebrewReorderPosition.UnresolvedPosition,   // Hebrew cantillation - Above left
    HebrewReorderPosition.UnresolvedPosition,   // puncta seen
    HebrewReorderPosition.MasoraCirclePosition,// Hebrew cantillation - MasoraCircle Circle
    HebrewReorderPosition.CombiningMarkPosition, // combining marks
    HebrewReorderPosition.ZWControl             // extent of array; NumberOfHebrewCharClasses members
};

// this table is used to determine if reordering will be required.
// Indexed by HebrewCharClass ordinal; gives the cluster state a character of
// that class moves the cluster into.
private static readonly HebrewClusterState[] clusterStateMap =
{
    // This maps all the Hebrew char classes to the corresponding cluster
    // state.
    HebrewClusterState.Idle,            // Unknown class -- can take any diacritic
    HebrewClusterState.Idle,            // PunctuationMark -- cannot take diacritics
    HebrewClusterState.Idle,            // Thaana base glyph can have single vowel
    HebrewClusterState.Idle,            // Hebrew base can have cantillation | vowel
    HebrewClusterState.Idle,            // Hebrew base can have rafe | cantillation | vowel
    HebrewClusterState.Idle,            // Hebrew base can have dagesh | cantillation | vowel
    HebrewClusterState.Idle,            // Hebrew shin can have shin/sin dot | dagesh | cantillation | vowel
    HebrewClusterState.Idle,            // base of shin/sin dot | dagesh | cantillation | vowel
    HebrewClusterState.SinShinSeen,     // Hebrew shin/sin dot (05C1, 05C2)
    HebrewClusterState.DageshRafeSeen,// Hebrew dagesh/mapiq (05BC)
    HebrewClusterState.DageshRafeSeen,// Hebrew rafe
    HebrewClusterState.HolamSeen,       // Hebrew holam
    HebrewClusterState.LowerMarkSeen,// Hebrew hataf -- only one per base
    HebrewClusterState.LowerMarkSeen,// Hebrew/Thaana vowel -- only two per base
    HebrewClusterState.LowerMarkSeen,// Hebrew cantillation - Below
    HebrewClusterState.LowerRightMarkSeen, // Hebrew cantillation - Below right
    HebrewClusterState.UpperMarkSeen,   // Hebrew cantillation - Above
    HebrewClusterState.UpperLeftMarkSeen, // Hebrew cantillation - Above left
    HebrewClusterState.PunctaSeen,      // puncta seen
    HebrewClusterState.MasoraCircleSeen, // Hebrew cantillation - MasoraCircle Circle
    HebrewClusterState.CombiningMarkSeen,
    HebrewClusterState.UnicodeLayoutControlSeen // extent of array; NumberOfHebrewCharClasses members
};
}

/// HebrewCharClassifier - The char converter for Hebrew.
///
internal class HebrewCharClassifier : ShaperCharacterClassifier
{
    // Code points outside the script's own block that ToShapeInfo classifies itself.
    const char AlphabeticPresentationRangeStart = '\uFB1D';
    const char AlphabeticPresentationRangeEnd   = '\uFB4F';
    const char UnicodeCombiningDotAbove         = '\u0307'; // combining dot above
    const char UnicodeCombiningDiaresis         = '\u0308'; // combining diaresis
    internal const char BelowPuncta             = '\u0323'; // combining dot below

    /// <summary>
    /// Sets up the classifier for the given script: the classes reported for
    /// unknown, space, zero width and soft hyphen characters, plus the code
    /// point range and class table of the script itself (Hebrew or Thaana).
    /// </summary>
    public HebrewCharClassifier(ScriptTags scriptTag, GlyphTypeface fontFace)
        : base(scriptTag, fontFace)
    {
        byte layoutControlClass = (byte)HebrewCharClass.UnicodeLayoutControl;

        _unknownClass   = UnknownCharClass;
        _spaceClass     = (byte)HebrewCharClass.SpaceChar;
        _zwControlClass = layoutControlClass;
        _zwjClass       = layoutControlClass;
        _zwnjClass      = layoutControlClass;
        _shyClass       = UnknownCharClass;

        if (scriptTag == ScriptTags.Hebrew)
        {
            _charClassTable = _hebrewCharClasses;
            _firstChar = '\u0590';  // first code point of the Hebrew block
            _lastChar  = '\u05FF';  // last code point of the Hebrew block
            _xorMask   = 0x580;     // this mask is used in GetCharShapeInfo
            _xorRange  = 0x080;     // this is used in GetCharShapeInfo
        }
        else if (scriptTag == ScriptTags.Thaana)
        {
            _charClassTable = _thaanaCharClasses;
            _firstChar = '\u0780';
            _lastChar  = '\u07BF';
            _xorMask   = 0x780;
            _xorRange  = 0x040;
        }
    }

    /// <summary>
    /// HebrewCharClassifier.ToShapeInfo - returns CharShapeInfo for the Unicode character
    /// </summary>
    /// <remarks>
    /// The base classifier is asked first.  Only when it reports the unknown
    /// class are the extra ranges tried: the alphabetic presentation forms,
    /// the combining dot above/diaresis, the combining dot below and CGJ.
    /// </remarks>
    public override CharShapeInfo ToShapeInfo(char unicodeChar)
    {
        CharShapeInfo baseInfo = base.ToShapeInfo(unicodeChar);

        HebrewCharClass baseClass = (HebrewCharClass)(baseInfo & CharShapeInfo.ShaperClassMask);
        if (baseClass != HebrewCharClass.UnknownCharClass)
        {
            return baseInfo;
        }

        if (unicodeChar >= AlphabeticPresentationRangeStart &&
            unicodeChar <= AlphabeticPresentationRangeEnd)
        {
            int tableIx = unicodeChar - AlphabeticPresentationRangeStart;
            return (CharShapeInfo) _alphabeticPresentationFormsCharClasses[tableIx];
        }

        if (unicodeChar == UnicodeCombiningDotAbove ||
            unicodeChar == UnicodeCombiningDiaresis)
        {
            return (CharShapeInfo) HebrewCharClass.UnicodeCombiningMark;
        }

        if (unicodeChar == BelowPuncta)
        {
            return (CharShapeInfo) HebrewCharClass.Puncta;
        }

        if (unicodeChar == UnicodeCharacter.CGJ)
        {
            return (CharShapeInfo) HebrewCharClass.UnicodeCGJ | CharShapeInfo.IsUnicodeLayoutControl;
        }

        return baseInfo;
    }

    #region Classification Tables

    // Short aliases so the tables below stay readable.  Base classes carry the
    // start-of-cluster flag; mark classes do not.
    private const HebrewCharClass StartOfCluster = (HebrewCharClass)CharShapeInfo.IsStartOfCluster;

    private const byte UnknownCharClass     = (byte)(HebrewCharClass.UnknownCharClass | StartOfCluster);
    private const byte PunctuationMark      = (byte)(HebrewCharClass.PunctuationMark | StartOfCluster);
    private const byte BaseAcceptsVowel     = (byte)(HebrewCharClass.BaseAcceptsVowel | StartOfCluster);
    private const byte BaseAcceptsAll       = (byte)(HebrewCharClass.BaseAcceptsNotShin | StartOfCluster);
    private const byte BaseAcceptsRafe      = (byte)(HebrewCharClass.BaseAcceptsRafe | StartOfCluster);
    private const byte BaseAcceptsDagesh    = (byte)(HebrewCharClass.BaseAcceptsDagesh | StartOfCluster);
    private const byte ShinBaseChar         = (byte)(HebrewCharClass.ShinBaseChar | StartOfCluster);

    private const byte ShinSinDot           = (byte)HebrewCharClass.ShinSinDot;
    private const byte Dagesh               = (byte)HebrewCharClass.Dagesh;
    private const byte Rafe                 = (byte)HebrewCharClass.Rafe;
    private const byte Holam                = (byte)HebrewCharClass.Holam;
    private const byte Hataf                = (byte)HebrewCharClass.Hataf;
    private const byte ThaanaVowel          = (byte)HebrewCharClass.Hataf;
    private const byte Vowel                = (byte)HebrewCharClass.Vowel;

    // Every "below" flavour shares the LowerMark class.
    private const byte Meteg                = (byte)HebrewCharClass.LowerMark;
    private const byte BelowCenterRightMark = (byte)HebrewCharClass.LowerMark;
    private const byte BelowCenterLeftMark  = (byte)HebrewCharClass.LowerMark;
    private const byte BelowLeftMark        = (byte)HebrewCharClass.LowerMark;
    private const byte Puncta               = (byte)HebrewCharClass.Puncta;
    private const byte LowerRightMark       = (byte)HebrewCharClass.LowerRightMark;

    // Every "above" flavour except upper-left shares the UpperMark class.
    private const byte AboveRightMark       = (byte)HebrewCharClass.UpperMark;
    private const byte AboveCenterRightMark = (byte)HebrewCharClass.UpperMark;
    private const byte AboveCenterMark      = (byte)HebrewCharClass.UpperMark;
    private const byte AboveCenterLeftMark  = (byte)HebrewCharClass.UpperMark;
    private const byte UpperLeftMark        = (byte)HebrewCharClass.UpperLeftMark;
    private const byte MasoraCircle         = (byte)HebrewCharClass.MasoraCircle;

    // All the Hebrew Unicode chars (U+0590 - U+05FF) classified, in code point
    // order, four entries per row.
    private static readonly byte[] _hebrewCharClasses =
    {
        // U+0590 - U+059F
        UnknownCharClass,     BelowCenterLeftMark,  AboveCenterMark,      AboveCenterMark,      // 0590-0593
        AboveCenterMark,      AboveCenterMark,      BelowCenterRightMark, AboveCenterLeftMark,  // 0594-0597
        AboveCenterLeftMark,  UpperLeftMark,        LowerRightMark,       BelowLeftMark,        // 0598-059B
        AboveCenterLeftMark,  AboveRightMark,       AboveCenterLeftMark,  AboveCenterLeftMark,  // 059C-059F

        // U+05A0 - U+05AF
        AboveRightMark,       AboveCenterLeftMark,  UnknownCharClass,     BelowCenterLeftMark,  // 05A0-05A3
        BelowCenterRightMark, BelowLeftMark,        BelowCenterLeftMark,  BelowCenterRightMark, // 05A4-05A7
        AboveCenterLeftMark,  UpperLeftMark,        BelowCenterRightMark, AboveCenterMark,      // 05A8-05AB
        AboveCenterLeftMark,  LowerRightMark,       UpperLeftMark,        MasoraCircle,         // 05AC-05AF

        // U+05B0 - U+05BF
        Vowel,                Hataf,                Hataf,                Hataf,                // 05B0-05B3
        Vowel,                Vowel,                Vowel,                Vowel,                // 05B4-05B7
        Vowel,                Holam,                UnknownCharClass,     Vowel,                // 05B8-05BB
        Dagesh,               Meteg,                PunctuationMark,      Rafe,                 // 05BC-05BF

        // U+05C0 - U+05CF
        PunctuationMark,      ShinSinDot,           ShinSinDot,           PunctuationMark,      // 05C0-05C3
        Puncta,               UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     // 05C4-05C7
        UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     // 05C8-05CB
        UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     // 05CC-05CF

        // U+05D0 - U+05DF
        BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       // 05D0-05D3
        BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       // 05D4-05D7
        BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       // 05D8-05DB
        BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       // 05DC-05DF

        // U+05E0 - U+05EF
        BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       // 05E0-05E3
        BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       // 05E4-05E7
        BaseAcceptsAll,       ShinBaseChar,         BaseAcceptsAll,       UnknownCharClass,     // 05E8-05EB
        UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     // 05EC-05EF

        // U+05F0 - U+05FF
        BaseAcceptsRafe,      BaseAcceptsRafe,      BaseAcceptsRafe,      PunctuationMark,      // 05F0-05F3
        PunctuationMark,      UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     // 05F4-05F7
        UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     // 05F8-05FB
        UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     // 05FC-05FF
    };

    // All the Thaana Unicode chars (U+0780 - U+07BF) classified, in code point
    // order, four entries per row.
    private static readonly byte[] _thaanaCharClasses =
    {
        // U+0780 - U+078F
        BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     // 0780-0783
        BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     // 0784-0787
        BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     // 0788-078B
        BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     // 078C-078F

        // U+0790 - U+079F
        BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     // 0790-0793
        BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     // 0794-0797
        BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     // 0798-079B
        BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     // 079C-079F

        // U+07A0 - U+07AF
        BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     // 07A0-07A3
        BaseAcceptsVowel,     BaseAcceptsVowel,     ThaanaVowel,          ThaanaVowel,          // 07A4-07A7
        ThaanaVowel,          ThaanaVowel,          ThaanaVowel,          ThaanaVowel,          // 07A8-07AB
        ThaanaVowel,          ThaanaVowel,          ThaanaVowel,          ThaanaVowel,          // 07AC-07AF

        // U+07B0 - U+07BF
        ThaanaVowel,          BaseAcceptsVowel,     UnknownCharClass,     UnknownCharClass,     // 07B0-07B3
        UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     // 07B4-07B7
        UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     // 07B8-07BB
        UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     // 07BC-07BF
    };

    // All the Hebrew Alphabetic Presentation Forms Unicode chars
    // (U+FB1D - U+FB4F) classified.  Index 0 corresponds to U+FB1D.
    private static readonly byte[] _alphabeticPresentationFormsCharClasses =
    {
        // U+FB1D - U+FB1F
        BaseAcceptsAll,       Rafe,                 BaseAcceptsRafe,                            // FB1D-FB1F

        // U+FB20 - U+FB2F
        BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       // FB20-FB23
        BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       // FB24-FB27
        BaseAcceptsAll,       PunctuationMark,      BaseAcceptsAll,       BaseAcceptsAll,       // FB28-FB2B
        BaseAcceptsRafe,      BaseAcceptsRafe,      BaseAcceptsAll,       BaseAcceptsAll,       // FB2C-FB2F

        // U+FB30 - U+FB3F
        BaseAcceptsRafe,      BaseAcceptsRafe,      BaseAcceptsRafe,      BaseAcceptsRafe,      // FB30-FB33
        BaseAcceptsRafe,      BaseAcceptsRafe,      BaseAcceptsRafe,      UnknownCharClass,     // FB34-FB37
        BaseAcceptsRafe,      BaseAcceptsRafe,      BaseAcceptsRafe,      BaseAcceptsRafe,      // FB38-FB3B
        BaseAcceptsRafe,      UnknownCharClass,     BaseAcceptsRafe,      UnknownCharClass,     // FB3C-FB3F

        // U+FB40 - U+FB4F
        BaseAcceptsRafe,      BaseAcceptsRafe,      UnknownCharClass,     BaseAcceptsRafe,      // FB40-FB43
        BaseAcceptsRafe,      UnknownCharClass,     BaseAcceptsRafe,      BaseAcceptsRafe,      // FB44-FB47
        BaseAcceptsRafe,      BaseAcceptsRafe,      BaseAcceptsRafe,      BaseAcceptsAll,       // FB48-FB4B
        BaseAcceptsDagesh,    BaseAcceptsDagesh,    BaseAcceptsDagesh,    UnknownCharClass,     // FB4C-FB4F
    };

    #endregion // end of Classification Tables
}
}

// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
// Copyright (c) Microsoft Corporation. All rights reserved.
//---------------------------------------------------------------------- // // Microsoft Windows Client Platform // Copyright (C) Microsoft Corporation, 2003 // // File: HebrewShape.cs // // Contents: Implementation of Hebrew shaping engine and its factory // // Created: 08-05-2003 Nick Beal (nbeal) // //----------------------------------------------------------------------- // #define VALIDATE_CLUSTER_PARAMETERS using System; using System.Security; using System.Security.Permissions; using System.Diagnostics; using System.Collections; using System.Globalization; using System.Windows; using System.Windows.Media; using System.Windows.Media.TextFormatting; using MS.Internal.FontCache; using MS.Internal.FontFace; using MS.Internal.PresentationCore; namespace MS.Internal.Shaping { ////// HebrewCharClass - enumeration of Hebrew classification flags /// ordinal position /// ////// To allow for all possible Biblical combinations of cantillation /// marks (teamin) and nikud, a number of cantillation classes /// are defined. /// Note that the order is important here. OpenType /// fonts' rules expect that the base/diacritic glyph sequences /// are arranged canonically (ie, a diacritic or cantillation mark whose /// HebrewCharClass value is small canonically preceeds a character /// whose HebrewCharClass value is larger). 
/// Note that our "canonical"
/// order is not entirely consistent with the Unicode canonical weighting
/// (
// The ordinal of each member (noted on the right) is used as an index into the
// per-class lookup tables of HebrewClusterCop, so members must not be
// reordered without updating those tables.
internal enum HebrewCharClass : byte
{
    UnknownCharClass,                   //  0  Unknown class -- can take any diacritic
    PunctuationMark,                    //  1  PunctuationMark -- cannot take diacritics
    BaseAcceptsVowel,                   //  2  Thaana base glyph can have vowel
    FirstBaseChar = BaseAcceptsVowel,   //  2  (alias) first of the base classes
    BaseAcceptsNotShin,                 //  3  Hebrew base can have dagesh && rafe && cantillation & vowel
    BaseAcceptsRafe,                    //  4  Hebrew base can have rafe && cantillation && vowel
    BaseAcceptsDagesh,                  //  5  Hebrew base can have dagesh && cantillation && vowel
    ShinBaseChar,                       //  6  Hebrew shin can have shin/sin dot && dagesh && rafe && cantillation && vowel
    SpaceChar,                          //  7  space may be base of shin/sin dot | dagesh | cantillation | vowel (only one)
    LastBaseChar = SpaceChar,           //  7  (alias) last of the base classes
    ShinSinDot,                         //  8  Hebrew shin/sin dot (05C1, 05C2)
    Dagesh,                             //  9  Hebrew dagesh/mapiq (05BC)
    Rafe,                               // 10  Hebrew Rafe (05BF)
    Holam,                              // 11  Hebrew Holam (05B9)
    Hataf,                              // 12  Hebrew hataf vowels -- only one per base
    Vowel,                              // 13  Hebrew vowels -- only two per base
    LowerMark,                          // 14  Hebrew cantillation - Below (meteg is classified here too)
    LowerRightMark,                     // 15  Hebrew cantillation - Below right
    UpperMark,                          // 16  Hebrew cantillation - Above
    UpperLeftMark,                      // 17  Hebrew cantillation - Above left
    Puncta,                             // 18  Hebrew upper dot (05C4); the classifier also maps U+0323 here
    MasoraCircle,                       // 19  Hebrew cantillation - MasoraCircle Circle
    LastDiacritic = MasoraCircle,       // 19  (alias) last of the Hebrew diacritic classes
    UnicodeCombiningMark,               // 20  Combining marks
    UnicodeCGJ = UnicodeCombiningMark,  // 20  (alias) Unicode CGJ
    UnicodeLayoutControl,               // 21  ZWNJ, ZWJ
    NumberOfHebrewCharClasses,          // 22  count of distinct classes, not a class itself
};

///for more info on this)
///
/// The Hebrew Shaping Engine - (shapes Hebrew text)
///
/// The IShaper and IShapingEngine interfaces are implemented to
/// provide the shaping methods for Hebrew Scripts.
/// There are four Hebrew private types defined/used in this class:
/// 1.) HebrewShapeInfo - this class manages the shape information
/// 2.) HebrewClusterCop - this class manages the canonical ordering
/// 3.) HebrewFontInfo - this class manages the font interface
/// 4.)
/// HebrewCharClassInfo - contains the char classification tables
///
internal sealed class HebrewShape : BaseShape
{
    public const char UnicodeCombiningDotBelow = '\u0323'; // combining dot below

    // The scripts this engine reports through SupportedScripts.
    private static readonly ScriptTags[] _supportedScripts = new ScriptTags[] {ScriptTags.Hebrew,ScriptTags.Thaana};

    //--------------------------------------
    //
    //  Constructors
    //
    //--------------------------------------

    #region Constructors

    /// <summary>
    /// Creates the shaper; both supported scripts flow right-to-left.
    /// </summary>
    internal HebrewShape()
    {
        _textFlowDirection = TextFlowDirection.RTL;
    }

    #endregion

    //--------------------------------------
    //
    //  Internal Methods
    //
    //--------------------------------------

    #region Internal methods

    /// <summary>
    /// HebrewShape.GetCharClassifier - supplies the Hebrew/Thaana character
    /// classifier for the given script and font face.
    /// </summary>
    protected override ShaperCharacterClassifier GetCharClassifier(ScriptTags scriptTag, GlyphTypeface fontFace)
    {
        return new HebrewCharClassifier (scriptTag, fontFace);
    }

    /// <summary>
    /// HebrewShape.GetGlyphs - Hebrew override of the GetGlyphs() helper function.
    /// </summary>
    /// <remarks>
    /// Works in two phases.  The first phase sets glyph properties character
    /// by character for as long as every character is a cluster start or a
    /// layout control.  As soon as anything else (a diacritic) shows up, the
    /// run is rewound and the second phase takes over, feeding the marks
    /// through a HebrewClusterCop so that they can be validated and reordered.
    /// </remarks>
    /// <param name="currentRun">shaping currentRun</param>
    /// <param name="item">Text item</param>
    /// <returns>number of glyphs</returns>
    /// <SecurityNote>
    /// Critical - calls critical code
    /// </SecurityNote>
    [SecurityCritical]
    unsafe protected override int GetGlyphs ( ref ShapingWorkspace currentRun, Item item )
    {
        CharShapeInfo currShape;

        // Phase 1: plain text.  Modern Hebrew has no diacritics, so this loop
        // normally consumes the whole run.
        while ( currentRun.SetNextGlyphProperties (out currShape) )
        {
            bool isBaseOrLayoutControl =
                (currShape & (CharShapeInfo.IsStartOfCluster | CharShapeInfo.IsUnicodeLayoutControl)) != 0;
            if (isBaseOrLayoutControl)
            {
                continue;
            }

            // Found a mark: rewind so that phase 2 sees the mark together
            // with its base.
            if (currentRun.CurrentCharIx < 3)
            {
                // Only the first few glyphs are done; simply start over.
                currentRun.Reset(0,0,currentRun.CharsCount);
            }
            else
            {
                // Restart at the previous character (the base)...
                ushort rewindCharIx = currentRun.PreviousCharIx;
                ushort rewindGlyphIx = currentRun.PreviousGlyphIx;

                // ...or one character earlier when the previous character is
                // a zero width layout control.
                if ((currentRun.GetShapeInfo(rewindCharIx) & CharShapeInfo.IsUnicodeLayoutControl) != 0)
                {
                    --rewindCharIx;
                    rewindGlyphIx = currentRun.GetGlyphIx( rewindCharIx );
                }

                currentRun.Reset(rewindCharIx,
                                 rewindGlyphIx,
                                 (ushort)(currentRun.CharsCount - rewindCharIx));
            }
            break;
        }

        if ( currentRun.IsFinished )
        {
            return currentRun.GlyphsCount;
        }

        // Phase 2: text with marks.  The cluster cop's work arrays live on the
        // stack; fast to allocate, nothing to free.
        char   *reorderChars  = stackalloc char [ HebrewClusterCop.ReorderArraySize ];
        ushort *reorderGlyphs = stackalloc ushort [ HebrewClusterCop.ReorderArraySize ];
        HebrewClusterCop cop = new HebrewClusterCop( reorderChars, reorderGlyphs );
        bool reorderPending = false;

        while (currentRun.GetNextShape(out currShape))
        {
            if ( (currShape & CharShapeInfo.IsStartOfCluster) == 0 )
            {
                // A mark: let the cop consume as much of the cluster as it can.
                reorderPending = cop.AddCluster( ref currentRun, currShape );
                continue;
            }

            // A new base.  Flush the cluster that just ended, if it needs it.
            if (reorderPending)
            {
                cop.GetReorderedGlyphs( ref currentRun );
                reorderPending = false;
            }
            currentRun.SetGlyphPropertiesUsingShapeInfo(currShape);
        }

        // The run may end in the middle of a cluster.
        if (reorderPending)
        {
            cop.GetReorderedGlyphs( ref currentRun );
        }

        return currentRun.GlyphsCount;
    }

    /// <summary>
    /// HebrewShape.SupportedScripts -
    /// IShapingEngine member override
    /// </summary>
    /// <returns>Our supported scripts (Hebrew, Thaana).</returns>
    public override ScriptTags[] SupportedScripts
    {
        get
        {
            return _supportedScripts;
        }
    }

    #endregion
}

/// Class HebrewClusterCop:
/// Manages the diacritic ordering requirement
///
/// This class is used to provide diacritic ordering and invalid
/// diacritic enforcement for the diacritics and cantillation marks
/// found in the unicode stream.
/// Diacritic ordering is done by maintaining an array whose extent
/// is HebrewCharClassifier.NumberOfClasses.
///
internal struct HebrewClusterCop
{
    // Caller-supplied work arrays (HebrewShape.GetGlyphs allocates both with
    // stackalloc, ReorderArraySize elements each).  Slots are indexed by
    // HebrewReorderPosition.
    [SecurityCritical]
    unsafe private char*        _clusterChars;                  // our temporary array for reordering
    [SecurityCritical]
    unsafe private ushort*      _clusterGlyphs;                 // our temporary array for reordering

    private HebrewClusterState  _clusterState;                  // current reordering state
    private bool                _textHasUnicodeControlChars;    // a ZWJ/ZWNJ style layout control was seen at or next to the cluster
    private bool                _reorderingIsSuppressed;        // when set, AddCluster no longer stores glyphs for reordering
    private bool                _clusterIsInvalid;              // the current mark is not legal on the current base/cluster
    private bool                _clusterRequiresReordering;     // a mark arrived "below" the current state; cleared by GetReorderedGlyphs
    private bool                _clusterHasCGJ;                 // a CGJ was seen in this cluster
    private HebrewCharClass     _baseCharClass;                 // base char's class info
    private ushort              _clusterSize;                   // number of diacritics currently in cluster
    private ushort              _nextReorderedGlyphIx;          // for returning reordered glyphs (0 if no reordering)
    private ushort              _lastReorderedGlyphIx;          // for returning reordered glyphs (0 if no reordering)
    private ushort              _firstCharIx;                   // char index recorded by StartCluster; where reordered glyphs are written back
    private ushort              _lowerMarksCount;               // offset of the next free slot after LowerMarksStart
    private ushort              _upperMarksCount;               // offset of the next free slot after UpperMarksStart

///for comments about the
/// char class enum.
/// For each base character in the unicode run, the diacritics and
/// other marks are added to the arrays at the index corresponding
/// to the HebrewCharClass value. Only one mark per HebrewCharClass
/// value is allowed per base character. Once the extent of the valid
/// diacritics and cantillation marks has been determined (ie, when an
/// illegal mark is found, when a new base is found, or at the end of
/// the unicode run) the array used to reorder the glyphs collected in
/// the GlyphList object if reordering is needed.
/// The diacritics reordering done here is not consistent with Unicode
/// canonical ordering, but is instead consistent with the needs of
/// current Hebrew OpenType fonts.
///
/// HebrewClusterCop constructor
///
/// Stores the two caller-owned work arrays, resets all cluster state and
/// zeroes the first ClusterCopArraySize elements of both arrays.  Both arrays
/// must therefore hold at least HebrewClusterCop.ReorderArraySize elements.
///
/// <SecurityNote>
/// Critical - accepts a pointer input, uses a raw buffer
/// </SecurityNote>
[SecurityCritical]
unsafe public HebrewClusterCop ( char *clusterArray, ushort *clusterGlyphs )
{
    _baseCharClass = HebrewCharClass.UnknownCharClass;
    _clusterChars = clusterArray;
    _clusterGlyphs = clusterGlyphs;
    _clusterState = HebrewClusterState.Idle;
    _clusterIsInvalid = _reorderingIsSuppressed = _clusterHasCGJ = _clusterRequiresReordering = _textHasUnicodeControlChars = false;
    _firstCharIx = _lastReorderedGlyphIx =_nextReorderedGlyphIx = 0;
    _clusterSize = 0;
    _lowerMarksCount = _upperMarksCount = 0;

    // initialize the diacritics class array (its on the stack)
    int i = (int)HebrewReorderPosition.ClusterCopArraySize;
    while (--i >= 0)
    {
        _clusterChars[i] = '\u0000';
        _clusterGlyphs[i] = 0;
    }
}

/// <summary>
/// HebrewClusterCop.AddCluster - adds as many characters as possible to the
/// current cluster
/// </summary>
/// <remarks>
/// This routine is used to create a diacritics cluster on the current
/// base char. It continues to add characters till it notes a non-legal
/// character. Enforces member count restrictions for the various diacritic
/// classes and keeps track of reordering needs.
/// </remarks>
/// <param name="currentRun">the run being shaped; positioned on the first mark of the cluster</param>
/// <param name="currShape">shape info of that first mark</param>
/// <returns>
/// true if the cluster collected so far still needs its glyphs reordered
/// (the value of _clusterRequiresReordering); the caller then calls
/// GetReorderedGlyphs once the cluster is complete.
/// </returns>
/// <SecurityNote>
/// Critical - calls critical code, uses unsafe accessors
/// </SecurityNote>
[SecurityCritical]
unsafe internal bool AddCluster ( ref ShapingWorkspace currentRun, CharShapeInfo currShape )
{
    bool isClusterInProgress = StartCluster( ref currentRun, currShape );
    char currChar = currentRun.CurrentChar;
    ushort currGlyph = currentRun.CharConverter.ToGlyph(currChar);

    // The loop body always runs for the first mark; further marks are pulled
    // from the run only while the cluster is still in progress.
    do
    {
        HebrewCharClass currClass = (HebrewCharClass)(currShape & CharShapeInfo.ShaperClassMask);

        // add this character's glyph to the cluster
        if ( _clusterIsInvalid)
        {
            // Already known to be illegal (StartCluster rejected the pair):
            // give the mark its own inserted base and stop.
            _reorderingIsSuppressed = true;
            currShape |= CharShapeInfo.RequiresInsertedBase;
            isClusterInProgress = false;
        }
        else
        {
            // step 1. get the reordering state
            HebrewClusterState newState = clusterStateMap[ (int)currClass ];

            // step 2. get the reordering array entry (clusterIx)
            // The position (clusterIx) for a character in the reordering
            // array depends on the reordering state for the character.
            // Several states can contain multiple entries.
            HebrewReorderPosition clusterIx = clusterPositionMap[ (int)currClass ] ;
            switch (clusterIx)
            {
                case HebrewReorderPosition.SinShinPosition:
                case HebrewReorderPosition.MasoraCirclePosition:
                    // verify that sin/shin dot or masora circle is legal on this
                    // base
                    _clusterIsInvalid = IsIllegalPair(_baseCharClass, currClass);
                    break;

                case HebrewReorderPosition.DageshRafePosition:
                    // diacritic is dagesh, rafe
                    char firstDageshRafe = _clusterChars[ (ushort)clusterIx ];

                    // We've combined the dagesh and rafe into one reordering class.
                    // However we can have one dagesh and one rafe, but not two of
                    // either class types. If there is currently a dagesh/rafe class
                    // member already in this cluster, cluster is done
                    // if it is the same char class or if there's actually already
                    // 2 dagesh/rafe char's in array. If there's not a dagesh in
                    // this cluster, cluster is done if the current base char doesn't
                    // support dagesh.
                    if ( firstDageshRafe != '\u0000' )
                    {
                        HebrewCharClass prevDageshClass = (HebrewCharClass) (currentRun.CharConverter.ToShapeInfo(firstDageshRafe) & CharShapeInfo.ShaperClassMask);
                        _clusterIsInvalid = (prevDageshClass == currClass);
                        // use the second dagesh/rafe slot; if that one is taken
                        // as well, the occupancy test in step 3 rejects the mark
                        ++clusterIx;
                    }
                    else
                    {
                        _clusterIsInvalid = IsIllegalPair(_baseCharClass, currClass);
                    }
                    break;

                case HebrewReorderPosition.UnresolvedPosition:
                    // all marks that need to be checked against the other
                    // marks in the cluster will come here...
                    _clusterIsInvalid = IsIllegalMark(currChar, currClass);
                    if ( !_clusterIsInvalid )
                    {
                        // need to resolve the cluster index (this character is of
                        // a class that there can be more than one of per cluster)
                        clusterIx = ResolveClusterIx(newState);
                        if (clusterIx == 0)
                        {
                            // ok, it is a puncta. Punctas don't have their own
                            // reordering class. They are part of the lower marks
                            // or upper marks class.
                            if (currChar == HebrewCharClassifier.BelowPuncta)
                            {
                                // change the state, 'cause this is a lower puncta
                                newState = HebrewClusterState.LowerMarkSeen;
                                clusterIx = (HebrewReorderPosition)((ushort)HebrewReorderPosition.LowerMarksStart + _lowerMarksCount++);
                            }
                            else
                            {
                                // change the state, 'cause this is an upper puncta
                                newState = HebrewClusterState.UpperMarkSeen;
                                clusterIx = (HebrewReorderPosition)((ushort)HebrewReorderPosition.UpperMarksStart + _upperMarksCount++);
                            }
                        }
                    }
                    break;

                case HebrewReorderPosition.CombiningMarkPosition:
                    // combining mark. Only one per base, won't "share" base.
                    isClusterInProgress = false;
                    _reorderingIsSuppressed = true; // don't save in cluster array

                    // We have mapped the CGJ character to the same class so check
                    // for it...
                    if (currChar == UnicodeCharacter.CGJ)
                    {
                        // CGJ may only be applied to a nun base char, and
                        // may not "share" any other mark with its base
                        // NOTE(review): as written, the cluster is flagged INVALID
                        // exactly when the CGJ is the first mark on a nun (U+05E0),
                        // which reads as the opposite of the rule stated above.
                        // Looks like a missing negation -- confirm against the
                        // intended behavior before changing.
                        _clusterIsInvalid = (_clusterSize == 0 && currentRun.PreviousChar == 0x5e0);
                        _clusterHasCGJ = true;
                    }
                    else
                    {
                        // any other combining mark must be the only mark on the
                        // base (or the only one besides a CGJ)
                        _clusterIsInvalid = (_clusterSize != (_clusterHasCGJ ? 1 : 0));
                    }
                    break;

                case HebrewReorderPosition.ZWControl:
                    _clusterRequiresReordering = false;
                    _textHasUnicodeControlChars = true;
                    _reorderingIsSuppressed = true; // we don't want to confuse things by reordering
                    currShape |= CharShapeInfo.IsUnicodeLayoutControl;
                    break;

                default:
                    // This marks the end of the cluster. We'll have to re-process this character
                    currentRun.Reset();
                    return _clusterRequiresReordering; // leave now.
            }

            // step 3. make sure any invalid marks get their very own dotted circle base.
            // A mark is also rejected when its slot is already occupied or,
            // once a CGJ has been seen, when it is anything other than a
            // combining mark.
            if (_clusterIsInvalid ||
                (_clusterHasCGJ ? clusterIx != HebrewReorderPosition.CombiningMarkPosition : _clusterChars[ (ushort)clusterIx ] != 0))
            {
                // if there's some glyphs to reorder, do it now before we move on...
                if (_clusterRequiresReordering)
                {
                    GetReorderedGlyphs(ref currentRun);
                }

                // for consistency with unmanaged Uniscribe suppress any further reordering
                _clusterIsInvalid = _reorderingIsSuppressed = true;

                // add a dotted circle (except if the preceding character was a ZWJ)
                if (!_textHasUnicodeControlChars || (currentRun.PreviousChar != UnicodeCharacter.ZWJ))
                {
                    currShape |= CharShapeInfo.RequiresInsertedBase;
                }
                isClusterInProgress = false;
            }
            else
            {
                // step 4. save this new character in the reordering array
                _clusterChars[ (ushort)clusterIx ] = currChar;

                // step 5. As long as reordering isn't suppressed, save the current glyph in the
                // reordering array. (We may not need to reorder, but we might)
                if (_reorderingIsSuppressed == false)
                {
                    _clusterGlyphs[ (ushort)clusterIx ] = currGlyph;
                    _clusterSize++;

                    // step 6. update the reordering state
                    if ( _lastReorderedGlyphIx < (ushort)clusterIx )
                    {
                        // Keep _lastReorderedGlyphIx at the "highest" member of the
                        // reordering array
                        _lastReorderedGlyphIx = (ushort)clusterIx;
                    }

                    if (newState >= _clusterState)
                    {
                        // no reordering needed thus far...
                        // keep track of the "highest" cluster member
                        _clusterState = newState;
                    }
                    else
                    {
                        // this current char is "lower" than a previous character
                        // in the cluster so we're gonna need to reorder this cluster
                        _clusterRequiresReordering = true;
                    }
                }
            }
        }

        // Emit the glyph in input order for now; GetReorderedGlyphs rewrites
        // the cluster's glyphs afterwards when reordering turns out to be needed.
        currentRun.SetGlyphPropertiesUsingGlyph(currShape,currGlyph);
    } while ( isClusterInProgress && currentRun.GetNextCharProperties (out currChar, out currGlyph, out currShape) );

    return _clusterRequiresReordering;
}

/// HebrewClusterCop.GetNextGlyph - get the reordered glyph
///
/// This function is called to fetch each member of a reordered cluster.
/// It advances through the reordering array as it fetches each glyph
/// (the array slots themselves are left untouched).
/// This function will return 0 if there are no valid glyphs
/// to return.
///
/// <returns>next reordered glyph, or 0 when no further glyph is available</returns>
/// <SecurityNote>
/// Critical - calls critical code, uses pointers
/// </SecurityNote>
[SecurityCritical]
unsafe private ushort GetNextGlyph()
{
    ushort nextGlyph = 0;

    // Only scan when a cluster has been recorded and the recorded "highest"
    // index lies inside the reordering part of the array.
    if ( _lastReorderedGlyphIx != 0 &&
         _clusterSize != 0 &&
         _lastReorderedGlyphIx < (ushort)HebrewReorderPosition.ReorderingArraySize)
    {
        ushort nextGlyphIx = _nextReorderedGlyphIx;
        ushort lastGlyphIx = _lastReorderedGlyphIx;

        // Walk forward to the next non-zero slot; empty slots hold 0.
        while (nextGlyph == 0)
        {
            if (++nextGlyphIx > lastGlyphIx)
            {
                // Ran past the last recorded slot: nothing left to return.
                _clusterSize = 0;
                return nextGlyph;
            }

            nextGlyph = _clusterGlyphs[ nextGlyphIx ];
        }

        // Remember where we stopped so the next call resumes after this slot.
        _nextReorderedGlyphIx = nextGlyphIx;
        --_clusterSize;
    }
    else
    {
        _clusterSize = 0;
    }

    return nextGlyph;
}

/// <summary>
/// HebrewClusterCop.GetReorderedGlyphs - writes the reordered cluster glyphs
/// back into the run, starting at the glyph index of the cluster's first char.
/// </summary>
/// <param name="currentRun">the shaping workspace whose glyph list is rewritten</param>
/// <returns>the value of ReorderedCount on entry (0 if no reordering was pending)</returns>
/// <SecurityNote>
/// Critical - calls critical code, uses pointers
/// </SecurityNote>
[SecurityCritical]
unsafe internal ushort GetReorderedGlyphs(ref ShapingWorkspace currentRun)
{
    ushort reorderedCount = ReorderedCount;   // 0 unless reordering is pending

    if (reorderedCount > 0)
    {
        // we have a number of diacritics that need reordering. So, put the
        // new glyphs in the right order in the already filled glyphlist.
        Debug.Assert (_firstCharIx + _clusterSize - (currentRun.IsFinished?1:0) <= currentRun.CurrentCharIx);

        ushort nextGlyphIx = currentRun.GetGlyphIx( _firstCharIx );

        // GetNextGlyph() decrements _clusterSize (or zeroes it when exhausted),
        // which is what terminates this loop.
        // NOTE(review): a stored glyph id of 0 is indistinguishable from an empty
        // slot in GetNextGlyph(); confirm that cannot occur for cluster members.
        while ( _clusterSize > 0 )
        {
            currentRun.SetGlyph(nextGlyphIx++, GetNextGlyph());
        }

        _clusterRequiresReordering = false;
    }

    return reorderedCount;
}

/// <summary>
/// HebrewClusterCop.GetPrecomposedBaseVowel
/// </summary>
/// <param name="baseChar">character to test</param>
/// <returns>the vowel if the character is a precomposed base+vowel character,
/// otherwise returns '\u0000'</returns>
/// <remarks>
///      0xfb1d == 0x05d9,0x05b4
///      0xfb1f == 0x05f2,0x05b7
///      0xfb2e == 0x05d0,0x05b7
///      0xfb2f == 0x05d0,0x05b8
///      0xfb4b == 0x05d5,0x05b9
/// </remarks>
private char GetPrecomposedBaseVowel(char baseChar)
{
    // An explicit switch replaces the earlier XOR arithmetic. That code's last
    // comparison (== 0x54) was meant to detect U+FB4B, but
    // 0x4B ^ 0x2E ^ 0x33 is 0x56; 0x54 corresponds to U+FB49 (shin with
    // dagesh), so U+FB4B was never recognised and U+FB49 wrongly reported holam.
    switch (baseChar)
    {
        case '\ufb1d':
            return '\u05b4';    // hiriq

        case '\ufb1f':
        case '\ufb2e':
            return '\u05b7';    // patah

        case '\ufb2f':
            return '\u05b8';    // qamats

        case '\ufb4b':
            return '\u05b9';    // holam

        default:
            return '\u0000';    // not a precomposed base+vowel form
    }
}

/// <summary>
/// HebrewClusterCop.IsIllegalMark - process mark
/// </summary>
/// <remarks>
/// This routine is used to validate a mark.  It checks
/// for number of marks in the class (2 maximum) and for
/// duplicates.
/// </remarks>
/// <param name="unicodeChar">the current char</param>
/// <param name="charClass">the char's class</param>
/// <returns>false if can add accent.</returns>
///
/// <SecurityNote>
/// Critical - calls critical code, uses pointers
/// </SecurityNote>
[SecurityCritical]
unsafe private bool IsIllegalMark(char unicodeChar, HebrewCharClass charClass)
{
    // Sentinels stored in the validation slots of _clusterChars.
    const char NothingSeen = '\u0000';   // no mark of this kind recorded yet
    const char SlotIsFull  = '\uffff';   // no further mark of this kind is allowed

    bool isVowel = IsVowel(charClass);

    // Vowels, puncta and all remaining marks are each tracked in their own slot.
    ushort validationPosition;
    if (isVowel)
    {
        validationPosition = (ushort)HebrewReorderPosition.VowelValidation;
    }
    else if (charClass == HebrewCharClass.Puncta)
    {
        validationPosition = (ushort)HebrewReorderPosition.PunctaValidation;
    }
    else
    {
        validationPosition = (ushort)HebrewReorderPosition.MarkValidation;
    }

    // A hataf vowel fills the vowel slot on its own (only one per base).
    bool fillsSlotAlone = isVowel && charClass == HebrewCharClass.Hataf;

    char previouslySeen = _clusterChars[ validationPosition ];
    char validationValue;

    if (previouslySeen == NothingSeen)
    {
        // First mark of this kind: remember it (or mark the slot full for a hataf).
        validationValue = fillsSlotAlone ? SlotIsFull : unicodeChar;
    }
    else
    {
        // Reject a third mark, a duplicate of the first one, or a hataf that
        // follows another vowel.
        if (previouslySeen == SlotIsFull || previouslySeen == unicodeChar || fillsSlotAlone)
        {
            return true;
        }

        // Second distinct mark accepted; nothing more may follow.
        validationValue = SlotIsFull;
    }

    // save validation value for comparison against next mark
    _clusterChars[validationPosition] = validationValue;
    return false;
}

/// <summary>
/// HebrewClusterCop.IsIllegalPair - Validates a base/mark combo
/// </summary>
/// Uniscribe does not verify that a given mark is legal on
/// a given base.  Thus, this behavior is different than legacy
/// implementation.
/// Note that a space character is not part of the AllBaseChars mask, so a
/// space base is reported here as an illegal pair; per the original comment this
/// lets the caller tell a space base apart from the other bases.
/// <param name="baseClass">current base char</param>
/// <param name="markClass">class of diacritic</param>
/// <returns>false if diacritic can be attached to base</returns>
private bool IsIllegalPair(HebrewCharClass baseClass, HebrewCharClass markClass)
{
    // Anything that is not one of the "real" base classes can never take a mark.
    if ((CharClassToFlag[(int)baseClass] & HebrewCharClassFlags.AllBaseChars) == 0)
    {
        return true;
    }

    // Otherwise the mark must be one of the kinds this base class accepts.
    HebrewCharClassFlags markFlag = CharClassToFlag[(int)markClass];
    HebrewCharClassFlags acceptedByBase = BaseDiacriticValidationTable[ (int)baseClass ];
    return (markFlag & acceptedByBase) == 0;
}

/// <summary>
/// HebrewClusterCop.IsVowel
/// </summary>
/// <param name="charClass">classification to test</param>
/// <returns>true if the classification's flag is part of the AllVowels mask</returns>
private bool IsVowel(HebrewCharClass charClass)
{
    HebrewCharClassFlags classFlag = CharClassToFlag[(int)charClass];
    return (classFlag & HebrewCharClassFlags.AllVowels) != 0;
}

/// <summary>
/// HebrewClusterCop.ReorderedCount - get size of cluster
/// </summary>
/// <remarks>
/// Returns 0 unless the cluster has been flagged as requiring reordering.
/// Per the original comment, this number doesn't include the base character.
/// </remarks>
internal ushort ReorderedCount
{
    get
    {
        if (_clusterRequiresReordering)
        {
            return _clusterSize;
        }

        return 0;
    }
}

/// <summary>
/// HebrewClusterCop.ResetClusterCop - clears both cluster arrays and returns
/// every piece of per-cluster state to its idle value.
/// </summary>
/// <SecurityNote>
/// Critical - is unsafe code
/// </SecurityNote>
[SecurityCritical]
unsafe private void ResetClusterCop ()
{
    // Wipe every slot (reordering slots and validation slots alike).
    int slotCount = (int)HebrewReorderPosition.ClusterCopArraySize;
    for (int slot = 0; slot < slotCount; slot++)
    {
        _clusterChars[slot] = '\u0000';
        _clusterGlyphs[slot] = 0;
    }

    _lowerMarksCount = 0;
    _upperMarksCount = 0;
    _clusterSize = 0;

    _clusterRequiresReordering = false;
    _clusterHasCGJ = false;
    _reorderingIsSuppressed = false;
    _clusterIsInvalid = false;

    _clusterState = HebrewClusterState.Idle;

    _nextReorderedGlyphIx = 0;
    _lastReorderedGlyphIx = 0;
}

/// <summary>
/// HebrewClusterCop.ResolveClusterIx - picks the reordering-array slot for a
/// mark whose class can occur more than once per cluster.
/// </summary>
/// <param name="newState">cluster state associated with the mark's class</param>
/// <returns>the slot to use; BaseCharPosition (0) for PunctaSeen and for an
/// unexpected state</returns>
private HebrewReorderPosition ResolveClusterIx (HebrewClusterState newState)
{
    switch (newState)
    {
        case HebrewClusterState.HolamSeen:
            return HebrewReorderPosition.HolamPosition;

        case HebrewClusterState.LowerMarkSeen:
            // each lower mark takes the next free lower slot
            return (HebrewReorderPosition)((int)HebrewReorderPosition.LowerMarksStart + _lowerMarksCount++);

        case HebrewClusterState.LowerRightMarkSeen:
            // diacritic is lower right
            return HebrewReorderPosition.LowerRightMarkPosition;

        case HebrewClusterState.UpperMarkSeen:
            // each upper mark takes the next free upper slot
            return (HebrewReorderPosition)((int)HebrewReorderPosition.UpperMarksStart + _upperMarksCount++);

        case HebrewClusterState.UpperLeftMarkSeen:
            // diacritic is an upper left mark
            return HebrewReorderPosition.UpperLeftMarkPosition;

        case HebrewClusterState.PunctaSeen:
            // left unresolved (index 0) for the caller to deal with
            return HebrewReorderPosition.BaseCharPosition;

        default:
            Invariant.Assert(false,"HebrewClusterCop.AddToCluster() - invalid newState");
            return HebrewReorderPosition.BaseCharPosition;
    }
}

/// <summary>
/// HebrewClusterCop.StartCluster - start of new cluster (potentially)
/// </summary>
/// This routine is used at the start of each cluster.
/// If the base + this first non-base char are the valid start of a new
/// cluster, the cluster state is prepared for further marks.
/// <param name="currentRun">shaping workspace positioned at the first non-base char</param>
/// <param name="currShape">shape info of that first non-base char</param>
/// <returns>true if more characters can be added to this cluster</returns>
/// <SecurityNote>
/// Critical - calls critical code, uses unsafe accessors
/// </SecurityNote>
[SecurityCritical]
unsafe private bool StartCluster( ref ShapingWorkspace currentRun, CharShapeInfo currShape )
{
    ResetClusterCop();

    // The preceding character is taken as the base of the cluster.
    CharShapeInfo previousShape = currentRun.PreviousShape;

    // Keep track of our current character index for checking
    // reordering later (when next base is detected)
    _firstCharIx = currentRun.CurrentCharIx;

    // note any ZWNJ/ZWJ chars (on either the base or the current char)
    _textHasUnicodeControlChars =
        ((currShape | previousShape) & CharShapeInfo.IsUnicodeLayoutControl) != 0;

    if ( _textHasUnicodeControlChars )
    {
        // if this is a ZW joiner just allow whatever follows
        _reorderingIsSuppressed = true;
        return !_clusterIsInvalid;
    }

    _baseCharClass = (HebrewCharClass)(previousShape & CharShapeInfo.ShaperClassMask);
    HebrewCharClass markClass = (HebrewCharClass)(currShape & CharShapeInfo.ShaperClassMask);

    _clusterIsInvalid = IsIllegalPair (_baseCharClass, markClass);

    if (!_clusterIsInvalid)
    {
        // the base class is good, we can start to process a cluster - so prepare.
        // if the base character is a precomposed form that includes a vowel,
        // seed the vowel validation slot with that vowel...
        _clusterChars[ (int)HebrewReorderPosition.VowelValidation ] =
            GetPrecomposedBaseVowel(currentRun.PreviousChar);
        return true;
    }

    if (currentRun.PreviousChar == UnicodeCharacter.NoBreakSpace)
    {
        // if the base is a NBSP, this diacritic is acceptable (and we're done):
        // it is the only character we'll allow in this cluster
        _clusterIsInvalid = false;
    }

    return false;
}

/// <summary>
/// HebrewCharClassFlags - enumeration of Hebrew character classification flags
/// </summary>
/// This enum is linked to the list of char classes in the HebrewCharClass
/// enum; keep them in sync.
[Flags()]
private enum HebrewCharClassFlags: uint
{
    UnknownCharClass    = 0x0000,
    PunctuationMark     = 0x0001,

    // --- base classes ---
    AcceptsVowel        = 0x0002,   // Thaana base glyph can have a vowel (only)
    AcceptsNotShin      = 0x0004,   // Hebrew base can have cantillation | diacritic
    AcceptsRafe         = 0x0008,   // Hebrew base can have rafe | cantillation | diacritic
    AcceptsDagesh       = 0x0010,   // Hebrew base can have dagesh | cantillation | diacritic
    ShinBase            = 0x0020,   // Hebrew shin can have shin/sin dot | dagesh | cantillation | diacritic
    SpaceChar           = 0x0040,   // Space char may take one mark

    // mask for all possible bases (0x003E); deliberately excludes SpaceChar
    AllBaseChars        = AcceptsVowel | AcceptsNotShin | AcceptsRafe | AcceptsDagesh | ShinBase,

    // --- diacritics ---
    ShinSinDot          = 0x0080,   // Hebrew shin/sin dot (05C1, 05C2)
    Dagesh              = 0x0100,   // Hebrew dagesh/mapiq (05BC)
    Rafe                = 0x0200,   // Hebrew Rafe (05BF)
    DageshRafe          = Dagesh | Rafe,            // dagesh and rafe mask (0x0300)
    Holam               = 0x0400,   // Hebrew Holam (05B9)
    Hataf               = 0x0800,   // Hebrew hataf -- only one per base
    ThaanaVowel         = Hataf,    // Thaana vowel -- only one per base (shares the hataf bit, 0x0800)
    Vowel               = 0x1000,   // Hebrew vowel -- only two per base
    AllVowels           = Holam | Hataf | Vowel,    // mask for vowels only (0x1c00)

    // --- cantillation ---
    BelowMark           = 0x2000,   // Hebrew lower accents
    LowerRightMark      = 0x4000,   // Hebrew Dehi, Yetiv accent (always last in lower sequence)
    UpperMark           = 0x8000,   // Hebrew upper marks
    UpperLeftMark       = 0x10000,  // Hebrew zinor (upper left mark) and others
    Puncta              = 0x20000,  // Hebrew upper dot (05C4)
    AllLowerAccents     = BelowMark | LowerRightMark,   // mask for lower cantillation marks only (0x06000)
    AllAccents          = AllLowerAccents | UpperMark | UpperLeftMark,  // mask for cantillation marks only (0x1e000)
    MasoraCircle        = 0x40000,  // Hebrew cantillation - MasoraCircle Circle
    CombiningMark       = 0x80000,
    ControlChar         = 0x100000,

    // every diacritic bit from ShinSinDot up to CombiningMark (0x0FFF80)
    AllDiacritics       = ShinSinDot | DageshRafe | AllVowels | AllAccents
                          | Puncta | MasoraCircle | CombiningMark,

    // every bit from Holam up to ControlChar (0x1FFC00) -- includes ZWJ, ZWNJ
    AllVowelsAndAccents = AllVowels | AllAccents
                          | Puncta | MasoraCircle | CombiningMark | ControlChar
};

// BaseDiacriticValidationTable - table used for validating a given base/mark pair
//      Used to quickly determine whether a given diacritic/mark are
//      legal to add to a particular base type.
// Indexed by the HebrewCharClass of the base; only the base classes
// (UnknownCharClass .. SpaceChar) have an entry.  Each entry is the set of
// mark flags the base accepts.  Read-only, like the other lookup maps here.
private static readonly HebrewCharClassFlags[] BaseDiacriticValidationTable =
{
    0,                                  // unknown chars accept no marks
    0,                                  // punctuation marks accept no marks
    HebrewCharClassFlags.ThaanaVowel,   // base accepts vowels only (thaana bases)
    HebrewCharClassFlags.AllVowelsAndAccents |      // BaseAcceptsNotShin
        HebrewCharClassFlags.DageshRafe,
    HebrewCharClassFlags.AllVowelsAndAccents |      // BaseAcceptsRafe
        HebrewCharClassFlags.Rafe,
    HebrewCharClassFlags.AllVowelsAndAccents |      // BaseAcceptsDagesh
        HebrewCharClassFlags.Dagesh,
    HebrewCharClassFlags.AllVowelsAndAccents |      // ShinBaseChar
        HebrewCharClassFlags.DageshRafe |
        HebrewCharClassFlags.ShinSinDot,
    0                                   // space char
};

// Maps each HebrewCharClass value (used as the index) to its flag.
// Read-only, like the other lookup maps here.
private static readonly HebrewCharClassFlags[] CharClassToFlag =
{
    HebrewCharClassFlags.UnknownCharClass,
    HebrewCharClassFlags.PunctuationMark,
    HebrewCharClassFlags.AcceptsVowel,     // Thaana base glyph can have a vowel (only)
    HebrewCharClassFlags.AcceptsNotShin,   // Hebrew base can have cantillation | diacritic
    HebrewCharClassFlags.AcceptsRafe,      // Hebrew base can have rafe | cantillation | diacritic
    HebrewCharClassFlags.AcceptsDagesh,    // Hebrew base can have dagesh | cantillation | diacritic
    HebrewCharClassFlags.ShinBase,         // Hebrew shin can have shin/sin dot | dagesh | cantillation | diacritic
    HebrewCharClassFlags.SpaceChar,        // Space char may take one mark
    HebrewCharClassFlags.ShinSinDot,       // Hebrew shin/sin dot (05C1, 05C2)
    HebrewCharClassFlags.Dagesh,           // Hebrew Dagesh/mapiq (05BC)
    HebrewCharClassFlags.Rafe,             // Hebrew Rafe
    HebrewCharClassFlags.Holam,            // Hebrew Holam
    HebrewCharClassFlags.Hataf,            // Hebrew hataf -- only one per base
    HebrewCharClassFlags.Vowel,            // Hebrew vowel -- only two per base
    HebrewCharClassFlags.BelowMark,        // Hebrew cantillation - Below
    HebrewCharClassFlags.LowerRightMark,   // Hebrew cantillation - Below right
    HebrewCharClassFlags.UpperMark,        // Hebrew cantillation - Above marks
    HebrewCharClassFlags.UpperLeftMark,    // Hebrew cantillation - Above left
    HebrewCharClassFlags.Puncta,           // Hebrew upper dot (05C4)
    HebrewCharClassFlags.MasoraCircle,     // Hebrew cantillation - MasoraCircle Circle
    HebrewCharClassFlags.CombiningMark,    // combining marks (0323,0307,0308); CGJ shares this class
    HebrewCharClassFlags.ControlChar       // ZWJ, ZWNJ
};

// (Note carried over from the HebrewCharClassFlags documentation:)
//  There are several masks that are defined here:
//      the "All..." masks are used to test the char class of a unicode char

/// <summary>
/// HebrewClusterCop.HebrewClusterState: state definitions
/// </summary>
/// <remarks>
/// The numeric order matters: cluster processing compares a new state with
/// the current one (newState >= _clusterState) to decide whether a cluster
/// needs reordering.
/// </remarks>
private enum HebrewClusterState : byte
{
    Idle,
    SinShinSeen,
    DageshRafeSeen,
    HolamSeen,
    PunctaSeen,
    LowerMarkSeen,
    LowerRightMarkSeen,
    UpperMarkSeen,
    UpperLeftMarkSeen,
    MasoraCircleSeen,
    CombiningMarkSeen,
    UnicodeLayoutControlSeen
}

// reordering array positions...  (resulting numeric value shown in brackets)
private enum HebrewReorderPosition : byte
{
    BaseCharPosition,               // [0]  not used
    SinShinPosition,                // [1]
    DageshRafePosition,             // [2]
    HolamPosition = DageshRafePosition + 2,         // [4]
    LowerMarksStart,                // [5]  lower marks occupy [5..8]
    // lower right marks follow all other lower marks.
    // NOTE(review): the original comment asks for room for up to five lower
    // marks (2 vowels, 2 accents, and 1 puncta) but only four slots precede
    // LowerRightMarkPosition - confirm.
    LowerRightMarkPosition = LowerMarksStart + 4,   // [9]
    // upper marks follow lower marks.
    UpperMarksStart,                // [10] upper marks occupy [10..12]
    // upper left marks follow all other upper marks.
    // NOTE(review): the original comment asks for room for up to four upper
    // marks (2 accents, 1 vowel (holam) and 1 puncta) but only three slots
    // precede UpperLeftMarkPosition - confirm.
    UpperLeftMarkPosition = UpperMarksStart + 3,    // [13]
    // masora circle follows upper marks.
    MasoraCirclePosition,           // [14]
    CombiningMarkPosition,          // [15]
    ReorderingArraySize,            // [16] number of reordering slots
    ZWControl = ReorderingArraySize,    // [16] not added to the array
    VowelValidation,                // [17] for validating vowels
    MarkValidation,                 // [18] for validating marks
    PunctaValidation,               // [19] for validating puncta
    ClusterCopArraySize,            // [20] total number of slots
    UnresolvedPosition = ClusterCopArraySize  // [20] not in array (index too big)
}

public const int ReorderArraySize = (int)HebrewReorderPosition.ClusterCopArraySize;

// This maps all the Hebrew char classes (index == HebrewCharClass value) to
// the corresponding reordering-array position.  UnresolvedPosition marks the
// classes whose slot has to be worked out per cluster.
private static readonly HebrewReorderPosition[] clusterPositionMap =
{
    HebrewReorderPosition.BaseCharPosition,     // Unknown class -- can take any diacritic
    HebrewReorderPosition.BaseCharPosition,     // PunctuationMark -- cannot take diacritics
    HebrewReorderPosition.BaseCharPosition,     // Thaana base glyph can have single vowel
    HebrewReorderPosition.BaseCharPosition,     // Hebrew base can have cantillation | vowel
    HebrewReorderPosition.BaseCharPosition,     // Hebrew base can have rafe | cantillation | vowel
    HebrewReorderPosition.BaseCharPosition,     // Hebrew base can have dagesh | cantillation | vowel
    HebrewReorderPosition.BaseCharPosition,     // Hebrew shin can have shin/sin dot && dagesh && rafe && cantillation && vowel
    HebrewReorderPosition.BaseCharPosition,     // space char
    HebrewReorderPosition.SinShinPosition,      // Hebrew shin/sin dot (05C1, 05C2)
    HebrewReorderPosition.DageshRafePosition,   // Hebrew dagesh/mapiq (05BC)
    HebrewReorderPosition.DageshRafePosition,   // Hebrew rafe
    HebrewReorderPosition.UnresolvedPosition,   // Hebrew holam
    HebrewReorderPosition.UnresolvedPosition,   // Hebrew hataf -- only one per base
    HebrewReorderPosition.UnresolvedPosition,   // Hebrew/Thaana vowel -- only two per base
    HebrewReorderPosition.UnresolvedPosition,   // Hebrew cantillation - Below
    HebrewReorderPosition.UnresolvedPosition,   // Hebrew cantillation - Below right
    HebrewReorderPosition.UnresolvedPosition,   // Hebrew cantillation - Above
    HebrewReorderPosition.UnresolvedPosition,   // Hebrew cantillation - Above left
    HebrewReorderPosition.UnresolvedPosition,   // puncta seen
    HebrewReorderPosition.MasoraCirclePosition, // Hebrew cantillation - MasoraCircle Circle
    HebrewReorderPosition.CombiningMarkPosition, // combining marks
    HebrewReorderPosition.ZWControl
    // extent of array; NumberOfHebrewCharClasses members
};

// This maps all the Hebrew char classes (index == HebrewCharClass value) to
// the corresponding cluster state; the states are what is compared to
// determine if reordering will be required.
private static readonly HebrewClusterState[] clusterStateMap =
{
    HebrewClusterState.Idle,            // Unknown class -- can take any diacritic
    HebrewClusterState.Idle,            // PunctuationMark -- cannot take diacritics
    HebrewClusterState.Idle,            // Thaana base glyph can have single vowel
    HebrewClusterState.Idle,            // Hebrew base can have cantillation | vowel
    HebrewClusterState.Idle,            // Hebrew base can have rafe | cantillation | vowel
    HebrewClusterState.Idle,            // Hebrew base can have dagesh | cantillation | vowel
    HebrewClusterState.Idle,            // Hebrew shin can have shin/sin dot | dagesh | cantillation | vowel
    HebrewClusterState.Idle,            // base of shin/sin dot | dagesh | cantillation | vowel
    HebrewClusterState.SinShinSeen,     // Hebrew shin/sin dot (05C1, 05C2)
    HebrewClusterState.DageshRafeSeen,  // Hebrew dagesh/mapiq (05BC)
    HebrewClusterState.DageshRafeSeen,  // Hebrew rafe
    HebrewClusterState.HolamSeen,       // Hebrew holam
    HebrewClusterState.LowerMarkSeen,   // Hebrew hataf -- only one per base
    HebrewClusterState.LowerMarkSeen,   // Hebrew/Thaana vowel -- only two per base
    HebrewClusterState.LowerMarkSeen,   // Hebrew cantillation - Below
    HebrewClusterState.LowerRightMarkSeen,  // Hebrew cantillation - Below right
    HebrewClusterState.UpperMarkSeen,       // Hebrew cantillation - Above
    HebrewClusterState.UpperLeftMarkSeen,   // Hebrew cantillation - Above left
    HebrewClusterState.PunctaSeen,          // puncta seen
    HebrewClusterState.MasoraCircleSeen,    // Hebrew cantillation - MasoraCircle Circle
    HebrewClusterState.CombiningMarkSeen,
    HebrewClusterState.UnicodeLayoutControlSeen
    // extent of array; NumberOfHebrewCharClasses members
};
}

/// <summary>HebrewCharClassifier - The char converter for Hebrew.</summary>
///
/// <remarks>
/// Classifies characters of the Hebrew and Thaana blocks through per-script
/// tables, and adds Hebrew-specific handling for the Alphabetic Presentation
/// Forms and a few generic combining characters.
/// </remarks>
internal class HebrewCharClassifier : ShaperCharacterClassifier
{
    /// <summary>
    /// Sets up the classification ranges and table for the given script.
    /// Only ScriptTags.Hebrew and ScriptTags.Thaana select a table; for any
    /// other tag the range/table fields are left as the base class set them.
    /// </summary>
    public HebrewCharClassifier(ScriptTags scriptTag, GlyphTypeface fontFace) :
        base(scriptTag, fontFace)
    {
        // classes reported for characters that are not script letters
        byte layoutControlClass = (byte)HebrewCharClass.UnicodeLayoutControl;

        _unknownClass   = UnknownCharClass;
        _spaceClass     = (byte)HebrewCharClass.SpaceChar;
        _zwControlClass = layoutControlClass;
        _zwjClass       = layoutControlClass;
        _zwnjClass      = layoutControlClass;
        _shyClass       = UnknownCharClass;

        if (scriptTag == ScriptTags.Thaana)
        {
            _firstChar      = '\u0780';     // first char covered by the Thaana table
            _lastChar       = '\u07BF';     // last char covered by the Thaana table
            _xorMask        = 0x780;
            _xorRange       = 0x040;
            _charClassTable = _thaanaCharClasses;
        }
        else if (scriptTag == ScriptTags.Hebrew)
        {
            _firstChar      = '\u0590';     // this is the first Hebrew Unicode char
            _lastChar       = '\u05FF';     // this is the last Hebrew Unicode char
            _xorMask        = 0x580;        // this mask is used in GetCharShapeInfo
            _xorRange       = 0x080;        // this is used in GetCharShapeInfo
            _charClassTable = _hebrewCharClasses;
        }
    }

    // range covered by _alphabeticPresentationFormsCharClasses
    const char AlphabeticPresentationRangeStart = '\uFB1D';
    const char AlphabeticPresentationRangeEnd   = '\uFB4F';

    const char UnicodeCombiningDotAbove  = '\u0307';    // combining dot above
    const char UnicodeCombiningDiaresis  = '\u0308';    // combining diaresis
    internal const char BelowPuncta      = '\u0323';    // combining dot below

    /// <summary>
    /// HebrewCharClassifier.ToShapeInfo - returns CharShapeInfo for the Unicode character
    /// </summary>
    /// <remarks>
    /// The base classifier is consulted first; only characters it leaves in
    /// the unknown class are examined further here.
    /// </remarks>
    public override CharShapeInfo ToShapeInfo(char unicodeChar)
    {
        CharShapeInfo baseResult = base.ToShapeInfo(unicodeChar);

        HebrewCharClass baseClass = (HebrewCharClass)(baseResult & CharShapeInfo.ShaperClassMask);
        if (baseClass != HebrewCharClass.UnknownCharClass)
        {
            return baseResult;
        }

        // Hebrew presentation forms have their own table.
        if (unicodeChar >= AlphabeticPresentationRangeStart &&
            unicodeChar <= AlphabeticPresentationRangeEnd)
        {
            return (CharShapeInfo)
                _alphabeticPresentationFormsCharClasses[unicodeChar - AlphabeticPresentationRangeStart];
        }

        if (unicodeChar == UnicodeCombiningDotAbove ||
            unicodeChar == UnicodeCombiningDiaresis)
        {
            return (CharShapeInfo) HebrewCharClass.UnicodeCombiningMark;
        }

        if (unicodeChar == BelowPuncta)
        {
            return (CharShapeInfo) HebrewCharClass.Puncta;
        }

        if (unicodeChar == UnicodeCharacter.CGJ)
        {
            // CGJ shares the combining-mark class and is also flagged as a layout control
            return (CharShapeInfo) HebrewCharClass.UnicodeCGJ |
                   CharShapeInfo.IsUnicodeLayoutControl;
        }

        return baseResult;
    }

    #region Classification Tables

    // these consts are so the tables below will be more readable

    // Base-type classes carry the start-of-cluster flag...
    private const HebrewCharClass StartOfCluster = (HebrewCharClass)CharShapeInfo.IsStartOfCluster;
    private const byte UnknownCharClass   = (byte)(HebrewCharClass.UnknownCharClass | StartOfCluster);
    private const byte PunctuationMark    = (byte)(HebrewCharClass.PunctuationMark | StartOfCluster);
    private const byte BaseAcceptsVowel   = (byte)(HebrewCharClass.BaseAcceptsVowel | StartOfCluster);
    private const byte BaseAcceptsAll     = (byte)(HebrewCharClass.BaseAcceptsNotShin | StartOfCluster);
    private const byte BaseAcceptsRafe    = (byte)(HebrewCharClass.BaseAcceptsRafe | StartOfCluster);
    private const byte BaseAcceptsDagesh  = (byte)(HebrewCharClass.BaseAcceptsDagesh | StartOfCluster);
    private const byte ShinBaseChar       = (byte)(HebrewCharClass.ShinBaseChar | StartOfCluster);

    // ...mark classes do not.
    private const byte ShinSinDot         = (byte)HebrewCharClass.ShinSinDot;
    private const byte Dagesh             = (byte)HebrewCharClass.Dagesh;
    private const byte Rafe               = (byte)HebrewCharClass.Rafe;
    private const byte Holam              = (byte)HebrewCharClass.Holam;
    private const byte Hataf              = (byte)HebrewCharClass.Hataf;
    private const byte ThaanaVowel        = (byte)HebrewCharClass.Hataf;
    private const byte Vowel              = (byte)HebrewCharClass.Vowel;

    // several descriptive names share the LowerMark class
    private const byte Meteg                = (byte)HebrewCharClass.LowerMark;
    private const byte BelowCenterRightMark = (byte)HebrewCharClass.LowerMark;
    private const byte BelowCenterLeftMark  = (byte)HebrewCharClass.LowerMark;
    private const byte BelowLeftMark        = (byte)HebrewCharClass.LowerMark;
    private const byte Puncta               = (byte)HebrewCharClass.Puncta;
    private const byte LowerRightMark       = (byte)HebrewCharClass.LowerRightMark;

    // several descriptive names share the UpperMark class
    private const byte AboveRightMark       = (byte)HebrewCharClass.UpperMark;
    private const byte AboveCenterRightMark = (byte)HebrewCharClass.UpperMark;
    private const byte AboveCenterMark      = (byte)HebrewCharClass.UpperMark;
    private const byte AboveCenterLeftMark  = (byte)HebrewCharClass.UpperMark;
    private const byte UpperLeftMark        = (byte)HebrewCharClass.UpperLeftMark;
    private const byte MasoraCircle         = (byte)HebrewCharClass.MasoraCircle;

    // All the Hebrew Unicode chars (U+590 - U+5FF) classified.
    // Four entries per line; the comment gives the first code point of the line.
    private static readonly byte[] _hebrewCharClasses =
    {
        // U+0590
        UnknownCharClass,     BelowCenterLeftMark,  AboveCenterMark,      AboveCenterMark,      // U+0590
        AboveCenterMark,      AboveCenterMark,      BelowCenterRightMark, AboveCenterLeftMark,  // U+0594
        AboveCenterLeftMark,  UpperLeftMark,        LowerRightMark,       BelowLeftMark,        // U+0598
        AboveCenterLeftMark,  AboveRightMark,       AboveCenterLeftMark,  AboveCenterLeftMark,  // U+059C

        // U+05A0
        AboveRightMark,       AboveCenterLeftMark,  UnknownCharClass,     BelowCenterLeftMark,  // U+05A0
        BelowCenterRightMark, BelowLeftMark,        BelowCenterLeftMark,  BelowCenterRightMark, // U+05A4
        AboveCenterLeftMark,  UpperLeftMark,        BelowCenterRightMark, AboveCenterMark,      // U+05A8
        AboveCenterLeftMark,  LowerRightMark,       UpperLeftMark,        MasoraCircle,         // U+05AC

        // U+05B0
        Vowel,                Hataf,                Hataf,                Hataf,                // U+05B0
        Vowel,                Vowel,                Vowel,                Vowel,                // U+05B4
        Vowel,                Holam,                UnknownCharClass,     Vowel,                // U+05B8
        Dagesh,               Meteg,                PunctuationMark,      Rafe,                 // U+05BC

        // U+05C0
        PunctuationMark,      ShinSinDot,           ShinSinDot,           PunctuationMark,      // U+05C0
        Puncta,               UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     // U+05C4
        UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     // U+05C8
        UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     // U+05CC

        // U+05D0
        BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       // U+05D0
        BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       // U+05D4
        BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       // U+05D8
        BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       // U+05DC

        // U+05E0
        BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       // U+05E0
        BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       // U+05E4
        BaseAcceptsAll,       ShinBaseChar,         BaseAcceptsAll,       UnknownCharClass,     // U+05E8
        UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     // U+05EC

        // U+05F0
        BaseAcceptsRafe,      BaseAcceptsRafe,      BaseAcceptsRafe,      PunctuationMark,      // U+05F0
        PunctuationMark,      UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     // U+05F4
        UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     // U+05F8
        UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     // U+05FC
    };

    // All the Thaana Unicode chars (U+780 - U+7BF) classifications
    private static readonly byte[] _thaanaCharClasses =
    {
        // U+0780
        BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     // U+0780
        BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     // U+0784
        BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     // U+0788
        BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     // U+078C

        // U+0790
        BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     // U+0790
        BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     // U+0794
        BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     // U+0798
        BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     // U+079C

        // U+07A0
        BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     BaseAcceptsVowel,     // U+07A0
        BaseAcceptsVowel,     BaseAcceptsVowel,     ThaanaVowel,          ThaanaVowel,          // U+07A4
        ThaanaVowel,          ThaanaVowel,          ThaanaVowel,          ThaanaVowel,          // U+07A8
        ThaanaVowel,          ThaanaVowel,          ThaanaVowel,          ThaanaVowel,          // U+07AC

        // U+07B0
        ThaanaVowel,          BaseAcceptsVowel,     UnknownCharClass,     UnknownCharClass,     // U+07B0
        UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     // U+07B4
        UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     // U+07B8
        UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     UnknownCharClass,     // U+07BC
    };

    // All the Hebrew Alphabetic Presentation Forms Unicode chars (U+FB1D - U+FB4F).
    // Index 0 corresponds to U+FB1D.
    private static readonly byte[] _alphabeticPresentationFormsCharClasses =
    {
        // U+FB1D - U+FB1F
        BaseAcceptsAll,       Rafe,                 BaseAcceptsRafe,

        // U+FB20
        BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       // U+FB20
        BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       BaseAcceptsAll,       // U+FB24
        BaseAcceptsAll,       PunctuationMark,      BaseAcceptsAll,       BaseAcceptsAll,       // U+FB28
        BaseAcceptsRafe,      BaseAcceptsRafe,      BaseAcceptsAll,       BaseAcceptsAll,       // U+FB2C

        // U+FB30
        BaseAcceptsRafe,      BaseAcceptsRafe,      BaseAcceptsRafe,      BaseAcceptsRafe,      // U+FB30
        BaseAcceptsRafe,      BaseAcceptsRafe,      BaseAcceptsRafe,      UnknownCharClass,     // U+FB34
        BaseAcceptsRafe,      BaseAcceptsRafe,      BaseAcceptsRafe,      BaseAcceptsRafe,      // U+FB38
        BaseAcceptsRafe,      UnknownCharClass,     BaseAcceptsRafe,      UnknownCharClass,     // U+FB3C

        // U+FB40
        BaseAcceptsRafe,      BaseAcceptsRafe,      UnknownCharClass,     BaseAcceptsRafe,      // U+FB40
        BaseAcceptsRafe,      UnknownCharClass,     BaseAcceptsRafe,      BaseAcceptsRafe,      // U+FB44
        BaseAcceptsRafe,      BaseAcceptsRafe,      BaseAcceptsRafe,      BaseAcceptsAll,       // U+FB48
        BaseAcceptsDagesh,    BaseAcceptsDagesh,    BaseAcceptsDagesh,    UnknownCharClass,     // U+FB4C
    };

    #endregion // end of Classification Tables
}
}
// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
// Copyright (c) Microsoft Corporation. All rights reserved.
Link Menu
This book is available now!
Buy at Amazon US or
Buy at Amazon UK
- CLSCompliantAttribute.cs
- SqlProcedureAttribute.cs
- PeerNearMe.cs
- FileIOPermission.cs
- ProfileManager.cs
- NavigationPropertyEmitter.cs
- ToolTipService.cs
- BackStopAuthenticationModule.cs
- MobileTemplatedControlDesigner.cs
- Item.cs
- ApplicationManager.cs
- BinaryObjectInfo.cs
- Pointer.cs
- ThreadAbortException.cs
- LongAverageAggregationOperator.cs
- OracleColumn.cs
- DuplicateDetector.cs
- GregorianCalendarHelper.cs
- XPathItem.cs
- PlainXmlSerializer.cs
- XmlDeclaration.cs
- DocumentSchemaValidator.cs
- GenericRootAutomationPeer.cs
- TextLineResult.cs
- XmlQueryOutput.cs
- EnumerableCollectionView.cs
- MergeExecutor.cs
- JsonDataContract.cs
- _SslSessionsCache.cs
- SoapExtensionImporter.cs
- Parsers.cs
- HttpCapabilitiesBase.cs
- XmlSerializerFaultFormatter.cs
- ModelUtilities.cs
- XmlLinkedNode.cs
- AssemblyInfo.cs
- TypePresenter.xaml.cs
- InkCollectionBehavior.cs
- Debug.cs
- EventDescriptor.cs
- BaseCollection.cs
- ZipIOZip64EndOfCentralDirectoryLocatorBlock.cs
- WebPartTransformerCollection.cs
- ReachDocumentReferenceCollectionSerializer.cs
- LocalizeDesigner.cs
- TrackingDataItem.cs
- FatalException.cs
- AgileSafeNativeMemoryHandle.cs
- FieldBuilder.cs
- BinaryKeyIdentifierClause.cs
- TextPenaltyModule.cs
- ObjectDesignerDataSourceView.cs
- GroupQuery.cs
- PolicyException.cs
- UnaryExpression.cs
- WorkflowDurableInstance.cs
- Win32SafeHandles.cs
- ContentPathSegment.cs
- SqlEnums.cs
- SettingsPropertyCollection.cs
- DataContractSerializerMessageContractImporter.cs
- BasicViewGenerator.cs
- ObjectTag.cs
- ObjectKeyFrameCollection.cs
- ImmutableClientRuntime.cs
- UriTemplate.cs
- BamlResourceDeserializer.cs
- KeyBinding.cs
- FusionWrap.cs
- MultipleViewProviderWrapper.cs
- SetStoryboardSpeedRatio.cs
- SystemResources.cs
- ActivityDesignerAccessibleObject.cs
- BindingManagerDataErrorEventArgs.cs
- ArgumentOutOfRangeException.cs
- SHA256CryptoServiceProvider.cs
- ISO2022Encoding.cs
- BooleanConverter.cs
- CodeTryCatchFinallyStatement.cs
- ServerValidateEventArgs.cs
- BaseTemplateBuildProvider.cs
- Formatter.cs
- ResourceReferenceExpression.cs
- VersionedStream.cs
- CustomCredentialPolicy.cs
- SerializationObjectManager.cs
- ReadOnlyPropertyMetadata.cs
- Subtree.cs
- OdbcPermission.cs
- FontCacheLogic.cs
- InteropExecutor.cs
- DataShape.cs
- WebEvents.cs
- BuildManagerHost.cs
- RuntimeResourceSet.cs
- RuntimeConfigLKG.cs
- LineInfo.cs
- RemoteWebConfigurationHostServer.cs
- FontFamilyValueSerializer.cs
- ExpressionEditorSheet.cs