Code:
/ Dotnetfx_Vista_SP2 / Dotnetfx_Vista_SP2 / 8.0.50727.4016 / DEVDIV / depot / DevDiv / releases / Orcas / QFE / wpf / src / Core / CSharp / MS / Internal / Shaping / arabicshape.cs / 1 / arabicshape.cs
using System;
using System.Security;
using System.Security.Permissions;
using System.Diagnostics;
using System.Collections;
using System.Globalization;
using System.Windows;
using System.Windows.Media;
using System.Windows.Media.TextFormatting;
using MS.Internal.FontCache;
using MS.Internal.FontFace;
using MS.Internal.PresentationCore;
using MS.Utility;
namespace MS.Internal.Shaping
{
/// <summary>
/// ArabicCharClass - classification assigned to each character processed by
/// the Arabic/Syriac shaper.
/// </summary>
/// <remarks>
/// The numeric values matter: the shaping state machine uses the first ten
/// values (0..9) as row/column indices into its state tables, every value
/// greater than <see cref="SyriacAbbrevMark"/> is treated as a diacritic, and
/// GetGlyphs uses the value as a bit position in its per-base diacritic map.
/// Do not reorder or insert members without updating those users.
/// </remarks>
internal enum ArabicCharClass : byte
{
    // ---- base characters ----
    SyriacAleph = 0,                // Syriac alaph rules
    NonJoiningBase = 1,             // character does not join
    JoiningBase = 2,                // character joins on both sides
    RightJoining = 3,               // character joins on right (Arabic like waw and Syriac like taw)
    RightJoiningRules = 4,          // character joins on right with rules (dalath, dotless dalath, rish)
    Kashida = 5,                    // the kashida character
    UnknownChar = 6,
    LastBaseChar = UnknownChar,     // alias: highest value that is still a base character

    // ---- joining controls and the Syriac abbreviation mark ----
    ZWJ = 7,
    ZWNJ = 8,
    SyriacAbbrevMark = 9,           // Syriac abbreviation mark

    // ---- diacritics (everything above SyriacAbbrevMark) ----
    ArabicMark = 10,                // Arabic above diacritics (064B, 064C, 064E, 064F, 0652, 06E1)
                                    // and below diacritics (064D, 0650)
    SyriacLowerGreek = 11,
    Shadda = 12,                    // Arabic shadda (0651)
    KoranicUpperMark = 13,          // Arabic Qur'anic mark above (0610, 0611, 0612, 0613, 0614, 06D6, ...;
                                    // the list is truncated in the original comment)
    KoranicLowerMark = 14,          // Arabic Qur'anic mark below (06E3, 06EA, 06ED)
    SuperscriptAlef = 15,           // Arabic superscript alef (0670)
    Madda = 16,                     // Arabic madda (0653)
    Hamza = 17,                     // Arabic hamza above/below (0654, 0655)
    SyriacLowerLineMark = 18,       // Syriac line type below
    SyriacSeyameAbove = 19,         // Syriac seyame above
    SyriacSeyameBelow = 20,         // Syriac seyame below
    SyriacDotAbove = 21,            // Syriac dot above
    SyriacDotBelow = 22,            // Syriac dot below
    SyriacUpperDuot = 23,           // Syriac two dots above
    SyriacLowerDuot = 24,           // Syriac two dots below
    SyriacUpperTriot = 25,          // Syriac three dots above
    SyriacLowerTriot = 26,          // Syriac three dots below

    NumberOfArabicCharClasses = 27, // count of distinct classes (not itself a class)
}
/// <summary>
/// ArabicShapeClass - enumeration of Arabic shape classifications
/// </summary>
/// <remarks>
/// The ArabicShapeFSM._arabicTextFeatures array has one member for each of
/// these shape classifications, indexed by the numeric value. The order of
/// the values reflects the fact that two of the shape classes (InitialShape
/// and MedialShape) have no correlated shape state table: there is no
/// decision to make when these are the current class (all characters produce
/// the same state change). These two classes are therefore placed after
/// <see cref="LastShapeWithStateTable"/>.
/// </remarks>
internal enum ArabicShapeClass : byte
{
    SameShape = 0,
    UninitializedState = SameShape,         // alias used for detecting initialization of state machine
    NotShaped = 1,
    IsolatedShape = 2,
    InvalidBaseShape = 3,
    FinalShape = 4,
    Medial2Shape = 5,
    Final2Shape = 6,
    Final3Shape = 7,
    LastShapeWithStateTable = Final3Shape,  // alias: last shape that owns rows in the state table
    InitialShape = 8,
    MedialShape = 9,
    NumberOfShapesWithFeatureTag = 10,      // count of shapes (not itself a shape)
}
/// <summary>
/// The ArabicShape class defines the Arabic shaping engine. The IShaper
/// interface it implements provides the shaping methods for the Arabic and Syriac scripts.
/// The shaping engine internally uses the Arabic shaping finite state machine to define
/// the ordering and generation of glyphs.
/// </summary>
internal class ArabicShape : BaseShape
{
/// <summary>
/// Substitution pass 1: localized forms and glyph composition/decomposition.
/// Each entry is constructed with a last argument of 1; elsewhere in this
/// class a Feature's Parameter is set to 1 to enable it and 0 to disable it.
/// </summary>
private static readonly Feature[] _substitutionPass1Features = new Feature[]
{
    new Feature(0, ushort.MaxValue, (uint)FeatureTags.LocalizedForms, 1),
    new Feature(0, ushort.MaxValue, (uint)FeatureTags.GlyphCompositionDecomposition, 1)
};

/// <summary>
/// Substitution pass 3: ligature and connection-form feature tags.
/// </summary>
/// <remarks>
/// ORDER MATTERS. ApplySubstitutionFeatures starts enabling at index 3 when
/// ligatures are inhibited for the run, so the first three entries are the
/// optional ones (can be suppressed by the GetGlyphs caller) and the
/// remaining entries are always applied.
/// </remarks>
private static readonly uint[] _substitutionPass3Features = new uint[]
{
    // index 0..2: optional, skipped when ligatures are inhibited
    (uint)FeatureTags.StandardLigatures,
    (uint)FeatureTags.DiscretionaryLigatures,
    (uint)FeatureTags.ContextualSwash,
    // index 3..: always enabled for pass 3
    (uint)FeatureTags.RequiredLigatures,
    (uint)FeatureTags.ContextualAlternates,
    (uint)FeatureTags.MarkPositioningviaSubstitution
};

/// <summary>
/// Positioning features applied to runs whose font client reports the
/// Arabic script tag (see ApplyPositioningFeatures).
/// </summary>
private static readonly Feature[] _arabicPositioningFeatures = new Feature[]
{
    new Feature(0, ushort.MaxValue, (uint)FeatureTags.CursivePositioning, 1),
    new Feature(0, ushort.MaxValue, (uint)FeatureTags.MarkPositioning, 1),
    new Feature(0, ushort.MaxValue, (uint)FeatureTags.MarktoMarkPositioning, 1),
    new Feature(0, ushort.MaxValue, (uint)FeatureTags.Kerning, 1)
};

/// <summary>
/// Scripts reported by <see cref="SupportedScripts"/>.
/// </summary>
private static readonly ScriptTags[] _supportedScripts = new ScriptTags[]
{
    ScriptTags.Arabic,
    ScriptTags.Syriac
};
/// <summary>
/// Constructor for the Arabic OpenType shaping engine.
/// </summary>
internal ArabicShape()
{
// Arabic and Syriac are written right-to-left. _textFlowDirection is not
// declared in this class; presumably it is inherited from BaseShape.
_textFlowDirection = TextFlowDirection.RTL;
}
/// <summary>
/// Script tags handled by this shaper (Arabic and Syriac). The same shared
/// static array instance is returned on every call.
/// </summary>
public override ScriptTags[] SupportedScripts
{
    get { return _supportedScripts; }
}
/// <summary>
/// ArabicShape.ApplyPositioningFeatures - Arabic implementation of the GetGlyphPlacement helper.
/// Hands the run to the font client for positioning, choosing the feature
/// list by script: the Arabic-specific list for Arabic, the default list
/// for everything else (i.e. Syriac).
/// </summary>
/// <param name="placementInfo">the wrapper for glyph advances, offset arrays</param>
/// <param name="layoutMetrics">metrics for all the positioning features</param>
/// <param name="featureSet">Set of gpos features to be applied to the unicode run (not read by this override).</param>
/// <returns>result of applying features</returns>
/// <SecurityNote>
/// Critical - calls critical code
/// </SecurityNote>
[SecurityCritical]
protected override OpenTypeLayoutResult ApplyPositioningFeatures(
    ref PlacementWorkspace placementInfo,
    ref LayoutMetrics layoutMetrics,
    FeatureSet featureSet )
{
    ShaperFontClient client = placementInfo.FontClient;
    bool isArabicScript = (client.ScriptTag == (uint)ScriptTags.Arabic);

    if (!isArabicScript)
    {
        // Syriac uses the standard positioning features.
        return client.PositionGlyphs(
                        ref placementInfo,
                        ref layoutMetrics,
                        _defaultPositioningFeatures,
                        _defaultPositioningFeatures.Length);
    }

    return client.PositionGlyphs(
                    ref placementInfo,
                    ref layoutMetrics,
                    _arabicPositioningFeatures,
                    _arabicPositioningFeatures.Length);
}
/// <summary>
/// ArabicShape.ApplySubstitutionFeatures - Arabic implementation of the GetGlyphs() helper function.
/// Runs up to three GSUB passes over the run and stops at the first pass
/// that does not report success.
/// </summary>
/// <param name="currentRun">shaping currentRun</param>
/// <param name="featureSet">Set of gsub features to be applied to the unicode run (not read by this override).</param>
/// <returns>result of applying features</returns>
/// <SecurityNote>
/// Critical - this method calls unsafe methods.
/// </SecurityNote>
[SecurityCritical]
protected override OpenTypeLayoutResult ApplySubstitutionFeatures(
    ref ShapingWorkspace currentRun,
    FeatureSet featureSet )
{
    ShaperFontClient client = currentRun.FontClient;
    int pass3Count = _substitutionPass3Features.Length;

    // Pass 1 - localized forms and the ccmp feature.
    OpenTypeLayoutResult result = client.SubstituteGlyphs(
                                        ref currentRun,
                                        _substitutionPass1Features,
                                        _substitutionPass1Features.Length );

    // Pass 2 - the per-character shape features collected by GetGlyphs.
    // The shared list always starts with the (disabled) pass 3 entries, so
    // there is something to apply only if it holds more than those.
    if (result == OpenTypeLayoutResult.Success &&
        ShaperBuffers.TextFeatures.FeaturesCount > pass3Count)
    {
        result = client.SubstituteGlyphs(
                        ref currentRun,
                        ShaperBuffers.TextFeatures.Features,
                        ShaperBuffers.TextFeatures.FeaturesCount );
    }

    if (result != OpenTypeLayoutResult.Success)
    {
        return result;
    }

    // Pass 3 - required ligatures, connection forms, etc. These occupy the
    // front of ShaperBuffers.TextFeatures; switch on the applicable ones.
    // When ligatures are inhibited the first three (optional) entries stay off.
    ushort featureIx = (ushort)(currentRun.AreLigaturesInhibited ? 3 : 0);
    while (featureIx < pass3Count)
    {
        ShaperBuffers.TextFeatures.Features[featureIx].Parameter = 1;   // enable this one
        ++featureIx;
    }

    result = client.SubstituteGlyphs(
                    ref currentRun,
                    ShaperBuffers.TextFeatures.Features,
                    pass3Count );

    // Switch the same entries off again so the next GetGlyphs call starts
    // from an all-disabled list.
    featureIx = (ushort)(currentRun.AreLigaturesInhibited ? 3 : 0);
    while (featureIx < pass3Count)
    {
        ShaperBuffers.TextFeatures.Features[featureIx].Parameter = 0;   // disable this one
        ++featureIx;
    }

    return result;
}
/// <summary>
/// ArabicShape.GetCharClassifier - creates the character classifier used by
/// this shaper.
/// </summary>
/// <param name="scriptTag">script tag forwarded to the classifier</param>
/// <param name="fontFace">font face forwarded to the classifier</param>
/// <returns>a new ArabicCharClassifier instance</returns>
protected override ShaperCharacterClassifier GetCharClassifier(ScriptTags scriptTag, GlyphTypeface fontFace)
{
    ShaperCharacterClassifier classifier = new ArabicCharClassifier(scriptTag, fontFace);
    return classifier;
}
/// <summary>
/// ArabicShape.GetGlyphs - Arabic implementation of the GetGlyphs() helper function.
/// Walks the characters of the run, drives the shaping state machine (which
/// records the per-character shape features), validates diacritics and
/// handles the Syriac abbreviation mark (SAM).
/// </summary>
/// <param name="currentRun">shaping currentRun</param>
/// <param name="item">Text item (not read by this override)</param>
/// <returns>number of glyphs added to the run by this call</returns>
/// <SecurityNote>
/// Critical - calls critical code
/// </SecurityNote>
[SecurityCritical]
protected override int GetGlyphs ( ref ShapingWorkspace currentRun, Item item )
{
    uint currentDiacriticsMap = 0;      // one bit per diacritic class already seen on the current base
    bool isInvalidChar = true;          // if first char is a diacritic, we're ready!
    int startingGlyphIx = currentRun.NextGlyphIx;
    bool isSamPending = false;          // a SAM was seen and has not been completed yet
    ushort samGlyph = 0;                // glyph of the first SAM seen; reused for inserted SAMs

    RecordTraceEvent(MS.Utility.EventType.StartEvent,
                     "ArabicShape Init Start");

    // initialize the features array
    ShaperBuffers.InitializeFeatureList(0,(ushort)_substitutionPass3Features.Length);
    if (ShaperBuffers.TextFeatures.FeaturesCount == 0)
    {
        // We include the _substitutionPass3Features in this feature
        // array (note that they are all set to disabled)
        for (int i = 0; i < _substitutionPass3Features.Length; ++i)
        {
            ShaperBuffers.TextFeatures.AddFeature(
                            new Feature( 0,
                                         ushort.MaxValue,
                                         _substitutionPass3Features[i],
                                         0 ));
        }
    }

    // initialize the cluster cop/state machine
    ArabicShapeFSM stateMachine = new ArabicShapeFSM( ref currentRun, ShaperBuffers.TextFeatures );

    RecordTraceEvent(MS.Utility.EventType.EndEvent,
                     "ArabicShape Init End");
    RecordTraceEvent(MS.Utility.EventType.StartEvent,
                     "ArabicShape ShapeArabicText Start");

    // Shape and initialize the glyph list:
    // process the char stream, creating shape info, applying features
    // as necessary
    char currChar;
    ushort currGlyph;
    CharShapeInfo currShape;
    while ( currentRun.GetNextCharProperties (out currChar, out currGlyph, out currShape) )
    {
        ArabicCharClass currClass = (ArabicCharClass)(currShape & CharShapeInfo.ShaperClassMask);
        stateMachine.StepToNextState(ref currentRun, currClass);

        // validate this character
        if (currClass > ArabicCharClass.SyriacAbbrevMark)
        {
            // This is a diacritic. Check for validity (only one of each
            // diacritic class per base).
            if (!isInvalidChar)
            {
                uint diacMapEntry = (uint)(1 << (int)currClass);
                if ((diacMapEntry & currentDiacriticsMap) != 0)
                {
                    // This is a duplicate mark (not allowed), or the
                    // current state doesn't allow it, so we will insert
                    // a dotted circle.
                    isInvalidChar = true;
                    stateMachine.ForceInvalidBaseState();
                }
                else
                {
                    currentDiacriticsMap |= diacMapEntry;
                }
            }

            if (isInvalidChar)
            {
                currShape |= CharShapeInfo.RequiresInsertedBase;
            }
        }
        else
        {
            if (currClass == ArabicCharClass.SyriacAbbrevMark)
            {
                if (!isSamPending)
                {
                    // keep this glyph for use in the future
                    if (samGlyph == 0)
                    {
                        samGlyph = currGlyph;
                    }
                }
                else
                {
                    // a second SAM arrived while one was still pending:
                    // complete the earlier one first
                    InsertSyriacAbbrevMarkGlyph(ref currentRun, ref currShape, samGlyph);
                }
                isSamPending = true;
            }

            // a new base starts: forget the diacritics seen on the previous one
            currentDiacriticsMap = 0;
            isInvalidChar = false;
        }

        // If there's a SAM pending, insert a completion
        // SAM if it's appropriate (ie, if the current character
        // is an illegal diacritic or the current character isn't
        // legal as an abbreviation)
        if (isSamPending &&
            (isInvalidChar ||
             !ArabicCharClassifier.IsCharacterValidWithSyriacAbrevMark(currentRun.CurrentChar)))
        {
            // handle the abbreviation mark
            InsertSyriacAbbrevMarkGlyph(ref currentRun, ref currShape, samGlyph);
            isSamPending = false;
        }

        currentRun.SetGlyphPropertiesUsingGlyph ( currShape, currGlyph );
    }

    RecordTraceEvent(MS.Utility.EventType.EndEvent,
                     "ArabicShape ShapeArabicText End");

    return (currentRun.GlyphsCount - startingGlyphIx); // we're done
}

/// <summary>
/// Adds one glyph slot to the run, fills it with the remembered SAM glyph
/// using the current character's shape info, and then clears the
/// start-of-cluster flag from that shape info so the glyph written next for
/// the current character does not start a second cluster.
/// </summary>
/// <param name="currentRun">shaping currentRun</param>
/// <param name="currShape">shape info of the current character; IsStartOfCluster is cleared on return</param>
/// <param name="samGlyph">glyph to use for the inserted abbreviation mark</param>
/// <SecurityNote>
/// Critical - calls critical code
/// </SecurityNote>
[SecurityCritical]
private static void InsertSyriacAbbrevMarkGlyph(
    ref ShapingWorkspace currentRun,
    ref CharShapeInfo currShape,
    ushort samGlyph )
{
    currentRun.AddGlyphs(1);
    currentRun.SetGlyphPropertiesUsingGlyph(currShape, samGlyph);
    if ((currShape & CharShapeInfo.IsStartOfCluster) != 0)
    {
        currShape ^= CharShapeInfo.IsStartOfCluster;
    }
}
}
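/// <summary>
/// ArabicShapeFSM - Arabic shaping state machine. It is stepped once per
/// character and records the shape feature chosen for each character in a
/// ShaperFeaturesList.
/// </summary>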
internal struct ArabicShapeFSM
{
// Shape produced by the most recent step. SameShape means the last
// character left the state untouched.
private ArabicShapeClass _currentShape;
// Shape in effect before the most recent state change. Starts as
// UninitializedState until SetStartState has processed the first character.
private ArabicShapeClass _previousShape;
// Index into _arabicShapeStateTable of the first entry of the active row
// (base offset of the current shape + row offset of the current char class).
private ushort _currentStateRowIx;
// Class of the last character that caused a state change. Seeded with
// ZWJ or ZWNJ by the constructor.
private ArabicCharClass _currentCharClass;
// True when the run's font client reports the Syriac script tag.
private bool _isSyriacText;
// List that receives the per-character shape features.
private ShaperFeaturesList _featuresList;
/// <summary>
/// ArabicShapeFSM constructor. Puts the machine into its pre-start state:
/// current shape NotShaped, previous shape UninitializedState, row index 0.
/// </summary>
/// <param name="currentRun">run about to be shaped; read for script tag and leading-join information</param>
/// <param name="featuresList">list that will receive the shape features</param>
public ArabicShapeFSM (ref ShapingWorkspace currentRun,
                       ShaperFeaturesList featuresList)
{
    _featuresList = featuresList;
    _isSyriacText = (currentRun.FontClient.ScriptTag == (uint)ScriptTags.Syriac);

    _previousShape = ArabicShapeClass.UninitializedState;
    _currentShape = ArabicShapeClass.NotShaped;
    _currentStateRowIx = 0;

    // Pretend a joiner precedes the text when the run starts at character 0
    // and reports a leading join; otherwise pretend a non-joiner does.
    bool joinsWithPrecedingText = (currentRun.CurrentCharIx == 0 && currentRun.HasLeadingJoin);
    if (joinsWithPrecedingText)
    {
        _currentCharClass = ArabicCharClass.ZWJ;
    }
    else
    {
        _currentCharClass = ArabicCharClass.ZWNJ;
    }
}
/// <summary>
/// ArabicShapeFSM.ForceInvalidBaseState - force the state machine into the
/// invalid base state.
/// </summary>
/// <remarks>
/// Both the current and the previous shape become InvalidBaseShape, and the
/// active table row becomes the UnknownChar row of the InvalidBaseShape
/// section. _currentCharClass is left unchanged.
/// </remarks>
public void ForceInvalidBaseState ( )
{
    _currentShape = ArabicShapeClass.InvalidBaseShape;
    _previousShape = _currentShape;

    int sectionStart = _baseTableFirstEntryIx[ (ushort)_currentShape ];
    int rowOffset = _rowFirstEntryIx[ (ushort)ArabicCharClass.UnknownChar ];
    _currentStateRowIx = (ushort)(sectionStart + rowOffset);
}
/// <summary>
/// True while the current shape is InvalidBaseShape.
/// </summary>
public bool IsInvalidBaseState
{
    get
    {
        return _currentShape == ArabicShapeClass.InvalidBaseShape;
    }
}
/// <summary>
/// ArabicShapeFSM.SetStartState - processes the very first character seen
/// by the state machine.
/// </summary>
/// <remarks>
/// A leading diacritic forces the invalid base state. A joining base
/// (SyriacAleph, JoiningBase, RightJoining, RightJoiningRules) starts in
/// FinalShape when the machine was seeded with ZWJ and in IsolatedShape
/// otherwise, and a feature is recorded for it. Any other class leaves the
/// shape at NotShaped. Note that, unlike UpdateShapeInfo, no Syriac special
/// case is applied to IsolatedShape here.
/// </remarks>
/// <param name="currentRun">shaping currentRun</param>
/// <param name="firstCharClass">class of the first character</param>
/// <SecurityNote>
/// Critical - calls critical code (ForceInvalidBaseState)
/// </SecurityNote>
[SecurityCritical]
private void SetStartState ( ref ShapingWorkspace currentRun,
                             ArabicCharClass firstCharClass )
{
    Debug.Assert (_currentShape == ArabicShapeClass.NotShaped &&
                  _currentStateRowIx == 0, "Arabic FSM SetStartState should not be called now.");

    _previousShape = _currentShape;

    if (firstCharClass > ArabicCharClass.SyriacAbbrevMark)
    {
        // diacritics may not be first character
        ForceInvalidBaseState ();
        return;
    }

    bool isJoiningBase = firstCharClass <= ArabicCharClass.RightJoiningRules &&
                         firstCharClass != ArabicCharClass.NonJoiningBase;
    if (isJoiningBase)
    {
        // All joining base chars mean we start in a different state than
        // NotShaped.
        if (_currentCharClass == ArabicCharClass.ZWJ)
        {
            _currentShape = ArabicShapeClass.FinalShape;
        }
        else
        {
            _currentShape = ArabicShapeClass.IsolatedShape;
        }

        _featuresList.AddFeature(currentRun.CurrentCharIx,
                                 _arabicTextFeatures[ (int)_currentShape ]);
    }

    _currentCharClass = firstCharClass;

    int sectionStart = _baseTableFirstEntryIx[ (int)_currentShape ];
    int rowOffset = _rowFirstEntryIx[ (int)_currentCharClass ];
    _currentStateRowIx = (ushort)(sectionStart + rowOffset);
}
/// <summary>
/// ArabicShapeFSM.StepToNextState - process the latest character.
/// </summary>
/// <remarks>
/// Steps the state machine based on the current state, the last character
/// that caused a state change and the class of the next character. The
/// first call is routed to SetStartState. Diacritics are resolved through
/// _arabicDiacriticShapeStateTable, all other classes through
/// _arabicShapeStateTable. Nothing is returned; the outcome is stored in the
/// machine's fields and in the features list.
/// </remarks>
/// <param name="currentRun">shaping currentRun</param>
/// <param name="nextCharClass">class of the character being processed</param>
/// <SecurityNote>
/// Critical - calls critical code, uses unsafe accessors
/// </SecurityNote>
[SecurityCritical]
public void StepToNextState ( ref ShapingWorkspace currentRun, ArabicCharClass nextCharClass )
{
    Debug.Assert ( (byte)nextCharClass < NumberOfCharClasses );

    if ( _currentShape != ArabicShapeClass.SameShape )
    {
        if (_previousShape == ArabicShapeClass.UninitializedState)
        {
            // very first character of the run
            SetStartState(ref currentRun, nextCharClass);
            return;
        }

        // keep track of where we're coming from.
        _previousShape = _currentShape;
    }

    if (nextCharClass > ArabicCharClass.SyriacAbbrevMark)
    {
        // All diacritics are checked via _arabicDiacriticShapeStateTable[]:
        // row = previous shape (minus one, there is no SameShape row),
        // column = class of the last state-changing character.
        // The current shape ends up either as InvalidBaseShape or SameShape.
        int diacriticRowStart = _rowFirstEntryIx[ (int)_previousShape - 1 ];
        ushort diacTableIx = (ushort)(diacriticRowStart + (ushort)_currentCharClass);
        _currentShape = (ArabicShapeClass) _arabicDiacriticShapeStateTable[ diacTableIx ];

        Debug.Assert(_currentShape == ArabicShapeClass.SameShape ||
                     _currentShape == ArabicShapeClass.InvalidBaseShape,"Arabic diacritic table has invalid value");

        bool baseIsInvalid = _previousShape == ArabicShapeClass.InvalidBaseShape ||
                             _currentShape == ArabicShapeClass.InvalidBaseShape;
        if (baseIsInvalid)
        {
            ForceInvalidBaseState();
        }
        return;
    }

    // get the shape state table entry.
    byte shapeTableEntry = _arabicShapeStateTable[ _currentStateRowIx + (ushort) nextCharClass];

    if (shapeTableEntry == NoShapeChange)
    {
        // this latest character doesn't affect the current state AT ALL.
        _currentShape = ArabicShapeClass.SameShape;
        return;
    }

    if (shapeTableEntry == InvalidBase)
    {
        ForceInvalidBaseState();
        return;
    }

    // change our state (or shape): the low nibble is the new current shape
    _currentShape = (ArabicShapeClass) (shapeTableEntry & 0xf);
    Debug.Assert(_currentShape <= ArabicShapeClass.LastShapeWithStateTable,"invalid entry in Arabic state table");

    _currentCharClass = nextCharClass;  // keep up-to-date

    // move _currentStateRowIx to the first entry of the newly active
    // state table row.
    int sectionStart = _baseTableFirstEntryIx[ (ushort) _currentShape ];
    int rowOffset = _rowFirstEntryIx[ (ushort) nextCharClass ];
    _currentStateRowIx = (ushort)(sectionStart + rowOffset);
    Debug.Assert( _currentStateRowIx < _baseTableFirstEntryIx[ (ushort) _currentShape + 1 ],"invalid entry in Arabic state table");

    UpdateShapeInfo(ref currentRun, shapeTableEntry);
}
/// <summary>
/// ArabicShapeFSM.UpdateShapeInfo - records the features implied by a state
/// table entry once the machine's current shape has been resolved.
/// </summary>
/// <remarks>
/// The upper nibble of <paramref name="shapeTableEntry"/>, when non-zero, is
/// a replacement shape for the previous character; its feature is either
/// added (Syriac, previous shape was isolated) or updated in place. A feature
/// for the current character is then added, except for Syriac isolated
/// shapes, which are treated as "not shaped". Nothing is returned.
/// </remarks>
/// <param name="currentRun">shaping currentRun</param>
/// <param name="shapeTableEntry">entry read from _arabicShapeStateTable</param>
/// <SecurityNote>
/// Critical - calls critical code, uses unsafe accessors
/// </SecurityNote>
[SecurityCritical]
private void UpdateShapeInfo ( ref ShapingWorkspace currentRun, byte shapeTableEntry )
{
    int previousShapeUpdate = shapeTableEntry >> 4;

    if (previousShapeUpdate != NoShapeChange)
    {
        ArabicShapeClass shapeBeforeUpdate = _previousShape;
        _previousShape = (ArabicShapeClass) previousShapeUpdate;
        uint previousFeatureTag = _arabicTextFeatures[(int)_previousShape];

        if (_isSyriacText && shapeBeforeUpdate == ArabicShapeClass.IsolatedShape)
        {
            // for Syriac text, the previous isolated shape did not cause
            // a feature to be added (we treated it as a "Not Shaped") so
            // we want to add a feature, not update one
            _featuresList.AddFeature( currentRun.PreviousCharIx, previousFeatureTag );
        }
        else
        {
            // update the previous shape's feature tag. If this updated feature tag matches
            // the latestCountedFeature, then it's not a new feature and we don't want to set
            // the "start of feature" flag. For example, in the following sequence...
            //
            //  Event                                        Feature Tags:
            //  1st char processed, set to Isolated form     Iso   N.A    N.A.   N.A.
            //  2nd char, changes 1st form, sets own form    Ini   Final  N.A.   N.A.
            //  3rd char, changes 2nd form, set own          Ini   Mid    Final  N.A.
            //  4th char, changes 3rd form, sets own         Ini   Mid    Mid    Final
            //
            // ... we expect the "start of feature" flag to be set on the 1st, 2nd, and 4th
            // shapes (once we've processed all four chars)
            _featuresList.UpdatePreviousShapedChar( previousFeatureTag );
        }
    }

    // now set up the current shape...
    // (for syriac "isolated shape", treat as "not shaped")
    if ( !_isSyriacText || _currentShape != ArabicShapeClass.IsolatedShape )
    {
        _featuresList.AddFeature( currentRun.CurrentCharIx,
                                  _arabicTextFeatures[(int)_currentShape] );
    }
}
// These constants exist to make _arabicShapeStateTable and
// _arabicDiacriticShapeStateTable more readable.
//
// Encoding of a state table entry (see StepToNextState / UpdateShapeInfo):
//   low nibble  - the new current shape (an ArabicShapeClass value)
//   high nibble - if non-zero, the shape the PREVIOUS character must be
//                 changed to
// "ToX" constants carry only a low nibble, "FromX" constants only a high
// nibble, and "XToY" constants combine both.
private const byte NoShapeChange = (byte)ArabicShapeClass.SameShape;
private const byte ToNotShaped = (byte) ArabicShapeClass.NotShaped;
private const byte ToIsolated = (byte) ArabicShapeClass.IsolatedShape;
private const byte ToFinal = (byte) ArabicShapeClass.FinalShape;
private const byte ToFinal2 = (byte) ArabicShapeClass.Final2Shape;
private const byte ToFinal3 = (byte) ArabicShapeClass.Final3Shape;
// previous character becomes Isolated
private const byte FromIsolatedShape = ToIsolated << 4;
private const byte IsolatedToNotShaped = FromIsolatedShape | ToNotShaped;
private const byte IsolatedToIsolated = FromIsolatedShape | ToIsolated;
private const byte IsolatedToFinal2 = FromIsolatedShape | ToFinal2;
// previous character becomes Initial
private const byte FromInitialShape = (byte) ArabicShapeClass.InitialShape << 4;
private const byte InitialToNotShaped = FromInitialShape | ToNotShaped;
private const byte InitialToFinal = FromInitialShape | ToFinal;
// previous character becomes Medial
private const byte FromMedialShape = (byte) ArabicShapeClass.MedialShape << 4;
private const byte MedialToNotShaped = FromMedialShape | ToNotShaped;
private const byte MedialToFinal = FromMedialShape | ToFinal;
// previous character becomes Medial2
private const byte FromMedial2Shape = (byte) ArabicShapeClass.Medial2Shape << 4;
private const byte Medial2ToNotShaped = FromMedial2Shape | ToNotShaped;
private const byte Medial2ToIsolated = FromMedial2Shape | ToIsolated;
private const byte Medial2ToFinal2 = FromMedial2Shape | ToFinal2;
// previous character becomes Final
private const byte FromFinalShape = (byte) ArabicShapeClass.FinalShape << 4;
private const byte FinalToNotShaped = FromFinalShape | ToNotShaped;
// entry value that makes StepToNextState call ForceInvalidBaseState
private const byte InvalidBase = (byte) ArabicShapeClass.InvalidBaseShape;
// upper bound used by the Debug.Assert in StepToNextState
private const byte NumberOfCharClasses = (byte) ArabicCharClass.NumberOfArabicCharClasses;
// This array (_arabicShapeStateTable) is a compressed version of the effective array,
// shapeState[ current shape ][ current char class ][ next char class ].
// The current base shape is determined by the latest base character seen. A given base shape
// can be selected by one or more base char classes. Each base char shape + class pair is a
// row in this array; each row has an entry (next shape state) for every char class that affects
// the shape state (there are 10 such char classes - the first 10 class defs in ArabicCharClass).
//
// So, use this array to find the next shape state, effectively...
// next shape = _arabicShapeStateTable [ _currentStateRowIx + next char class ]
// where
// _currentStateRowIx = _baseTableFirstEntryIx[ current shape ] + _rowFirstEntryIx[ current char class ]
//
// In each section below, rows are the CURRENT char class and columns are the NEXT char class.
// The number of rows per section must match _baseTableFirstEntryIx (10, 5, 10, 5, 1, 1, 1).
//
// NOTE(review): no entry in this table has Medial2Shape in its low nibble, so the
// "Medial2 Shape" section does not appear to be reachable as a current state - confirm
// before relying on (or editing) that row.
private static readonly byte[] _arabicShapeStateTable =
{
//
// Current state is NotShaped...
// 0 1 2 3 4 5 6 7 8 9 Next/ Current
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, ToNotShaped, // 0-SyriacAleph,
ToIsolated, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 1-NonJoiningBase,
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, ToNotShaped, // 2-JoiningBase,
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, ToNotShaped, // 3-RightJoining,
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, ToNotShaped, // 4-RightJoiningRules,
ToFinal, ToNotShaped, ToFinal, ToFinal, ToFinal, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 5-Kashida,
ToIsolated, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 6-UnknownChar,
ToFinal, ToNotShaped, ToFinal, ToFinal, ToFinal, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 7-ZWJ,
ToIsolated, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 8-ZWNJ,
ToIsolated, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 9-SyriacAbbrevMark,
//
// Current state is Isolated Shape...
// 0 1 2 3 4 5 6 7 8 9 Next/ Current
ToIsolated, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 0-SyriacAleph,
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, // 1-NonJoiningBase,
InitialToFinal,ToNotShaped, InitialToFinal,InitialToFinal,InitialToFinal,InitialToNotShaped, ToNotShaped, InitialToNotShaped, ToNotShaped, ToNotShaped, // 2-JoiningBase,
ToFinal2, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 3-RightJoining,
ToFinal3, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 4-RightJoiningRules,
//
// Current state is Invalid Base Shape...
// 0 1 2 3 4 5 6 7 8 9 Next/ Current
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, ToNotShaped, // 0-SyriacAleph,
ToIsolated, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 1-NonJoiningBase,
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, ToNotShaped, // 2-JoiningBase,
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, ToNotShaped, // 3-RightJoining,
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, ToNotShaped, // 4-RightJoiningRules,
ToFinal, ToNotShaped, ToFinal, ToFinal, ToFinal, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 5-Kashida,
ToIsolated, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 6-UnknownChar,
ToFinal, ToNotShaped, ToFinal, ToFinal, ToFinal, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 7-ZWJ,
ToIsolated, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 8-ZWNJ,
NoShapeChange,NoShapeChange,NoShapeChange,NoShapeChange,NoShapeChange,NoShapeChange,NoShapeChange,NoShapeChange,NoShapeChange,NoShapeChange,// 9-SyriacAbbrevMark,
//
// Current state is Final Shape...
// 0 1 2 3 4 5 6 7 8 9 Next/ Current
Medial2ToFinal2,ToNotShaped,Medial2ToIsolated,Medial2ToIsolated,Medial2ToIsolated,Medial2ToNotShaped, ToNotShaped, Medial2ToNotShaped, ToNotShaped, NoShapeChange, // 0-SyriacAleph,
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, // 1-NonJoiningBase,
MedialToFinal,ToNotShaped, MedialToFinal,MedialToFinal,MedialToFinal,MedialToNotShaped,ToNotShaped, MedialToNotShaped, ToNotShaped, NoShapeChange, // 2-JoiningBase,
ToFinal2, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, NoShapeChange, // 3-RightJoining,
ToFinal3, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, NoShapeChange, // 4-RightJoiningRules,
//
// Current state is Medial2 Shape...
// 0 1 2 3 4 5 6 7 8 9 Next/ Current
ToFinal2, FinalToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToFinal2, ToNotShaped, ToFinal2, FinalToNotShaped, NoShapeChange, // 0-SyriacAleph,
//
// Current state is Final2 Shape...
// 0 1 2 3 4 5 6 7 8 9 Next/ Current
IsolatedToFinal2, ToNotShaped, IsolatedToIsolated, IsolatedToIsolated, IsolatedToIsolated, IsolatedToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, NoShapeChange, // 0-SyriacAleph,
//
// Current state is Final3 Shape...
// 0 1 2 3 4 5 6 7 8 9 Next/ Current
IsolatedToFinal2, ToNotShaped, IsolatedToIsolated, IsolatedToIsolated, IsolatedToIsolated, IsolatedToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, NoShapeChange, // 0-SyriacAleph,
};
// This table is used differently than the _arabicShapeStateTable[]. This table is used
// when the latest char is a diacritic class. The row is selected by the shape that was in
// effect before the diacritic (StepToNextState uses _rowFirstEntryIx[ previous shape - 1 ],
// there is no row for SameShape) and the column by the class of the last character that
// caused a state change. For all the diacritics there are only two actions possible: either
// the diacritic causes no state change or it causes the state to change to InvalidBase.
private static readonly byte[] _arabicDiacriticShapeStateTable =
{
//
// Current char is diacritic ...
// 0 1 2 3 4 5 6 7 8 9 Current char class / Previous shape
InvalidBase, NoShapeChange,InvalidBase, InvalidBase, InvalidBase, NoShapeChange,NoShapeChange,NoShapeChange,NoShapeChange,InvalidBase, // NotShaped,
NoShapeChange,InvalidBase, NoShapeChange,NoShapeChange,NoShapeChange,InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, // IsolatedShape,
InvalidBase, NoShapeChange,InvalidBase, InvalidBase, InvalidBase, NoShapeChange,NoShapeChange,NoShapeChange,NoShapeChange,NoShapeChange, // InvalidBaseShape,
NoShapeChange,InvalidBase, NoShapeChange,NoShapeChange,NoShapeChange,InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, // FinalShape,
NoShapeChange,InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, // Medial2Shape,
NoShapeChange,InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, // Final2Shape,
NoShapeChange,InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, // Final3Shape,
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, // InitialShape,
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, // MedialShape,
};
// This array has the starting offset of each shape's section in
// _arabicShapeStateTable[], indexed by ArabicShapeClass value. The
// numbers are 'tally of all previous shape rows X 10 entries per row'.
// The two trailing entries (Initial, Medial) have no section of their own
// and hold the total table length (33 rows); the Final3Shape case of the
// Debug.Assert in StepToNextState reads the entry after Final3Shape as an
// upper bound.
private static readonly ushort[] _baseTableFirstEntryIx =
{
0, // SameShape
0, // NotShaped
10 * 10, // IsolatedShape
15 * 10, // InvalidBaseShape
25 * 10, // FinalShape
30 * 10, // Medial2Shape
31 * 10, // Final2Shape
32 * 10, // Final3Shape
33 * 10, // Initial (Invalid)
33 * 10, // Medial (Invalid)
};
// This array has the starting offset of a row, relative to the first entry
// of whatever state lookup table section is active. It is used instead of
// multiplying the row number by 10 every time a state table is accessed, and
// it serves both _arabicShapeStateTable[] and _arabicDiacriticShapeStateTable[].
// The rows of the two arrays are selected by different variables:
//   - in _arabicShapeStateTable the row is selected by the CURRENT char
//     class (index = ArabicCharClass value);
//   - in _arabicDiacriticShapeStateTable the row is selected by the PREVIOUS
//     shape (index = ArabicShapeClass value - 1, there is no SameShape row).
private static readonly ushort[] _rowFirstEntryIx =
{
// row entry ix _arabicShapeStateTable _arabicDiacriticShapeStateTable
0, // SyriacAleph NotShaped
10, // NonJoiningBase IsolatedShape
20, // JoiningBase InvalidBaseShape
30, // RightJoining FinalShape
40, // RightJoiningRules Medial2Shape
50, // Kashida Final2Shape
60, // UnknownChar Final3Shape
70, // ZWJ InitialShape
80, // ZWNJ MedialShape
90, // SyriacAbbrevMark
};
// Feature tag recorded in the features list for each shape, indexed by
// ArabicShapeClass value. Entries that are not real feature tags (0 and 1)
// are placeholder values; per the notes below they are required by
// ShaperFeaturesList.
private static readonly uint[] _arabicTextFeatures =
{
0, // Same (this is required by ShaperFeaturesList)
1, // NotShaped (this is required by ShaperFeaturesList)
(uint)FeatureTags.IsolatedForms, // IsolatedShape
1, // InvalidBaseShape
(uint)FeatureTags.TerminalForms, // FinalShape
(uint)FeatureTags.MedialForms2, // Medial2Shape
(uint)FeatureTags.TerminalForms2, // Final2Shape
(uint)FeatureTags.TerminalForms3, // Final3Shape
(uint)FeatureTags.InitialForms, // InitialShape
(uint)FeatureTags.MedialForms, // MedialShape
};
}
/// <summary>
/// ArabicCharClassifier - character classifier for the Arabic and Syriac scripts.
/// </summary>
internal class ArabicCharClassifier : ShaperCharacterClassifier
{
/// <summary>
/// Creates the classifier and configures the lookup settings it uses. The
/// fields assigned here are not declared in this class; presumably they
/// belong to ShaperCharacterClassifier - confirm against the base class.
/// </summary>
/// <param name="scriptTag">script tag, forwarded to the base constructor</param>
/// <param name="fontFace">font face, forwarded to the base constructor</param>
public ArabicCharClassifier(ScriptTags scriptTag, GlyphTypeface fontFace) : base(scriptTag, fontFace)
{
// NonJoiningBase / JoiningBase here are the byte constants of this class,
// i.e. the char class with the start-of-cluster flag already OR-ed in.
_unknownClass = NonJoiningBase;
_spaceClass = NonJoiningBase;
_zwControlClass = NonJoiningBase;
// ZWJ and ZWNJ are mapped to the plain joining / non-joining base classes,
// not to ArabicCharClass.ZWJ / ArabicCharClass.ZWNJ.
_zwjClass = JoiningBase;
_zwnjClass = NonJoiningBase;
_shyClass = (byte)ArabicCharClass.UnknownChar;
_firstChar = '\u0600'; // this is the first Arabic Unicode char
_lastChar = '\u077F'; // last char covered by the class table (original note: "the last Syriac Unicode char")
// presumably used by the base class as (ch ^ _xorMask) < _xorRange, which
// selects U+0600..U+077F - TODO confirm against ShaperCharacterClassifier
_xorMask = 0x600;
_xorRange = 0x180;
_charClassTable = _arabicSyriacCharClasses;
}
// Range of combining diacritics (outside the Arabic/Syriac blocks) that gets
// its class from _syriacExtraClasses. The table is indexed with
// (ch - SyriacExtraDiacriticsUnicodeRangeStart).
const char SyriacExtraDiacriticsUnicodeRangeStart = '\u0300';
// const ushort SyriacExtraDiacriticsUnicodeRangeEnd = '\u033F';
//
// Range test used by the callers: (ch ^ SyriacExtraUnicodeXorMask) < SyriacExtraUnicodeXorRange.
// With a power-of-two range this is true exactly for ch in [mask, mask + range),
// so the mask has to equal the range start. It was previously '\u03c0', which
// made the test select U+03C0..U+03FF: the intended U+0300..U+033F marks were
// never matched, and a character in U+03C0..U+03FF produced a table index of
// 0xC0..0xFF.
// NOTE(review): _syriacExtraClasses is assumed to hold at least 0x40 entries
// (U+0300..U+033F) - confirm against the table definition.
const char SyriacExtraUnicodeXorMask = '\u0300';
const char SyriacExtraUnicodeXorRange = '\u0040';
/// <summary>
/// ArabicCharClassifier.ToShapeInfo - returns CharShapeInfo for the Unicode character
/// </summary>
/// <remarks>
/// The base classifier is consulted first. Only when it reports
/// ArabicCharClass.UnknownChar and the character falls inside the window
/// selected by SyriacExtraUnicodeXorMask / SyriacExtraUnicodeXorRange is the
/// result replaced by the entry from _syriacExtraClasses.
/// </remarks>
/// <param name="unicodeChar">character to classify</param>
/// <returns>shape info (class plus flags) for the character</returns>
public override CharShapeInfo ToShapeInfo (char unicodeChar )
{
    CharShapeInfo charShape = base.ToShapeInfo(unicodeChar);
    ArabicCharClass charClass = (ArabicCharClass)(charShape & CharShapeInfo.ShaperClassMask);

    // UnknownChar is also LastBaseChar, so no separate "is a base character"
    // test is needed before this comparison.
    if (charClass == ArabicCharClass.UnknownChar &&
        (unicodeChar ^ SyriacExtraUnicodeXorMask) < SyriacExtraUnicodeXorRange)
    {
        charShape = (CharShapeInfo)(_syriacExtraClasses[unicodeChar - SyriacExtraDiacriticsUnicodeRangeStart]);
    }

    return charShape;
}
/// <summary>
/// Check whether a character may take part in a Syriac abbreviation, i.e. whether a
/// pending Syriac Abbreviation Mark stays pending when this character is seen.
/// </summary>
/// <param name="currChar">character to test</param>
/// <returns>
/// true for U+070F..U+074F, or for a character that passes the
/// SyriacExtraUnicodeXorMask / SyriacExtraUnicodeXorRange test and whose
/// _syriacExtraClasses entry is not NonJoiningBase; false otherwise.
/// </returns>
public static bool IsCharacterValidWithSyriacAbrevMark(char currChar )
{
    // Syriac block characters from the abbreviation mark itself onwards
    if (currChar >= '\u070f' && currChar <= '\u074F')
    {
        return true;
    }

    // anything failing the extra-diacritics range test is not valid
    if ((currChar ^ SyriacExtraUnicodeXorMask) >= SyriacExtraUnicodeXorRange)
    {
        return false;
    }

    int extraIx = currChar - SyriacExtraDiacriticsUnicodeRangeStart;
    return _syriacExtraClasses[extraIx] != NonJoiningBase;
}
#region Classification Tables
// These consts exist so that the classification tables below are readable.
// Each one is the byte stored in a table entry for that class.
// Base character classes are OR-ed with the start-of-cluster flag...
private const ArabicCharClass StartOfCluster = (ArabicCharClass)CharShapeInfo.IsStartOfCluster;
private const byte NonJoiningBase = (byte)(ArabicCharClass.NonJoiningBase | StartOfCluster);
private const byte JoiningBase = (byte)(ArabicCharClass.JoiningBase | StartOfCluster);
private const byte SyriacAleph = (byte)(ArabicCharClass.SyriacAleph | StartOfCluster);
private const byte RightJoining = (byte)(ArabicCharClass.RightJoining | StartOfCluster);
private const byte RightJoiningRules = (byte)(ArabicCharClass.RightJoiningRules | StartOfCluster);
private const byte Kashida = (byte)(ArabicCharClass.Kashida | StartOfCluster);
// ...while the mark classes (and the Syriac abbreviation mark) are the bare class
// value, without the start-of-cluster flag.
private const byte ArabicMark = (byte)ArabicCharClass.ArabicMark;
private const byte Shadda = (byte)ArabicCharClass.Shadda;
private const byte KoranicUpperMark = (byte)ArabicCharClass.KoranicUpperMark;
private const byte KoranicLowerMark = (byte)ArabicCharClass.KoranicLowerMark;
private const byte SuperscriptAlef = (byte)ArabicCharClass.SuperscriptAlef;
private const byte Madda = (byte)ArabicCharClass.Madda;
private const byte Hamza = (byte)ArabicCharClass.Hamza;
private const byte SyriacLowerLineMark = (byte)ArabicCharClass.SyriacLowerLineMark;
private const byte SyriacSeyameAbove = (byte)ArabicCharClass.SyriacSeyameAbove;
private const byte SyriacSeyameBelow = (byte) ArabicCharClass.SyriacSeyameBelow;
private const byte SyriacDotAbove = (byte) ArabicCharClass.SyriacDotAbove;
private const byte SyriacDotBelow = (byte) ArabicCharClass.SyriacDotBelow;
private const byte SyriacUpperDuot = (byte)ArabicCharClass.SyriacUpperDuot;
private const byte SyriacLowerDuot = (byte)ArabicCharClass.SyriacLowerDuot;
private const byte SyriacUpperTriot = (byte)ArabicCharClass.SyriacUpperTriot;
private const byte SyriacLowerTriot = (byte)ArabicCharClass.SyriacLowerTriot;
private const byte SyriacAbbrevMark = (byte)ArabicCharClass.SyriacAbbrevMark;
// Some Syriac aliases: apart from SyriacLowerGreek, these Syriac mark kinds reuse the
// class value of an Arabic mark class instead of having an enum member of their own.
private const byte SyriacUpperGreek = ArabicMark;
private const byte SyriacLowerGreek = (byte)ArabicCharClass.SyriacLowerGreek;
private const byte SyriacOtherMark = Shadda;
private const byte SyriacUpperDotted = KoranicUpperMark;
private const byte SyriacLowerDotted = KoranicLowerMark;
private const byte SyriacQushshaya = SuperscriptAlef;
private const byte SyriacRukkakha = Madda;
private const byte SyriacUpperLineMark = Hamza;
// Class byte for each code point U+0600..U+077F (0x180 entries, in code point order).
// Layout: every "//0xy" comment starts the 16 entries for U+0xy0..U+0xyF, written as
// four source lines of four entries each. The digits in that comment name the last hex
// digit of the code points found in each of the four columns (first column: 0, 4, 8, C;
// second: 1, 5, 9, D; and so on).
// The constructor installs this array as _charClassTable; presumably the base class
// indexes it by (char - _firstChar) - TODO confirm.
static byte[] _arabicSyriacCharClasses =
{
// Arabic
//060 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
//061 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
KoranicUpperMark, KoranicUpperMark, KoranicUpperMark, KoranicUpperMark,
KoranicUpperMark, KoranicUpperMark, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
//062 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
NonJoiningBase, NonJoiningBase, RightJoining, RightJoining,
RightJoining, RightJoining, JoiningBase, RightJoining,
JoiningBase, RightJoining, JoiningBase, JoiningBase,
JoiningBase, JoiningBase, JoiningBase, RightJoining,
//063 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
RightJoining, RightJoining, RightJoining, JoiningBase,
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
JoiningBase, JoiningBase, JoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
//064 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
Kashida, JoiningBase, JoiningBase, JoiningBase,
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
RightJoining, JoiningBase, JoiningBase, ArabicMark,
ArabicMark, ArabicMark, ArabicMark, ArabicMark,
//065 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
ArabicMark, Shadda, ArabicMark, Madda,
Hamza, Hamza, ArabicMark, ArabicMark,
ArabicMark, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
//066 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, JoiningBase, JoiningBase,
//067 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
SuperscriptAlef, RightJoining, RightJoining, RightJoining,
NonJoiningBase, RightJoining, RightJoining, RightJoining,
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
//068 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
RightJoining, RightJoining, RightJoining, RightJoining,
RightJoining, RightJoining, RightJoining, RightJoining,
//069 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
RightJoining, RightJoining, RightJoining, RightJoining,
RightJoining, RightJoining, RightJoining, RightJoining,
RightJoining, RightJoining, JoiningBase, JoiningBase,
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
//06A 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
//06B 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
//06C 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
RightJoining, JoiningBase, RightJoining, RightJoining,
RightJoining, RightJoining, RightJoining, RightJoining,
RightJoining, RightJoining, RightJoining, RightJoining,
JoiningBase, RightJoining, JoiningBase, RightJoining,
//06D 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
JoiningBase, JoiningBase, RightJoining, RightJoining,
NonJoiningBase, RightJoining, KoranicUpperMark, KoranicUpperMark,
KoranicUpperMark, KoranicUpperMark, KoranicUpperMark, KoranicUpperMark,
KoranicUpperMark, NonJoiningBase, NonJoiningBase, KoranicUpperMark,
//06E 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
KoranicUpperMark, ArabicMark, KoranicUpperMark, KoranicLowerMark,
KoranicUpperMark, NonJoiningBase, NonJoiningBase, KoranicUpperMark,
KoranicUpperMark, NonJoiningBase, KoranicLowerMark, KoranicUpperMark,
KoranicUpperMark, KoranicLowerMark, RightJoining, RightJoining,
//06F 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, JoiningBase, JoiningBase,
JoiningBase, NonJoiningBase, NonJoiningBase, JoiningBase,
// Syriac
//070 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, SyriacAbbrevMark,
//071 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
SyriacAleph, SyriacOtherMark, JoiningBase, JoiningBase,
JoiningBase, RightJoiningRules, RightJoiningRules,RightJoining,
RightJoining, RightJoining, JoiningBase, JoiningBase,
JoiningBase, JoiningBase, RightJoining, JoiningBase,
//072 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
RightJoining, JoiningBase, RightJoiningRules, JoiningBase,
RightJoining, NonJoiningBase, NonJoiningBase, RightJoiningRules,
//073 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
SyriacUpperGreek, SyriacLowerGreek, SyriacUpperDotted, SyriacUpperGreek,
SyriacLowerGreek, SyriacUpperDotted, SyriacUpperGreek, SyriacLowerGreek,
SyriacLowerDotted, SyriacLowerDotted, SyriacUpperGreek, SyriacLowerGreek,
SyriacLowerDotted, SyriacUpperGreek, SyriacLowerGreek, SyriacUpperDotted,
//074 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
SyriacOtherMark, SyriacQushshaya, SyriacRukkakha, SyriacUpperDuot,
SyriacLowerDuot, SyriacUpperTriot, SyriacLowerTriot, SyriacUpperLineMark,
SyriacLowerLineMark,SyriacOtherMark, SyriacOtherMark, NonJoiningBase,
NonJoiningBase, RightJoining, JoiningBase, JoiningBase,
// Arabic supplemental characters
//075 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
JoiningBase, RightJoining, RightJoining, RightJoining,
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
//076 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
JoiningBase, JoiningBase, JoiningBase, JoiningBase,
JoiningBase, JoiningBase, JoiningBase, RightJoining,
RightJoining, JoiningBase, NonJoiningBase, NonJoiningBase,
//077 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
};
// Class byte for each of the 0x40 code points starting at
// SyriacExtraDiacriticsUnicodeRangeStart (U+0300), in code point order; callers index it
// with (char - SyriacExtraDiacriticsUnicodeRangeStart).
// Layout: every "//0xy" comment starts the 16 entries for U+0xy0..U+0xyF, written as
// four source lines of four entries each.
// NonJoiningBase marks a code point with no Syriac mark class;
// IsCharacterValidWithSyriacAbrevMark treats such entries as not valid.
static byte[] _syriacExtraClasses =
{
// Extra Syriac Diacritics
//030 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
NonJoiningBase, NonJoiningBase, NonJoiningBase, Hamza,
Hamza, NonJoiningBase, NonJoiningBase, SyriacDotAbove,
SyriacSeyameAbove, NonJoiningBase, SyriacQushshaya, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
//031 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
//032 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
NonJoiningBase, NonJoiningBase, NonJoiningBase, SyriacDotBelow,
SyriacSeyameBelow, Madda, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, SyriacLowerLineMark,SyriacLowerLineMark, NonJoiningBase,
//033 0, 4, 8, C 1, 5, 9, D 2, 6, A, E 3, 7, B, F
SyriacLowerLineMark,SyriacLowerLineMark,NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
};
#endregion // end of Classification Tables
}
}
// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
// Copyright (c) Microsoft Corporation. All rights reserved.
using System;
using System.Security;
using System.Security.Permissions;
using System.Diagnostics;
using System.Collections;
using System.Globalization;
using System.Windows;
using System.Windows.Media;
using System.Windows.Media.TextFormatting;
using MS.Internal.FontCache;
using MS.Internal.FontFace;
using MS.Internal.PresentationCore;
using MS.Utility;
namespace MS.Internal.Shaping
{
/// <summary>
/// ArabicCharClass - enumeration of Arabic character classifications
/// </summary>
/// <remarks>
/// The declaration order is significant. The first ten values (SyriacAleph through
/// SyriacAbbrevMark) are used as row/column indices into the shaping state tables, and
/// every value greater than SyriacAbbrevMark is treated as a diacritic by the shaper.
/// </remarks>
internal enum ArabicCharClass : byte
{
SyriacAleph, // syriac alaph rules
NonJoiningBase, // character does not join
JoiningBase, // character joins on both sides
RightJoining, // character joins on right (Arabic like waw and Syriac like taw)
RightJoiningRules, // character joins on right with rules (dalath, dotless dalath, rish)
Kashida, // The kashida character
UnknownChar,
LastBaseChar = UnknownChar, // alias: the highest base character class
ZWJ,
ZWNJ,
SyriacAbbrevMark,// Syriac Abbreviation marker
// everything below this point is a diacritic class
ArabicMark,// Arabic above diacritics - (064B, 064C, 064E, 064F, 0652, 06E1)
// and below diacritics - (064D, 0650)
SyriacLowerGreek,
Shadda,// Arabic shadda - (0651)
KoranicUpperMark,// Arabic Qur'anic mark above (0610, 0611, 0612, 0613, 0614, 06D6, ...)
KoranicLowerMark,// Arabic Qur'anic mark below (06E3, 06EA, 06ED)
SuperscriptAlef,// Arabic superscript alef (0670)
Madda,// Arabic madda (0653)
Hamza,// Arabic hamza above/below (0654, 0655)
SyriacLowerLineMark,// Syriac line type below
SyriacSeyameAbove,// Syriac seyame above
SyriacSeyameBelow,// Syriac seyame below
SyriacDotAbove,// Syriac dot above
SyriacDotBelow,// Syriac dot below
SyriacUpperDuot,// Syriac two dots above
SyriacLowerDuot,// Syriac two dots below
SyriacUpperTriot,// Syriac three dots above
SyriacLowerTriot,// Syriac three dots below
NumberOfArabicCharClasses, // count of the classes above; not a class itself
};
/// <summary>
/// ArabicShapeClass - enumeration of Arabic shape classifications
/// </summary>
/// <remarks>
/// The ArabicShape._arabicTextFeatures array has one member for each of
/// these shape classifications. The order of these enum values reflects
/// the fact that two of the shape classes (InitialShape and MedialShape)
/// don't have a correlated ShapeStateTable because there's no decision to
/// make when these are the current class (all characters produce the same
/// state change). These two classes are thus at the end of the list.
/// The values are also packed into the nibbles of shape state table entries.
/// </remarks>
internal enum ArabicShapeClass : byte
{
SameShape, // state table entry meaning "no change"
UninitializedState = SameShape, // used for detecting initialization of state machine
NotShaped,
IsolatedShape,
InvalidBaseShape,
FinalShape,
Medial2Shape,
Final2Shape,
Final3Shape,
LastShapeWithStateTable = Final3Shape, // alias: last shape that owns a state table section
InitialShape,
MedialShape,
NumberOfShapesWithFeatureTag // count of the shapes above; not a shape itself
};
///
/// The ArabicShape, class defines the Arabic Shaping Engine. The IShaper,
/// interface implemented provides the shaping methods for Arabic and Syrics Scripts.
/// The Shaping engine internally uses the Arabic Shaping Finite State machine to define
/// the ordering and generation of glyphs.
///
internal class ArabicShape : BaseShape
{
/// <summary>
/// Substitution pass 1: localized forms and composition / decomposition features.
/// </summary>
private static readonly Feature[] _substitutionPass1Features =
{
new Feature(0,ushort.MaxValue,(uint)FeatureTags.LocalizedForms,1),
new Feature(0,ushort.MaxValue,(uint)FeatureTags.GlyphCompositionDecomposition,1),
};
/// <summary>
/// Substitution pass 3: features for Required Ligatures and Connection Forms
/// </summary>
/// <remarks>
/// The order matters: ApplySubstitutionFeatures skips the first three entries when
/// ligatures are inhibited for the run, so the three ligature/swash features must stay
/// at the front. GetGlyphs copies these tags, disabled, to the front of the shared
/// feature list.
/// </remarks>
private static readonly uint[] _substitutionPass3Features =
{
// these features are optional (can be suppressed by GetGlyphs caller)
(uint)FeatureTags.StandardLigatures,
(uint)FeatureTags.DiscretionaryLigatures,
(uint)FeatureTags.ContextualSwash,
// these features are always enabled for pass 3
(uint)FeatureTags.RequiredLigatures,
(uint)FeatureTags.ContextualAlternates,
(uint)FeatureTags.MarkPositioningviaSubstitution,
};
// Positioning features applied when the font client's script tag is Arabic.
private static readonly Feature[] _arabicPositioningFeatures =
{
new Feature(0,ushort.MaxValue,(uint)FeatureTags.CursivePositioning,1),
new Feature(0,ushort.MaxValue,(uint)FeatureTags.MarkPositioning,1),
new Feature(0,ushort.MaxValue,(uint)FeatureTags.MarktoMarkPositioning,1),
new Feature(0,ushort.MaxValue,(uint)FeatureTags.Kerning,1),
};
// Scripts reported by SupportedScripts.
static private readonly ScriptTags[] _supportedScripts = new ScriptTags[]{ScriptTags.Arabic, ScriptTags.Syriac};
/// <summary>
/// Constructor for the Arabic Open Type Shaping Engine.
/// </summary>
internal ArabicShape()
{
// Arabic and Syriac runs flow right-to-left.
_textFlowDirection = TextFlowDirection.RTL;
}
/// <summary>
/// The scripts this shaping engine handles (Arabic and Syriac).
/// </summary>
/// <remarks>
/// Returns the shared static array itself, not a copy.
/// </remarks>
public override ScriptTags[] SupportedScripts
{
get
{
return _supportedScripts;
}
}
/// <summary>
/// ArabicShape.ApplyPositioningFeatures - Arabic implementation of the GetGlyphPlacement helper.
/// This method goes through a list of glyphs and adds placement information.
/// </summary>
/// <param name="placementInfo">the wrapper for glyph advances, offset arrays</param>
/// <param name="layoutMetrics">metrics for all the positioning features</param>
/// <param name="featureSet">Set of gpos features to be applied to the unicode run (not read by this implementation).</param>
/// <returns>result of applying features</returns>
/// <SecurityNote>
/// Critical - calls critical code
/// </SecurityNote>
[SecurityCritical]
protected override OpenTypeLayoutResult ApplyPositioningFeatures(
    ref PlacementWorkspace placementInfo,
    ref LayoutMetrics layoutMetrics,
    FeatureSet featureSet )
{
    ShaperFontClient client = placementInfo.FontClient;

    // Arabic has its own positioning feature list; any other script handled by
    // this shaper (i.e. Syriac) uses the standard positioning features.
    Feature[] positioningFeatures;
    if (client.ScriptTag == (uint)ScriptTags.Arabic)
    {
        positioningFeatures = _arabicPositioningFeatures;
    }
    else
    {
        positioningFeatures = _defaultPositioningFeatures;
    }

    return client.PositionGlyphs(
                ref placementInfo,
                ref layoutMetrics,
                positioningFeatures,            // In: List of features to apply
                positioningFeatures.Length );
}
/// <summary>
/// ArabicShape.ApplySubstitutionFeatures - Arabic implementation of the GetGlyphs() helper function.
/// Runs up to three substitution passes, stopping at the first pass that does not succeed.
/// </summary>
/// <param name="currentRun">shaping currentRun</param>
/// <param name="featureSet">Set of gsub features to be applied to the unicode run (not read by this implementation).</param>
/// <returns>result of applying features</returns>
/// <SecurityNote>
/// Critical - this method calls unsafe methods.
/// </SecurityNote>
[SecurityCritical]
protected override OpenTypeLayoutResult ApplySubstitutionFeatures(
    ref ShapingWorkspace currentRun,
    FeatureSet featureSet )
{
    ShaperFontClient client = currentRun.FontClient;

    // Pass 1 - Apply the ccmp feature to the Glyphs
    OpenTypeLayoutResult result = client.SubstituteGlyphs(
                                        ref currentRun,
                                        _substitutionPass1Features,
                                        _substitutionPass1Features.Length );
    if (result != OpenTypeLayoutResult.Success)
    {
        return result;
    }

    // Pass 2 - Apply the Arabic Text Features from currentRun. This is only needed
    // when the shared list holds more than the pass 3 entries at its front.
    if (ShaperBuffers.TextFeatures.FeaturesCount > _substitutionPass3Features.Length)
    {
        result = client.SubstituteGlyphs(
                        ref currentRun,
                        ShaperBuffers.TextFeatures.Features,
                        ShaperBuffers.TextFeatures.FeaturesCount );
        if (result != OpenTypeLayoutResult.Success)
        {
            return result;
        }
    }

    // Pass 3 - Apply the Required Ligatures and Connection Form Features, etc.
    // The _substitutionPass3Features sit at the front of ShaperBuffers.TextFeatures.
    // Enable all that are appropriate: when ligatures are inhibited the first three
    // (optional) entries stay disabled.
    int pass3Ix = currentRun.AreLigaturesInhibited ? 3 : 0;
    while (pass3Ix < _substitutionPass3Features.Length)
    {
        ShaperBuffers.TextFeatures.Features[pass3Ix].Parameter = 1;  // enable this one
        ++pass3Ix;
    }

    result = client.SubstituteGlyphs(
                    ref currentRun,
                    ShaperBuffers.TextFeatures.Features,
                    _substitutionPass3Features.Length );

    // (re)disable all these features (for next GetGlyphs)
    pass3Ix = currentRun.AreLigaturesInhibited ? 3 : 0;
    while (pass3Ix < _substitutionPass3Features.Length)
    {
        ShaperBuffers.TextFeatures.Features[pass3Ix].Parameter = 0;  // disable this one
        ++pass3Ix;
    }

    return result;
}
/// <summary>
/// ArabicShape.GetCharClassifier - creates the character classifier used by this shaper
/// </summary>
/// <param name="scriptTag">script the classifier is created for</param>
/// <param name="fontFace">font face the classifier is created for</param>
/// <returns>a new ArabicCharClassifier</returns>
protected override ShaperCharacterClassifier GetCharClassifier(ScriptTags scriptTag, GlyphTypeface fontFace)
{
    ShaperCharacterClassifier arabicClassifier = new ArabicCharClassifier(scriptTag, fontFace);
    return arabicClassifier;
}
/// <summary>
/// ArabicShape.GetGlyphs - Arabic implementation of the GetGlyphs() helper function.
/// Walks the run's characters, steps the shaping state machine for each one, validates
/// diacritics, inserts glyphs for Syriac abbreviation marks (SAM) and records the glyph
/// properties of every character.
/// </summary>
/// <param name="currentRun">shaping currentRun</param>
/// <param name="item">Text item (not read by this implementation)</param>
/// <returns>number of glyphs (the run's glyph count minus the glyph index at entry)</returns>
/// <SecurityNote>
/// Critical - calls critical code
/// </SecurityNote>
[SecurityCritical]
protected override int GetGlyphs ( ref ShapingWorkspace currentRun, Item item )
{
// bit map of the diacritic classes already seen on the current base character
uint currentDiacriticsMap = 0;
bool isInvalidChar = true; // if first char is a diacritic, we're ready!
// NOTE(review): startingCharIx and charsCount are written but never read in this method.
int startingCharIx = currentRun.CurrentCharIx;
int startingGlyphIx = currentRun.NextGlyphIx;
int charsCount = 0;
// SAM bookkeeping: whether an abbreviation is open, and the glyph of the first SAM seen
bool isSamPending = false;
ushort samGlyph = 0;
RecordTraceEvent(MS.Utility.EventType.StartEvent,
"ArabicShape Init Start");
// initialize the features array
ShaperBuffers.InitializeFeatureList(0,(ushort)_substitutionPass3Features.Length);
if (ShaperBuffers.TextFeatures.FeaturesCount == 0)
{
// We include the _substitutionPass3Features in this feature
// array (note that they are all set to disabled)
for (int i = 0; i < _substitutionPass3Features.Length; ++i)
{
ShaperBuffers.TextFeatures.AddFeature(
new Feature( 0,
ushort.MaxValue,
_substitutionPass3Features[i],
0 ));
}
}
// initialize the cluster cop/state machine
ArabicShapeFSM _stateMachine = new ArabicShapeFSM( ref currentRun, ShaperBuffers.TextFeatures );
RecordTraceEvent(MS.Utility.EventType.EndEvent,
"ArabicShape Init End");
RecordTraceEvent(MS.Utility.EventType.StartEvent,
"ArabicShape ShapeArabicText Start");
// Shape and initialize the glyph list
// process the char stream, creating shape info, applying features
// as necessary
char currChar;
ushort currGlyph;
CharShapeInfo currShape;
while ( currentRun.GetNextCharProperties (out currChar, out currGlyph, out currShape) )
{
ArabicCharClass currClass = (ArabicCharClass)(currShape & CharShapeInfo.ShaperClassMask);
_stateMachine.StepToNextState(ref currentRun, currClass);
// validate this character
if (currClass > ArabicCharClass.SyriacAbbrevMark)
{
// This is a diacritic. Check for validity (only one of each
// diacritic class per base).
if (!isInvalidChar)
{
uint diacMapEntry = (uint)(1 << (int)currClass);
if ((diacMapEntry & currentDiacriticsMap) != 0)
{
// This is a duplicate mark (not allowed), so the state
// machine is forced to the invalid base state and we will
// insert a dotted circle.
isInvalidChar = true;
_stateMachine.ForceInvalidBaseState();
}
else
{
currentDiacriticsMap |= diacMapEntry;
}
}
if (isInvalidChar)
{
// flag the mark as needing an inserted base glyph
currShape |= CharShapeInfo.RequiresInsertedBase;
}
}
else
{
// This is a base character, a ZWJ/ZWNJ or a Syriac abbreviation mark.
if (currClass == ArabicCharClass.SyriacAbbrevMark)
{
if (!isSamPending)
{
// keep this glyph for use in the future
if (samGlyph == 0)
{
samGlyph = currGlyph;
}
}
else
{
// A SAM arrived while another one is still pending: emit a glyph
// for the pending one first, and make sure the current character
// does not start a cluster of its own.
currentRun.AddGlyphs(1);
currentRun.SetGlyphPropertiesUsingGlyph(currShape, samGlyph);
if ((currShape & CharShapeInfo.IsStartOfCluster) != 0)
{
currShape ^= CharShapeInfo.IsStartOfCluster;
}
}
isSamPending = true;
}
// a new base starts with no diacritics seen and is valid
currentDiacriticsMap = 0;
isInvalidChar = false;
}
// If there's a SAM pending, insert a completion
// SAM if its appropriate (ie, if the current character
// is an illegal diacritic or the current character isn't
// legal as an abbreviation)
if (isSamPending &&
(isInvalidChar ||
!ArabicCharClassifier.IsCharacterValidWithSyriacAbrevMark(currentRun.CurrentChar)))
{
// handle the abbreviation mark
currentRun.AddGlyphs(1);
currentRun.SetGlyphPropertiesUsingGlyph(currShape, samGlyph);
if ((currShape & CharShapeInfo.IsStartOfCluster) != 0)
{
currShape ^= CharShapeInfo.IsStartOfCluster;
}
isSamPending = false;
}
// finally record the properties of the current character's own glyph
currentRun.SetGlyphPropertiesUsingGlyph ( currShape, currGlyph );
++charsCount;
}
RecordTraceEvent(MS.Utility.EventType.EndEvent,
"ArabicShape ShapeArabicText End");
return (currentRun.GlyphsCount - startingGlyphIx); // we're done
}
}
///
/// ArabicShapeFSM - Arabic shaping state machine
///
///
///
internal struct ArabicShapeFSM
{
// Shape produced by the most recent step (SameShape when that step changed nothing).
private ArabicShapeClass _currentShape;
// Last shape that was not SameShape; UninitializedState until the first character is seen.
private ArabicShapeClass _previousShape;
// Index in _arabicShapeStateTable of the first entry of the active row.
private ushort _currentStateRowIx;
// Class of the last character that caused a state change (seeded with ZWJ or ZWNJ).
private ArabicCharClass _currentCharClass;
// True when the run's font client reports the Syriac script tag.
private bool _isSyriacText;
// Feature list that receives the shape feature of each shaped character.
private ShaperFeaturesList _featuresList;
/// <summary>
/// ArabicShapeFSM constructor
/// </summary>
/// <remarks>
/// The machine starts in NotShaped with an uninitialized previous shape, so that the
/// first StepToNextState call is routed to SetStartState.
/// </remarks>
/// <param name="currentRun">the run about to be shaped</param>
/// <param name="featuresList">list that receives the shape features</param>
public ArabicShapeFSM (ref ShapingWorkspace currentRun,
                       ShaperFeaturesList featuresList)
{
    _featuresList = featuresList;
    _currentStateRowIx = 0;
    _currentShape = ArabicShapeClass.NotShaped;
    _previousShape = ArabicShapeClass.UninitializedState;

    _isSyriacText = (currentRun.FontClient.ScriptTag == (uint)ScriptTags.Syriac);

    // Pretend a ZWJ precedes the text when the run starts at its first character
    // and has a leading join; otherwise pretend a ZWNJ does.
    if (currentRun.CurrentCharIx == 0 && currentRun.HasLeadingJoin)
    {
        _currentCharClass = ArabicCharClass.ZWJ;
    }
    else
    {
        _currentCharClass = ArabicCharClass.ZWNJ;
    }
}
/// <summary>
/// ArabicShapeFSM.ForceInvalidBaseState - force state machine to state
/// </summary>
/// <remarks>
/// This is used to force the state machine to invalid base state: both the current
/// and the previous shape become InvalidBaseShape and the active row becomes the
/// UnknownChar row of the InvalidBaseShape section.
/// </remarks>
public void ForceInvalidBaseState ( )
{
    _currentShape = ArabicShapeClass.InvalidBaseShape;
    _previousShape = ArabicShapeClass.InvalidBaseShape;

    ushort invalidShapeIx = (ushort)ArabicShapeClass.InvalidBaseShape;
    ushort unknownCharIx = (ushort)ArabicCharClass.UnknownChar;
    _currentStateRowIx = (ushort)(_baseTableFirstEntryIx[ invalidShapeIx ] +
                                  _rowFirstEntryIx[ unknownCharIx ]);
}
/// <summary>
/// True while the state machine's current shape is InvalidBaseShape.
/// </summary>
public bool IsInvalidBaseState
{
    get
    {
        return ArabicShapeClass.InvalidBaseShape == _currentShape;
    }
}
/// <summary>
/// ArabicShapeFSM.SetStartState - start of new syllable (potentially)
/// </summary>
/// <remarks>
/// Called by StepToNextState for the first character seen by the machine. A leading
/// diacritic forces the invalid base state. A joining base character starts in
/// FinalShape (when the machine was seeded with ZWJ) or IsolatedShape, and that
/// shape's feature is added for the current character. Any other character leaves
/// the machine in NotShaped.
/// </remarks>
/// <param name="currentRun">the run being shaped</param>
/// <param name="firstCharClass">class of the first character</param>
/// <SecurityNote>
/// Critical - calls critical code (ForceInvalidBaseState)
/// </SecurityNote>
[SecurityCritical]
private void SetStartState ( ref ShapingWorkspace currentRun,
                             ArabicCharClass firstCharClass )
{
    Debug.Assert (_currentShape == ArabicShapeClass.NotShaped &&
                  _currentStateRowIx == 0, "Arabic FSM SetStartState should not be called now.");

    _previousShape = _currentShape;

    // diacritics may not be first character
    if (firstCharClass > ArabicCharClass.SyriacAbbrevMark)
    {
        ForceInvalidBaseState ();
        return;
    }

    // All joining base chars mean we start in a different state than NotShaped.
    bool isJoiningBase = firstCharClass <= ArabicCharClass.RightJoiningRules &&
                         firstCharClass != ArabicCharClass.NonJoiningBase;
    if (isJoiningBase)
    {
        if (_currentCharClass == ArabicCharClass.ZWJ)
        {
            _currentShape = ArabicShapeClass.FinalShape;
        }
        else
        {
            _currentShape = ArabicShapeClass.IsolatedShape;
        }
        _featuresList.AddFeature(currentRun.CurrentCharIx,
                                 _arabicTextFeatures[ (int)_currentShape ]);
    }

    _currentCharClass = firstCharClass;
    _currentStateRowIx =
        (ushort)(_baseTableFirstEntryIx[ (int)_currentShape ] +
                 _rowFirstEntryIx[ (int) _currentCharClass ]);
}
/// <summary>
/// ArabicShapeFSM.StepToNextState - process the latest character.
/// </summary>
/// <remarks>
/// This routine steps the state machine to its next state
/// based on the current state, the last character that caused a
/// state change and the next char. Nothing is returned; the new state is kept in the
/// machine's fields and shape features are recorded through UpdateShapeInfo.
/// </remarks>
/// <param name="currentRun">the run being shaped</param>
/// <param name="nextCharClass">class of the character being processed</param>
/// <SecurityNote>
/// Critical - calls critical code, uses unsafe accessors
/// </SecurityNote>
[SecurityCritical]
public void StepToNextState ( ref ShapingWorkspace currentRun, ArabicCharClass nextCharClass )
{
Debug.Assert ( (byte)nextCharClass < NumberOfCharClasses );
// When the last step left the shape unchanged (SameShape), _previousShape already
// holds the last real shape and must not be overwritten.
if ( _currentShape != ArabicShapeClass.SameShape )
{
if (_previousShape == ArabicShapeClass.UninitializedState)
{
// very first character: pick the start state and stop here
SetStartState(ref currentRun, nextCharClass);
return;
}
// keep track of where we're coming from.
_previousShape = _currentShape; // remember last shape
}
// get the next shape entry...
if (nextCharClass > ArabicCharClass.SyriacAbbrevMark)
{
// All diacritics are checked via _arabicDiacriticShapeStateTable[]
// The current shape will end up either as InvalidBaseShape or SameShape
// (row = previous shape - 1, column = class of the last state-changing char)
ushort diacTableIx = (ushort)(_rowFirstEntryIx[ (int)_previousShape - 1 ] + (ushort)_currentCharClass);
_currentShape = (ArabicShapeClass) _arabicDiacriticShapeStateTable[ diacTableIx ]; // save new shape
Debug.Assert(_currentShape == ArabicShapeClass.SameShape ||
_currentShape == ArabicShapeClass.InvalidBaseShape,"Arabic diacritic table has invalid value");
// once the base is invalid, it stays invalid for its diacritics
if (_previousShape == ArabicShapeClass.InvalidBaseShape ||
_currentShape == ArabicShapeClass.InvalidBaseShape)
{
ForceInvalidBaseState();
}
}
else
{
// get the shape state table entry.
byte shapeTableEntry = _arabicShapeStateTable[ _currentStateRowIx + (ushort) nextCharClass];
switch (shapeTableEntry)
{
case NoShapeChange:
// this means this latest character doesn't affect the current state AT ALL.
_currentShape = ArabicShapeClass.SameShape;
break;
case InvalidBase:
ForceInvalidBaseState();
break;
default:
// change our state (or shape)
// low nibble = new current shape; the high nibble is handled by UpdateShapeInfo
_currentShape = (ArabicShapeClass) (shapeTableEntry & 0xf); // save new shape
Debug.Assert(_currentShape <= ArabicShapeClass.LastShapeWithStateTable,"invalid entry in Arabic state table");
_currentCharClass = nextCharClass; // keep up-to-date
// adjust the state table indices..
// _currentStateRowIx == the first entry in the active state table row.
_currentStateRowIx = (ushort)(_baseTableFirstEntryIx[ (ushort) _currentShape ] +
_rowFirstEntryIx[ (ushort) nextCharClass ]);
Debug.Assert( _currentStateRowIx < _baseTableFirstEntryIx[ (ushort) _currentShape + 1 ],"invalid entry in Arabic state table");
UpdateShapeInfo(ref currentRun, shapeTableEntry);
break;
}
}
}
/// <summary>
/// ArabicShapeFSM.UpdateShapeInfo - record the shape features implied by a state change.
/// </summary>
/// <remarks>
/// The upper nibble of the state table entry, when not zero, is the corrected shape
/// of the previously shaped character; the current character's own shape is already
/// in _currentShape. Nothing is returned; the features list is updated.
/// </remarks>
/// <param name="currentRun">the run being shaped</param>
/// <param name="shapeTableEntry">state table entry that caused the state change</param>
/// <SecurityNote>
/// Critical - calls critical code, uses unsafe accessors
/// </SecurityNote>
[SecurityCritical]
private void UpdateShapeInfo ( ref ShapingWorkspace currentRun, byte shapeTableEntry )
{
    // Does the upper nibble carry a new shape for the previous character? If not,
    // only this character's shape is set.
    int correctedPreviousShape = shapeTableEntry >> 4;
    if (correctedPreviousShape != NoShapeChange)
    {
        ArabicShapeClass shapeBeforeCorrection = _previousShape;
        _previousShape = (ArabicShapeClass) correctedPreviousShape;

        bool wasUnrecordedSyriacIsolated =
            _isSyriacText && shapeBeforeCorrection == ArabicShapeClass.IsolatedShape;

        if (!wasUnrecordedSyriacIsolated)
        {
            // Update the previous shape's feature tag. If this updated feature tag matches
            // the latestCountedFeature, then its not a new feature and we don't want to set
            // the "start of feature" flag. For example, in the following sequence...
            //
            //   Event                                        Feature Tags:
            //   1st char processed, set to Isolated form     Iso   N.A    N.A.   N.A.
            //   2nd char, changes 1st form, sets own form    Ini   Final  N.A.   N.A.
            //   3rd char, changes 2nd form, set own          Ini   Mid    Final  N.A.
            //   4th char, changes 3rd form, sets own         Ini   Mid    Mid    Final
            //
            // ... we expect the "start of feature" flag to be set on the 1st, 2nd, and 4th
            // shapes (once we've processed all four chars)
            _featuresList.UpdatePreviousShapedChar(_arabicTextFeatures[(int)_previousShape]);
        }
        else
        {
            // For Syriac text, the previous isolated shape did not cause a feature
            // to be added (it was treated as "Not Shaped"), so a feature has to be
            // added here rather than updated.
            _featuresList.AddFeature( currentRun.PreviousCharIx,
                                      _arabicTextFeatures[(int)_previousShape] );
        }
    }

    // Now set up the current shape...
    // (for syriac "isolated shape", treat as "not shaped")
    if ( !_isSyriacText || _currentShape != ArabicShapeClass.IsolatedShape )
    {
        _featuresList.AddFeature( currentRun.CurrentCharIx,
                                  _arabicTextFeatures[(int)_currentShape] );
    }
}
// These consts make the _arabicShapeStateTable initializer more readable.
// A table entry packs two ArabicShapeClass values into one byte:
//   low nibble  ("To...")   - the shape taken by the character being processed;
//   high nibble ("From...") - the corrected shape of the previously shaped character,
//                             or zero when the previous character is left as it is.
// Two whole-byte values are special-cased by StepToNextState: NoShapeChange and InvalidBase.
private const byte NoShapeChange = (byte)ArabicShapeClass.SameShape;
private const byte ToNotShaped = (byte) ArabicShapeClass.NotShaped;
private const byte ToIsolated = (byte) ArabicShapeClass.IsolatedShape;
private const byte ToFinal = (byte) ArabicShapeClass.FinalShape;
private const byte ToFinal2 = (byte) ArabicShapeClass.Final2Shape;
private const byte ToFinal3 = (byte) ArabicShapeClass.Final3Shape;
// previous character becomes (or stays) isolated
private const byte FromIsolatedShape = ToIsolated << 4;
private const byte IsolatedToNotShaped = FromIsolatedShape | ToNotShaped;
private const byte IsolatedToIsolated = FromIsolatedShape | ToIsolated;
private const byte IsolatedToFinal2 = FromIsolatedShape | ToFinal2;
// previous character becomes initial
private const byte FromInitialShape = (byte) ArabicShapeClass.InitialShape << 4;
private const byte InitialToNotShaped = FromInitialShape | ToNotShaped;
private const byte InitialToFinal = FromInitialShape | ToFinal;
// previous character becomes medial
private const byte FromMedialShape = (byte) ArabicShapeClass.MedialShape << 4;
private const byte MedialToNotShaped = FromMedialShape | ToNotShaped;
private const byte MedialToFinal = FromMedialShape | ToFinal;
// previous character becomes medial2
private const byte FromMedial2Shape = (byte) ArabicShapeClass.Medial2Shape << 4;
private const byte Medial2ToNotShaped = FromMedial2Shape | ToNotShaped;
private const byte Medial2ToIsolated = FromMedial2Shape | ToIsolated;
private const byte Medial2ToFinal2 = FromMedial2Shape | ToFinal2;
// previous character becomes final
private const byte FromFinalShape = (byte) ArabicShapeClass.FinalShape << 4;
private const byte FinalToNotShaped = FromFinalShape | ToNotShaped;
// whole-entry value that forces the invalid base state
private const byte InvalidBase = (byte) ArabicShapeClass.InvalidBaseShape;
// upper bound used to sanity-check incoming character classes
private const byte NumberOfCharClasses = (byte) ArabicCharClass.NumberOfArabicCharClasses;
// This array (_arabicShapeStateTable) is a compressed, flattened version of the effective
// three-dimensional array, shapeState[ ][ ][ ].
// The current base shape is determined by the latest base character seen. A given base shape
// can be selected by one or more base char classes. Each base char shape + class pair is a
// row in this array; each row has an entry (next shape state) for every char class that affects
// the shape state (there are 10 such char classes - the first 10 class defs in ArabicCharClass).
//
// Not every shape section has ten rows: the Isolated and Final sections have five rows each and
// the Medial2, Final2 and Final3 sections have a single row each (33 rows in total).
//
// So, use this array to find the next shape state, effectively...
// next shape = _arabicShapeStateTable [ <shape section offset> + <row offset> + <column> ]
// NOTE(review): the original index expression was lost from this comment; the form above is
// inferred from _baseTableFirstEntryIx (first entry of a shape section) and _rowFirstEntryIx
// (first entry of a row within a section) - confirm against the lookup code.
//
private static readonly byte[] _arabicShapeStateTable =
{
//
// Current state is NotShaped...
// 0 1 2 3 4 5 6 7 8 9 Next/ Current
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, ToNotShaped, // 0-SyriacAleph,
ToIsolated, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 1-NonJoiningBase,
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, ToNotShaped, // 2-JoiningBase,
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, ToNotShaped, // 3-RightJoining,
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, ToNotShaped, // 4-RightJoiningRules,
ToFinal, ToNotShaped, ToFinal, ToFinal, ToFinal, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 5-Kashida,
ToIsolated, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 6-UnknownChar,
ToFinal, ToNotShaped, ToFinal, ToFinal, ToFinal, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 7-ZWJ,
ToIsolated, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 8-ZWNJ,
ToIsolated, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 9-SyriacAbbrevMark,
//
// Current state is Isolated Shape...
// 0 1 2 3 4 5 6 7 8 9 Next/ Current
ToIsolated, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 0-SyriacAleph,
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, // 1-NonJoiningBase,
InitialToFinal,ToNotShaped, InitialToFinal,InitialToFinal,InitialToFinal,InitialToNotShaped, ToNotShaped, InitialToNotShaped, ToNotShaped, ToNotShaped, // 2-JoiningBase,
ToFinal2, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 3-RightJoining,
ToFinal3, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 4-RightJoiningRules,
//
// Current state is Invalid Base Shape...
// 0 1 2 3 4 5 6 7 8 9 Next/ Current
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, ToNotShaped, // 0-SyriacAleph,
ToIsolated, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 1-NonJoiningBase,
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, ToNotShaped, // 2-JoiningBase,
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, ToNotShaped, // 3-RightJoining,
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, ToNotShaped, // 4-RightJoiningRules,
ToFinal, ToNotShaped, ToFinal, ToFinal, ToFinal, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 5-Kashida,
ToIsolated, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 6-UnknownChar,
ToFinal, ToNotShaped, ToFinal, ToFinal, ToFinal, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 7-ZWJ,
ToIsolated, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, // 8-ZWNJ,
NoShapeChange,NoShapeChange,NoShapeChange,NoShapeChange,NoShapeChange,NoShapeChange,NoShapeChange,NoShapeChange,NoShapeChange,NoShapeChange,// 9-SyriacAbbrevMark,
//
// Current state is Final Shape...
// 0 1 2 3 4 5 6 7 8 9 Next/ Current
Medial2ToFinal2,ToNotShaped,Medial2ToIsolated,Medial2ToIsolated,Medial2ToIsolated,Medial2ToNotShaped, ToNotShaped, Medial2ToNotShaped, ToNotShaped, NoShapeChange, // 0-SyriacAleph,
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, // 1-NonJoiningBase,
MedialToFinal,ToNotShaped, MedialToFinal,MedialToFinal,MedialToFinal,MedialToNotShaped,ToNotShaped, MedialToNotShaped, ToNotShaped, NoShapeChange, // 2-JoiningBase,
ToFinal2, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, NoShapeChange, // 3-RightJoining,
ToFinal3, ToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, NoShapeChange, // 4-RightJoiningRules,
//
// Current state is Medial2 Shape...
// 0 1 2 3 4 5 6 7 8 9 Next/ Current
ToFinal2, FinalToNotShaped, ToIsolated, ToIsolated, ToIsolated, ToFinal2, ToNotShaped, ToFinal2, FinalToNotShaped, NoShapeChange, // 0-SyriacAleph,
//
// Current state is Final2 Shape...
// 0 1 2 3 4 5 6 7 8 9 Next/ Current
IsolatedToFinal2, ToNotShaped, IsolatedToIsolated, IsolatedToIsolated, IsolatedToIsolated, IsolatedToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, NoShapeChange, // 0-SyriacAleph,
//
// Current state is Final3 Shape...
// 0 1 2 3 4 5 6 7 8 9 Next/ Current
IsolatedToFinal2, ToNotShaped, IsolatedToIsolated, IsolatedToIsolated, IsolatedToIsolated, IsolatedToNotShaped, ToNotShaped, ToNotShaped, ToNotShaped, NoShapeChange, // 0-SyriacAleph,
};
// This table is used differently than _arabicShapeStateTable[]. This table is used
// when the latest char is a diacritic class. The index is dependent on the current base
// shape and class. For all the diacritics, there are two actions possible: either the diacritic
// causes no state change (NoShapeChange) or it causes the state to change to InvalidBase.
// Each row holds ten entries (columns 0-9); the trailing comment on each row names the
// shape that row belongs to.
private static readonly byte[] _arabicDiacriticShapeStateTable =
{
//
// Current char is diacritic ...
// 0 1 2 3 4 5 6 7 8 9 Next/ Current
InvalidBase, NoShapeChange,InvalidBase, InvalidBase, InvalidBase, NoShapeChange,NoShapeChange,NoShapeChange,NoShapeChange,InvalidBase, // NotShaped,
NoShapeChange,InvalidBase, NoShapeChange,NoShapeChange,NoShapeChange,InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, // IsolatedShape,
InvalidBase, NoShapeChange,InvalidBase, InvalidBase, InvalidBase, NoShapeChange,NoShapeChange,NoShapeChange,NoShapeChange,NoShapeChange, // InvalidBaseShape,
NoShapeChange,InvalidBase, NoShapeChange,NoShapeChange,NoShapeChange,InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, // FinalShape,
NoShapeChange,InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, // Medial2Shape,
NoShapeChange,InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, // Final2Shape,
NoShapeChange,InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, // Final3Shape,
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, // InitialShape,
InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, InvalidBase, // MedialShape,
};
// This array has the starting offset for the current shape state. This
// offset is relative to the first entry in _arabicShapeStateTable[]. The
// numbers are 'tally of all previous shape rows X 10 entries per row'.
// There is one entry per shape state, in the order named by the per-entry comments.
// The Initial and Medial entries are 33 * 10, which is one past the last of the
// table's 33 rows: those two states have no section in _arabicShapeStateTable[].
private static readonly ushort[] _baseTableFirstEntryIx =
{
0, // SameShape
0, // NotShaped
10 * 10, // IsolatedShape
15 * 10, // InvalidBaseShape
25 * 10, // FinalShape
30 * 10, // Medial2Shape
31 * 10, // Final2Shape
32 * 10, // Final3Shape
33 * 10, // Initial (Invalid)
33 * 10, // Medial (Invalid)
};
// This array has the starting offset for the 'current char class'. This
// offset is relative to the first entry in the current shape state in whatever
// state lookup table is active. This table is used instead of multiplying the current
// row value by 10 every time we access the shape state table. It is
// used for both the _arabicShapeStateTable[] and the _arabicDiacriticShapeStateTable[]
// lookups. The rows of the two arrays are based on different variables; each
// row in _arabicShapeStateTable is selected based on the next char class
// while the row for _arabicDiacriticShapeStateTable is selected based on the next
// character's shape.
// The per-entry comments name the row each offset selects in each of the two tables
// (char class order follows ArabicCharClass: ZWJ precedes ZWNJ).
private static readonly ushort[] _rowFirstEntryIx =
{
// row entry ix _arabicShapeStateTable _arabicDiacriticShapeStateTable
0, // SyriacAleph NotShaped
10, // NonJoiningBase IsolatedShape
20, // JoiningBase InvalidBaseShape
30, // RightJoining FinalShape
40, // RightJoiningRules Medial2Shape
50, // Kashida Final2Shape
60, // UnknownChar Final3Shape
70, // ZWJ InitialShape
80, // ZWNJ MedialShape
90, // SyriacAbbrevMark
};
// Feature value for each shape state; the shaping code indexes this array with the
// current/previous shape cast to int and hands the result to _featuresList
// (AddFeature / UpdatePreviousShapedChar). The entry order is given by the
// per-entry comments.
private static readonly uint[] _arabicTextFeatures =
{
0, // Same (this is required by ShaperFeaturesList)
1, // NotShaped (this is required by ShaperFeaturesList)
(uint)FeatureTags.IsolatedForms, // IsolatedShape
1, // InvalidBaseShape
(uint)FeatureTags.TerminalForms, // FinalShape
(uint)FeatureTags.MedialForms2, // Medial2Shape
(uint)FeatureTags.TerminalForms2, // Final2Shape
(uint)FeatureTags.TerminalForms3, // Final3Shape
(uint)FeatureTags.InitialForms, // InitialShape
(uint)FeatureTags.MedialForms, // MedialShape
};
}
/// <summary>
/// ArabicCharClassifier - classifies Arabic and Syriac characters (U+0600..U+077F) plus
/// the generic combining diacritics in U+0300..U+033F that have an entry in the
/// Syriac extras table.
/// </summary>
internal class ArabicCharClassifier : ShaperCharacterClassifier
{
    /// <summary>
    /// Sets up the base classifier's default classes and points it at the
    /// Arabic/Syriac classification table.
    /// </summary>
    public ArabicCharClassifier(ScriptTags scriptTag, GlyphTypeface fontFace) : base(scriptTag, fontFace)
    {
        _unknownClass = NonJoiningBase;
        _spaceClass = NonJoiningBase;
        _zwControlClass = NonJoiningBase;
        _zwjClass = JoiningBase;
        _zwnjClass = NonJoiningBase;
        _shyClass = (byte)ArabicCharClass.UnknownChar;

        _firstChar = '\u0600';  // this is the first Arabic Unicode char
        _lastChar = '\u077F';   // this is the last Syriac Unicode char

        // (c ^ 0x600) < 0x180 holds exactly for c in U+0600..U+077F, which is the
        // range covered by the 0x180 entries of _arabicSyriacCharClasses.
        _xorMask = 0x600;
        _xorRange = 0x180;
        _charClassTable = _arabicSyriacCharClasses;
    }

    // Range covered by _syriacExtraClasses: U+0300..U+033F (64 entries).
    private const char SyriacExtraDiacriticsUnicodeRangeStart = '\u0300';
    // const ushort SyriacExtraDiacriticsUnicodeRangeEnd = '\u033F';

    // (c ^ mask) < range is true exactly when c is in [mask, mask + range), provided
    // the mask's low bits are zero. The mask must therefore equal the range start
    // (U+0300). It used to be U+03C0, which matched U+03C0..U+03FF instead: the
    // intended diacritics were never recognised and Greek letters indexed
    // _syriacExtraClasses out of bounds (index 0xC0..0xFF in a 0x40-entry table).
    private const char SyriacExtraUnicodeXorMask = '\u0300';
    private const char SyriacExtraUnicodeXorRange = '\u0040';

    /// <summary>
    /// Returns true when the character lies in the range covered by
    /// _syriacExtraClasses (U+0300..U+033F), i.e. when it is safe to index that
    /// table with (unicodeChar - SyriacExtraDiacriticsUnicodeRangeStart).
    /// </summary>
    private static bool IsInSyriacExtraRange(char unicodeChar)
    {
        return (unicodeChar ^ SyriacExtraUnicodeXorMask) < SyriacExtraUnicodeXorRange;
    }

    /// <summary>
    /// ArabicCharClassifier.ToShapeInfo - returns CharShapeInfo for the Unicode character
    /// </summary>
    /// <param name="unicodeChar">character to classify</param>
    /// <returns>
    /// The base classifier's result, unless that result is the UnknownChar class and
    /// the character is covered by the Syriac extras table, in which case the table
    /// entry is returned instead.
    /// </returns>
    public override CharShapeInfo ToShapeInfo(char unicodeChar)
    {
        CharShapeInfo charShape = base.ToShapeInfo(unicodeChar);
        ArabicCharClass charClass = (ArabicCharClass)(charShape & CharShapeInfo.ShaperClassMask);

        // UnknownChar is also LastBaseChar, so testing for UnknownChar alone is
        // equivalent to the former nested "is a base char" + "is unknown" tests.
        if (charClass == ArabicCharClass.UnknownChar && IsInSyriacExtraRange(unicodeChar))
        {
            charShape = (CharShapeInfo)(_syriacExtraClasses[unicodeChar - SyriacExtraDiacriticsUnicodeRangeStart]);
        }

        return charShape;
    }

    /// <summary>
    /// Check if the character in context has a Syriac Abbreviation character.
    /// </summary>
    /// <param name="currChar">character to test</param>
    /// <returns>
    /// true when the character is in U+070F..U+074F, or is in U+0300..U+033F and its
    /// entry in the Syriac extras table is something other than NonJoiningBase.
    /// </returns>
    public static bool IsCharacterValidWithSyriacAbrevMark(char currChar)
    {
        if (currChar >= '\u070f' && currChar <= '\u074F')
        {
            return true;
        }

        return IsInSyriacExtraRange(currChar) &&
               _syriacExtraClasses[currChar - SyriacExtraDiacriticsUnicodeRangeStart] != NonJoiningBase;
    }

    #region Classification Tables

    // these consts are so the tables below will be more readable
    private const ArabicCharClass StartOfCluster = (ArabicCharClass)CharShapeInfo.IsStartOfCluster;

    // base character classes - each one is flagged as the start of a cluster
    private const byte NonJoiningBase = (byte)(ArabicCharClass.NonJoiningBase | StartOfCluster);
    private const byte JoiningBase = (byte)(ArabicCharClass.JoiningBase | StartOfCluster);
    private const byte SyriacAleph = (byte)(ArabicCharClass.SyriacAleph | StartOfCluster);
    private const byte RightJoining = (byte)(ArabicCharClass.RightJoining | StartOfCluster);
    private const byte RightJoiningRules = (byte)(ArabicCharClass.RightJoiningRules | StartOfCluster);
    private const byte Kashida = (byte)(ArabicCharClass.Kashida | StartOfCluster);

    // mark classes - no start-of-cluster flag
    private const byte ArabicMark = (byte)ArabicCharClass.ArabicMark;
    private const byte Shadda = (byte)ArabicCharClass.Shadda;
    private const byte KoranicUpperMark = (byte)ArabicCharClass.KoranicUpperMark;
    private const byte KoranicLowerMark = (byte)ArabicCharClass.KoranicLowerMark;
    private const byte SuperscriptAlef = (byte)ArabicCharClass.SuperscriptAlef;
    private const byte Madda = (byte)ArabicCharClass.Madda;
    private const byte Hamza = (byte)ArabicCharClass.Hamza;
    private const byte SyriacLowerLineMark = (byte)ArabicCharClass.SyriacLowerLineMark;
    private const byte SyriacSeyameAbove = (byte)ArabicCharClass.SyriacSeyameAbove;
    private const byte SyriacSeyameBelow = (byte)ArabicCharClass.SyriacSeyameBelow;
    private const byte SyriacDotAbove = (byte)ArabicCharClass.SyriacDotAbove;
    private const byte SyriacDotBelow = (byte)ArabicCharClass.SyriacDotBelow;
    private const byte SyriacUpperDuot = (byte)ArabicCharClass.SyriacUpperDuot;
    private const byte SyriacLowerDuot = (byte)ArabicCharClass.SyriacLowerDuot;
    private const byte SyriacUpperTriot = (byte)ArabicCharClass.SyriacUpperTriot;
    private const byte SyriacLowerTriot = (byte)ArabicCharClass.SyriacLowerTriot;
    private const byte SyriacAbbrevMark = (byte)ArabicCharClass.SyriacAbbrevMark;

    // some Syriac aliases
    private const byte SyriacUpperGreek = ArabicMark;
    private const byte SyriacLowerGreek = (byte)ArabicCharClass.SyriacLowerGreek;
    private const byte SyriacOtherMark = Shadda;
    private const byte SyriacUpperDotted = KoranicUpperMark;
    private const byte SyriacLowerDotted = KoranicLowerMark;
    private const byte SyriacQushshaya = SuperscriptAlef;
    private const byte SyriacRukkakha = Madda;
    private const byte SyriacUpperLineMark = Hamza;

    // Classification of U+0600..U+077F, one entry per code point in code point order.
    // Each group of sixteen entries ("// 06Nx") is laid out as four lines of four, so
    // the columns hold the code points ending in 0/4/8/C, 1/5/9/D, 2/6/A/E and 3/7/B/F.
    private static readonly byte[] _arabicSyriacCharClasses =
    {
        // Arabic
        // 060x
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        // 061x
        KoranicUpperMark, KoranicUpperMark, KoranicUpperMark, KoranicUpperMark,
        KoranicUpperMark, KoranicUpperMark, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        // 062x
        NonJoiningBase, NonJoiningBase, RightJoining, RightJoining,
        RightJoining, RightJoining, JoiningBase, RightJoining,
        JoiningBase, RightJoining, JoiningBase, JoiningBase,
        JoiningBase, JoiningBase, JoiningBase, RightJoining,
        // 063x
        RightJoining, RightJoining, RightJoining, JoiningBase,
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        JoiningBase, JoiningBase, JoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        // 064x
        Kashida, JoiningBase, JoiningBase, JoiningBase,
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        RightJoining, JoiningBase, JoiningBase, ArabicMark,
        ArabicMark, ArabicMark, ArabicMark, ArabicMark,
        // 065x
        ArabicMark, Shadda, ArabicMark, Madda,
        Hamza, Hamza, ArabicMark, ArabicMark,
        ArabicMark, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        // 066x
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, JoiningBase, JoiningBase,
        // 067x
        SuperscriptAlef, RightJoining, RightJoining, RightJoining,
        NonJoiningBase, RightJoining, RightJoining, RightJoining,
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        // 068x
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        RightJoining, RightJoining, RightJoining, RightJoining,
        RightJoining, RightJoining, RightJoining, RightJoining,
        // 069x
        RightJoining, RightJoining, RightJoining, RightJoining,
        RightJoining, RightJoining, RightJoining, RightJoining,
        RightJoining, RightJoining, JoiningBase, JoiningBase,
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        // 06Ax
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        // 06Bx
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        // 06Cx
        RightJoining, JoiningBase, RightJoining, RightJoining,
        RightJoining, RightJoining, RightJoining, RightJoining,
        RightJoining, RightJoining, RightJoining, RightJoining,
        JoiningBase, RightJoining, JoiningBase, RightJoining,
        // 06Dx
        JoiningBase, JoiningBase, RightJoining, RightJoining,
        NonJoiningBase, RightJoining, KoranicUpperMark, KoranicUpperMark,
        KoranicUpperMark, KoranicUpperMark, KoranicUpperMark, KoranicUpperMark,
        KoranicUpperMark, NonJoiningBase, NonJoiningBase, KoranicUpperMark,
        // 06Ex
        KoranicUpperMark, ArabicMark, KoranicUpperMark, KoranicLowerMark,
        KoranicUpperMark, NonJoiningBase, NonJoiningBase, KoranicUpperMark,
        KoranicUpperMark, NonJoiningBase, KoranicLowerMark, KoranicUpperMark,
        KoranicUpperMark, KoranicLowerMark, RightJoining, RightJoining,
        // 06Fx
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, JoiningBase, JoiningBase,
        JoiningBase, NonJoiningBase, NonJoiningBase, JoiningBase,
        // Syriac
        // 070x
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, SyriacAbbrevMark,
        // 071x
        SyriacAleph, SyriacOtherMark, JoiningBase, JoiningBase,
        JoiningBase, RightJoiningRules, RightJoiningRules, RightJoining,
        RightJoining, RightJoining, JoiningBase, JoiningBase,
        JoiningBase, JoiningBase, RightJoining, JoiningBase,
        // 072x
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        RightJoining, JoiningBase, RightJoiningRules, JoiningBase,
        RightJoining, NonJoiningBase, NonJoiningBase, RightJoiningRules,
        // 073x
        SyriacUpperGreek, SyriacLowerGreek, SyriacUpperDotted, SyriacUpperGreek,
        SyriacLowerGreek, SyriacUpperDotted, SyriacUpperGreek, SyriacLowerGreek,
        SyriacLowerDotted, SyriacLowerDotted, SyriacUpperGreek, SyriacLowerGreek,
        SyriacLowerDotted, SyriacUpperGreek, SyriacLowerGreek, SyriacUpperDotted,
        // 074x
        SyriacOtherMark, SyriacQushshaya, SyriacRukkakha, SyriacUpperDuot,
        SyriacLowerDuot, SyriacUpperTriot, SyriacLowerTriot, SyriacUpperLineMark,
        SyriacLowerLineMark, SyriacOtherMark, SyriacOtherMark, NonJoiningBase,
        NonJoiningBase, RightJoining, JoiningBase, JoiningBase,
        // Arabic supplemental characters
        // 075x
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        JoiningBase, RightJoining, RightJoining, RightJoining,
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        // 076x
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        JoiningBase, JoiningBase, JoiningBase, JoiningBase,
        JoiningBase, JoiningBase, JoiningBase, RightJoining,
        RightJoining, JoiningBase, NonJoiningBase, NonJoiningBase,
        // 077x
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
    };

    // Classification of the combining diacritics U+0300..U+033F, indexed by
    // (char - SyriacExtraDiacriticsUnicodeRangeStart). Same four-by-four layout as above.
    // NonJoiningBase marks the code points that get no special Syriac treatment.
    private static readonly byte[] _syriacExtraClasses =
    {
        // Extra Syriac Diacritics
        // 030x
        NonJoiningBase, NonJoiningBase, NonJoiningBase, Hamza,
        Hamza, NonJoiningBase, NonJoiningBase, SyriacDotAbove,
        SyriacSeyameAbove, NonJoiningBase, SyriacQushshaya, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        // 031x
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        // 032x
        NonJoiningBase, NonJoiningBase, NonJoiningBase, SyriacDotBelow,
        SyriacSeyameBelow, Madda, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, SyriacLowerLineMark, SyriacLowerLineMark, NonJoiningBase,
        // 033x
        SyriacLowerLineMark, SyriacLowerLineMark, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
        NonJoiningBase, NonJoiningBase, NonJoiningBase, NonJoiningBase,
    };

    #endregion // end of Classification Tables
}
}
// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
// Copyright (c) Microsoft Corporation. All rights reserved.
Link Menu

This book is available now!
Buy at Amazon US or
Buy at Amazon UK
- WebBrowserContainer.cs
- FixedStringLookup.cs
- ListView.cs
- LocalizationComments.cs
- ModuleBuilder.cs
- BamlResourceSerializer.cs
- CodeExpressionCollection.cs
- BigInt.cs
- ConnectionPoint.cs
- CompilerScopeManager.cs
- HttpListenerException.cs
- Rotation3DKeyFrameCollection.cs
- PrintPageEvent.cs
- Crc32.cs
- PublisherMembershipCondition.cs
- CngKey.cs
- PenContext.cs
- QueryCorrelationInitializer.cs
- LayoutTableCell.cs
- FontWeightConverter.cs
- ProcessProtocolHandler.cs
- WebErrorHandler.cs
- HashJoinQueryOperatorEnumerator.cs
- AlgoModule.cs
- ExtensionQuery.cs
- IntegerFacetDescriptionElement.cs
- ElementNotEnabledException.cs
- AdjustableArrowCap.cs
- BamlBinaryWriter.cs
- CommandValueSerializer.cs
- MarshalByRefObject.cs
- PaintEvent.cs
- BinaryFormatterWriter.cs
- PassportAuthenticationEventArgs.cs
- SchemaLookupTable.cs
- Zone.cs
- CodeIterationStatement.cs
- SaveWorkflowAsyncResult.cs
- TagMapCollection.cs
- SpecularMaterial.cs
- CodeArrayIndexerExpression.cs
- SimpleWebHandlerParser.cs
- Typeface.cs
- MenuEventArgs.cs
- MarkerProperties.cs
- EncryptedKeyHashIdentifierClause.cs
- RegexFCD.cs
- TextSimpleMarkerProperties.cs
- WindowsFormsSectionHandler.cs
- SQLConvert.cs
- MarshalByValueComponent.cs
- EntityDataReader.cs
- DataGridColumnReorderingEventArgs.cs
- TaskExceptionHolder.cs
- CharEnumerator.cs
- WebBrowserContainer.cs
- FormViewUpdateEventArgs.cs
- RandomNumberGenerator.cs
- XPathNodeIterator.cs
- HttpCookiesSection.cs
- BufferAllocator.cs
- ValidationPropertyAttribute.cs
- OrderedDictionaryStateHelper.cs
- SmtpReplyReaderFactory.cs
- BuilderInfo.cs
- TextEvent.cs
- AutomationIdentifier.cs
- EventLogSession.cs
- DataGridCommandEventArgs.cs
- AnnotationHelper.cs
- SynchronizedInputPattern.cs
- ListInitExpression.cs
- GestureRecognitionResult.cs
- DetailsViewUpdatedEventArgs.cs
- StringDictionaryWithComparer.cs
- RadioButtonList.cs
- MimeWriter.cs
- DataSourceSelectArguments.cs
- KerberosRequestorSecurityToken.cs
- wmiprovider.cs
- cryptoapiTransform.cs
- ActivityContext.cs
- TypeConverterValueSerializer.cs
- DataGridViewRowErrorTextNeededEventArgs.cs
- MessageBox.cs
- SrgsElementFactory.cs
- GZipStream.cs
- ListControl.cs
- AppDomain.cs
- TextBoxAutomationPeer.cs
- ValidationManager.cs
- ComponentEditorPage.cs
- CodeAttributeDeclarationCollection.cs
- WorkflowValidationFailedException.cs
- RoutedPropertyChangedEventArgs.cs
- DesignerVerbCollection.cs
- SpellerHighlightLayer.cs
- CodeMethodReturnStatement.cs
- TextServicesLoader.cs
- GrammarBuilder.cs