Code:
/ Dotnetfx_Win7_3.5.1 / Dotnetfx_Win7_3.5.1 / 3.5.1 / DEVDIV / depot / DevDiv / releases / whidbey / NetFXspW7 / ndp / clr / src / BCL / System / Globalization / CharUnicodeInfo.cs / 1 / CharUnicodeInfo.cs
// ==++== // // Copyright (c) Microsoft Corporation. All rights reserved. // // ==--== //////////////////////////////////////////////////////////////////////////// // // Class: CharacterInfo // // Purpose: This class implements a set of methods for retrieving // character type information. Character type information is // independent of culture and region. // // Date: August 12, 1998 // //////////////////////////////////////////////////////////////////////////// namespace System.Globalization { //This class has only static members and therefore doesn't need to be serialized. using System; using System.Threading; using System.Runtime.InteropServices; using System.Runtime.CompilerServices; using System.Reflection; // Only statics, does not need to be marked with the serializable attribute public sealed class CharUnicodeInfo : System.Object { //--------------------------------------------------------------------// // Internal Information // //-------------------------------------------------------------------// // // Native methods to access the Unicode category data tables in charinfo.nlp. // internal const char HIGH_SURROGATE_START = '\ud800'; internal const char HIGH_SURROGATE_END = '\udbff'; internal const char LOW_SURROGATE_START = '\udc00'; internal const char LOW_SURROGATE_END = '\udfff'; internal const int UNICODE_CATEGORY_OFFSET = 0; internal const int BIDI_CATEGORY_OFFSET = 1; // The base pointer of the data table unsafe static byte* m_pDataTable; // The native pointer to the 12:4:4 index table of the Unicode cateogry data. unsafe static ushort* m_pCategoryLevel1Index; unsafe static byte* m_pCategoriesValue; // The native pointer to the 12:4:4 index table of the Unicode numeric data. // The value of this index table is an index into the real value table stored in m_pNumericValues. unsafe static ushort* m_pNumericLevel1Index; // The numeric value table, which is indexed by m_pNumericLevel1Index. // Every item contains the value for numeric value. // unsafe static double* m_pNumericValues; // To get around the IA64 alignment issue. Our double data is aligned in 8-byte boundary, but loader loads the embeded table starting // at 4-byte boundary. This cause a alignment issue since double is 8-byte. unsafe static byte* m_pNumericValues; // The digit value table, which is indexed by m_pNumericLevel1Index. It shares the same indice as m_pNumericValues. // Every item contains the value for decimal digit/digit value. unsafe static DigitValues* m_pDigitValues; internal const String UNICODE_INFO_FILE_NAME = "charinfo.nlp"; // The starting codepoint for Unicode plane 1. Plane 1 contains 0x010000 ~ 0x01ffff. internal const int UNICODE_PLANE01_START = 0x10000; // // This is the header for the native data table that we load from UNICODE_INFO_FILE_NAME. // // Excplicit layout is used here since a syntax like char[16] can not be used in sequential layout. [StructLayout(LayoutKind.Explicit)] internal unsafe struct UnicodeDataHeader { [FieldOffset(0)] internal char TableName; // WCHAR[16] [FieldOffset(0x20)] internal ushort version; // WORD[4] [FieldOffset(0x28)] internal uint OffsetToCategoriesIndex; // DWORD [FieldOffset(0x2c)] internal uint OffsetToCategoriesValue; // DWORD [FieldOffset(0x30)] internal uint OffsetToNumbericIndex; // DWORD [FieldOffset(0x34)] internal uint OffsetToDigitValue; // DWORD [FieldOffset(0x38)] internal uint OffsetToNumbericValue; // DWORD } // NOTE: It's important to specify pack size here, since the size of the structure is 2 bytes. Otherwise, // the default pack size will be 4. [StructLayout(LayoutKind.Sequential, Pack=2)] internal struct DigitValues { internal sbyte decimalDigit; internal sbyte digit; } //We need to allocate the underlying table that provides us with the information that we //use. We allocate this once in the class initializer and then we don't need to worry //about it again. // unsafe static CharUnicodeInfo() { m_pDataTable = GlobalizationAssembly.GetGlobalizationResourceBytePtr(typeof(CharUnicodeInfo).Assembly, UNICODE_INFO_FILE_NAME); UnicodeDataHeader* mainHeader = (UnicodeDataHeader*)m_pDataTable; // Set up the native pointer to different part of the tables. m_pCategoryLevel1Index = (ushort*) (m_pDataTable + mainHeader->OffsetToCategoriesIndex); m_pCategoriesValue = (byte*) (m_pDataTable + mainHeader->OffsetToCategoriesValue); m_pNumericLevel1Index = (ushort*) (m_pDataTable + mainHeader->OffsetToNumbericIndex); m_pNumericValues = (byte*) (m_pDataTable + mainHeader->OffsetToNumbericValue); m_pDigitValues = (DigitValues*) (m_pDataTable + mainHeader->OffsetToDigitValue); // Go to native side to make sure the native CharacterInfoTable pointer in the native side is initialized. nativeInitTable(m_pDataTable); } //////////////////////////////////////////////////////////////////////// // // Define a private ctor so that compiler won't generate a default public ctor for us. // //////////////////////////////////////////////////////////////////////// private CharUnicodeInfo() { } //////////////////////////////////////////////////////////////////////// // // Actions: // Convert the BMP character or surrogate pointed by index to a UTF32 value. // This is similar to Char.ConvertToUTF32, but the difference is that // it does not throw exceptions when invalid surrogate characters are passed in. // // WARNING: since it doesn't throw an exception it CAN return a value // in the surrogate range D800-DFFF, which are not legal unicode values. // //////////////////////////////////////////////////////////////////////// internal static int InternalConvertToUtf32(String s, int index) { BCLDebug.Assert(s != null, "s != null"); BCLDebug.Assert(index >= 0 && index < s.Length, "index < s.Length"); if (index < s.Length - 1) { int temp1 = (int)s[index] - HIGH_SURROGATE_START; if (temp1 >= 0 && temp1 <= 0x3ff) { int temp2 = (int)s[index+1] - LOW_SURROGATE_START; if (temp2 >= 0 && temp2 <= 0x3ff) { // Convert the surrogate to UTF32 and get the result. return ((temp1 * 0x400) + temp2 + UNICODE_PLANE01_START); } } } return ((int)s[index]); } //////////////////////////////////////////////////////////////////////// // // Convert a character or a surrogate pair starting at index of string s // to UTF32 value. // // Parameters: // s The string // index The starting index. It can point to a BMP character or // a surrogate pair. // len The length of the string. // charLength [out] If the index points to a BMP char, charLength // will be 1. If the index points to a surrogate pair, // charLength will be 2. // // WARNING: since it doesn't throw an exception it CAN return a value // in the surrogate range D800-DFFF, which are not legal unicode values. // // Returns: // The UTF32 value // //////////////////////////////////////////////////////////////////////// internal static int InternalConvertToUtf32(String s, int index, out int charLength) { BCLDebug.Assert(s != null, "s != null"); BCLDebug.Assert(s.Length > 0, "s.Length > 0"); BCLDebug.Assert(index >= 0 && index < s.Length, "index >= 0 && index < s.Length"); charLength = 1; if (index < s.Length - 1) { int temp1 = (int)s[index] - HIGH_SURROGATE_START; if (temp1 >= 0 && temp1 <= 0x3ff) { int temp2 = (int)s[index+1] - LOW_SURROGATE_START; if (temp2 >= 0 && temp2 <= 0x3ff) { // Convert the surrogate to UTF32 and get the result. charLength++; return ((temp1 * 0x400) + temp2 + UNICODE_PLANE01_START); } } } return ((int)s[index]); } //////////////////////////////////////////////////////////////////////// // // IsWhiteSpace // // Determines if the given character is a white space character. // //////////////////////////////////////////////////////////////////////// internal static bool IsWhiteSpace(String s, int index) { BCLDebug.Assert(s != null, "s!=null"); BCLDebug.Assert(index >= 0 && index < s.Length, "index >= 0 && index < s.Length"); UnicodeCategory uc = GetUnicodeCategory(s, index); // In Unicode 3.0, U+2028 is the only character which is under the category "LineSeparator". // And U+2029 is th eonly character which is under the category "ParagraphSeparator". switch (uc) { case (UnicodeCategory.SpaceSeparator): case (UnicodeCategory.LineSeparator): case (UnicodeCategory.ParagraphSeparator): return (true); } return (false); } internal static bool IsWhiteSpace(char c) { UnicodeCategory uc = GetUnicodeCategory(c); // In Unicode 3.0, U+2028 is the only character which is under the category "LineSeparator". // And U+2029 is th eonly character which is under the category "ParagraphSeparator". switch (uc) { case (UnicodeCategory.SpaceSeparator): case (UnicodeCategory.LineSeparator): case (UnicodeCategory.ParagraphSeparator): return (true); } return (false); } // // This is called by the public char and string, index versions // // Note that for ch in the range D800-DFFF we just treat it as any other non-numeric character // internal unsafe static double InternalGetNumericValue(int ch) { BCLDebug.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range."); // Get the level 2 item from the highest 12 bit (8 - 19) of ch. ushort index = m_pNumericLevel1Index[ch >> 8]; // Get the level 2 WORD offset from the 4 - 7 bit of ch. This provides the base offset of the level 3 table. // The offset is referred to an float item in m_pNumericFloatData. // Note that & has the lower precedence than addition, so don't forget the parathesis. index = m_pNumericLevel1Index[index + ((ch >> 4) & 0x000f)]; byte* pBytePtr = (byte*)&(m_pNumericLevel1Index[index]); // Get the result from the 0 -3 bit of ch. #if WIN64 // To get around the IA64 alignment issue. Our double data is aligned in 8-byte boundary, but loader loads the embeded table starting // at 4-byte boundary. This cause a alignment issue since double is 8-byte. byte* pSourcePtr = &(m_pNumericValues[pBytePtr[(ch & 0x000f)] * sizeof(double)]); if (((long)pSourcePtr % 8) != 0) { // We are not aligned in 8-byte boundary. Do a copy. double ret; byte* retPtr = (byte*)&ret; Buffer.memcpyimpl(pSourcePtr, retPtr, sizeof(double)); return (ret); } return (((double*)m_pNumericValues)[pBytePtr[(ch & 0x000f)]]); #else return (((double*)m_pNumericValues)[pBytePtr[(ch & 0x000f)]]); #endif } // // This is called by the public char and string, index versions // // Note that for ch in the range D800-DFFF we just treat it as any other non-numeric character // internal unsafe static DigitValues* InternalGetDigitValues(int ch) { BCLDebug.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range."); // Get the level 2 item from the highest 12 bit (8 - 19) of ch. ushort index = m_pNumericLevel1Index[ch >> 8]; // Get the level 2 WORD offset from the 4 - 7 bit of ch. This provides the base offset of the level 3 table. // The offset is referred to an float item in m_pNumericFloatData. // Note that & has the lower precedence than addition, so don't forget the parathesis. index = m_pNumericLevel1Index[index + ((ch >> 4) & 0x000f)]; byte* pBytePtr = (byte*)&(m_pNumericLevel1Index[index]); // Get the result from the 0 -3 bit of ch. return &(m_pDigitValues[pBytePtr[(ch & 0x000f)]]); } internal unsafe static sbyte InternalGetDecimalDigitValue(int ch) { return (InternalGetDigitValues(ch)->decimalDigit); } internal unsafe static sbyte InternalGetDigitValue(int ch) { return (InternalGetDigitValues(ch)->digit); } //////////////////////////////////////////////////////////////////////// // //Returns the numeric value associated with the character c. If the character is a fraction, // the return value will not be an integer. If the character does not have a numeric value, the return value is -1. // //Returns: // the numeric value for the specified Unicode character. If the character does not have a numeric value, the return value is -1. //Arguments: // ch a Unicode character //Exceptions: // ArgumentNullException // ArgumentOutOfRangeException // //////////////////////////////////////////////////////////////////////// public static double GetNumericValue(char ch) { return (InternalGetNumericValue(ch)); } public static double GetNumericValue(String s, int index) { if (s == null) { throw new ArgumentNullException("s"); } if (index < 0 || index >= s.Length) { throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index")); } return (InternalGetNumericValue(InternalConvertToUtf32(s, index))); } //////////////////////////////////////////////////////////////////////// // //Returns the decimal digit value associated with the character c. // // The value should be from 0 ~ 9. // If the character does not have a numeric value, the return value is -1. // From Unicode.org: Decimal Digits. Digits that can be used to form decimal-radix numbers. //Returns: // the decimal digit value for the specified Unicode character. If the character does not have a decimal digit value, the return value is -1. //Arguments: // ch a Unicode character //Exceptions: // ArgumentNullException // ArgumentOutOfRangeException // //////////////////////////////////////////////////////////////////////// public static int GetDecimalDigitValue(char ch) { return (InternalGetDecimalDigitValue(ch)); } public static int GetDecimalDigitValue(String s, int index) { if (s == null) { throw new ArgumentNullException("s"); } if (index < 0 || index >= s.Length) { throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index")); } return (InternalGetDecimalDigitValue(InternalConvertToUtf32(s, index))); } //////////////////////////////////////////////////////////////////////// // //Action: Returns the digit value associated with the character c. // If the character does not have a numeric value, the return value is -1. // From Unicode.org: If the character represents a digit, not necessarily a decimal digit, // the value is here. This covers digits which do not form decimal radix forms, such as the compatibility superscript digits. // // An example is: U+2460 IRCLED DIGIT ONE. This character has digit value 1, but does not have associcated decimal digit value. // //Returns: // the digit value for the specified Unicode character. If the character does not have a digit value, the return value is -1. //Arguments: // ch a Unicode character //Exceptions: // ArgumentNullException // ArgumentOutOfRangeException // //////////////////////////////////////////////////////////////////////// public static int GetDigitValue(char ch) { return (InternalGetDigitValue(ch)); } public static int GetDigitValue(String s, int index) { if (s == null) { throw new ArgumentNullException("s"); } if (index < 0 || index >= s.Length) { throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index")); } return (InternalGetDigitValue(InternalConvertToUtf32(s, index))); } public static UnicodeCategory GetUnicodeCategory(char ch) { return (InternalGetUnicodeCategory(ch)) ; } public static UnicodeCategory GetUnicodeCategory(String s, int index) { if (s==null) throw new ArgumentNullException("s"); if (((uint)index)>=((uint)s.Length)) { throw new ArgumentOutOfRangeException("index"); } return InternalGetUnicodeCategory(s, index); } internal unsafe static UnicodeCategory InternalGetUnicodeCategory(int ch) { return ((UnicodeCategory)InternalGetCategoryValue(ch, UNICODE_CATEGORY_OFFSET)); } //////////////////////////////////////////////////////////////////////// // //Action: Returns the Unicode Category property for the character c. //Returns: // an value in UnicodeCategory enum //Arguments: // ch a Unicode character //Exceptions: // None // //Note that this API will return values for D800-DF00 surrogate halves. // //////////////////////////////////////////////////////////////////////// internal unsafe static byte InternalGetCategoryValue(int ch, int offset) { BCLDebug.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range."); // Get the level 2 item from the highest 12 bit (8 - 19) of ch. ushort index = m_pCategoryLevel1Index[ch >> 8]; // Get the level 2 WORD offset from the 4 - 7 bit of ch. This provides the base offset of the level 3 table. // Note that & has the lower precedence than addition, so don't forget the parathesis. index = m_pCategoryLevel1Index[index + ((ch >> 4) & 0x000f)]; byte* pBytePtr = (byte*)&(m_pCategoryLevel1Index[index]); // Get the result from the 0 -3 bit of ch. byte valueIndex = pBytePtr[(ch & 0x000f)]; byte uc = m_pCategoriesValue[valueIndex * 2 + offset]; // // Make sure that OtherNotAssigned is the last category in UnicodeCategory. // If that changes, change the following assertion as well. // //BCLDebug.Assert(uc >= 0 && uc <= UnicodeCategory.OtherNotAssigned, "Table returns incorrect Unicode category"); return (uc); } // internal static BidiCategory GetBidiCategory(char ch) { // return ((BidiCategory)InternalGetCategoryValue(c, BIDI_CATEGORY_OFFSET)); // } internal static BidiCategory GetBidiCategory(String s, int index) { if (s==null) throw new ArgumentNullException("s"); if (((uint)index)>=((uint)s.Length)) { throw new ArgumentOutOfRangeException("index"); } return ((BidiCategory)InternalGetCategoryValue(InternalConvertToUtf32(s, index), BIDI_CATEGORY_OFFSET)); } //////////////////////////////////////////////////////////////////////// // //Action: Returns the Unicode Category property for the character c. //Returns: // an value in UnicodeCategory enum //Arguments: // value a Unicode String // index Index for the specified string. //Exceptions: // None // //////////////////////////////////////////////////////////////////////// internal static UnicodeCategory InternalGetUnicodeCategory(String value, int index) { BCLDebug.Assert(value != null, "value can not be null"); BCLDebug.Assert(index < value.Length, "index < value.Length"); return (InternalGetUnicodeCategory(InternalConvertToUtf32(value, index))); } //////////////////////////////////////////////////////////////////////// // // Get the Unicode category of the character starting at index. If the character is in BMP, charLength will return 1. // If the character is a valid surrogate pair, charLength will return 2. // //////////////////////////////////////////////////////////////////////// internal static UnicodeCategory InternalGetUnicodeCategory(String str, int index, out int charLength) { BCLDebug.Assert(str != null, "str can not be null"); BCLDebug.Assert(str.Length > 0, "str.Length > 0");; BCLDebug.Assert(index >= 0 && index < str.Length, "index >= 0 && index < str.Length"); return (InternalGetUnicodeCategory(InternalConvertToUtf32(str, index, out charLength))); } internal static bool IsCombiningCategory(UnicodeCategory uc) { BCLDebug.Assert(uc >= 0, "uc >= 0"); return ( uc == UnicodeCategory.NonSpacingMark || uc == UnicodeCategory.SpacingCombiningMark || uc == UnicodeCategory.EnclosingMark ); } [MethodImplAttribute(MethodImplOptions.InternalCall)] private unsafe static extern void nativeInitTable(byte* bytePtr); } } // File provided for Reference Use Only by Microsoft Corporation (c) 2007. // ==++== // // Copyright (c) Microsoft Corporation. All rights reserved. // // ==--== //////////////////////////////////////////////////////////////////////////// // // Class: CharacterInfo // // Purpose: This class implements a set of methods for retrieving // character type information. Character type information is // independent of culture and region. // // Date: August 12, 1998 // //////////////////////////////////////////////////////////////////////////// namespace System.Globalization { //This class has only static members and therefore doesn't need to be serialized. using System; using System.Threading; using System.Runtime.InteropServices; using System.Runtime.CompilerServices; using System.Reflection; // Only statics, does not need to be marked with the serializable attribute public sealed class CharUnicodeInfo : System.Object { //--------------------------------------------------------------------// // Internal Information // //-------------------------------------------------------------------// // // Native methods to access the Unicode category data tables in charinfo.nlp. // internal const char HIGH_SURROGATE_START = '\ud800'; internal const char HIGH_SURROGATE_END = '\udbff'; internal const char LOW_SURROGATE_START = '\udc00'; internal const char LOW_SURROGATE_END = '\udfff'; internal const int UNICODE_CATEGORY_OFFSET = 0; internal const int BIDI_CATEGORY_OFFSET = 1; // The base pointer of the data table unsafe static byte* m_pDataTable; // The native pointer to the 12:4:4 index table of the Unicode cateogry data. unsafe static ushort* m_pCategoryLevel1Index; unsafe static byte* m_pCategoriesValue; // The native pointer to the 12:4:4 index table of the Unicode numeric data. // The value of this index table is an index into the real value table stored in m_pNumericValues. unsafe static ushort* m_pNumericLevel1Index; // The numeric value table, which is indexed by m_pNumericLevel1Index. // Every item contains the value for numeric value. // unsafe static double* m_pNumericValues; // To get around the IA64 alignment issue. Our double data is aligned in 8-byte boundary, but loader loads the embeded table starting // at 4-byte boundary. This cause a alignment issue since double is 8-byte. unsafe static byte* m_pNumericValues; // The digit value table, which is indexed by m_pNumericLevel1Index. It shares the same indice as m_pNumericValues. // Every item contains the value for decimal digit/digit value. unsafe static DigitValues* m_pDigitValues; internal const String UNICODE_INFO_FILE_NAME = "charinfo.nlp"; // The starting codepoint for Unicode plane 1. Plane 1 contains 0x010000 ~ 0x01ffff. internal const int UNICODE_PLANE01_START = 0x10000; // // This is the header for the native data table that we load from UNICODE_INFO_FILE_NAME. // // Excplicit layout is used here since a syntax like char[16] can not be used in sequential layout. [StructLayout(LayoutKind.Explicit)] internal unsafe struct UnicodeDataHeader { [FieldOffset(0)] internal char TableName; // WCHAR[16] [FieldOffset(0x20)] internal ushort version; // WORD[4] [FieldOffset(0x28)] internal uint OffsetToCategoriesIndex; // DWORD [FieldOffset(0x2c)] internal uint OffsetToCategoriesValue; // DWORD [FieldOffset(0x30)] internal uint OffsetToNumbericIndex; // DWORD [FieldOffset(0x34)] internal uint OffsetToDigitValue; // DWORD [FieldOffset(0x38)] internal uint OffsetToNumbericValue; // DWORD } // NOTE: It's important to specify pack size here, since the size of the structure is 2 bytes. Otherwise, // the default pack size will be 4. [StructLayout(LayoutKind.Sequential, Pack=2)] internal struct DigitValues { internal sbyte decimalDigit; internal sbyte digit; } //We need to allocate the underlying table that provides us with the information that we //use. We allocate this once in the class initializer and then we don't need to worry //about it again. // unsafe static CharUnicodeInfo() { m_pDataTable = GlobalizationAssembly.GetGlobalizationResourceBytePtr(typeof(CharUnicodeInfo).Assembly, UNICODE_INFO_FILE_NAME); UnicodeDataHeader* mainHeader = (UnicodeDataHeader*)m_pDataTable; // Set up the native pointer to different part of the tables. m_pCategoryLevel1Index = (ushort*) (m_pDataTable + mainHeader->OffsetToCategoriesIndex); m_pCategoriesValue = (byte*) (m_pDataTable + mainHeader->OffsetToCategoriesValue); m_pNumericLevel1Index = (ushort*) (m_pDataTable + mainHeader->OffsetToNumbericIndex); m_pNumericValues = (byte*) (m_pDataTable + mainHeader->OffsetToNumbericValue); m_pDigitValues = (DigitValues*) (m_pDataTable + mainHeader->OffsetToDigitValue); // Go to native side to make sure the native CharacterInfoTable pointer in the native side is initialized. nativeInitTable(m_pDataTable); } //////////////////////////////////////////////////////////////////////// // // Define a private ctor so that compiler won't generate a default public ctor for us. // //////////////////////////////////////////////////////////////////////// private CharUnicodeInfo() { } //////////////////////////////////////////////////////////////////////// // // Actions: // Convert the BMP character or surrogate pointed by index to a UTF32 value. // This is similar to Char.ConvertToUTF32, but the difference is that // it does not throw exceptions when invalid surrogate characters are passed in. // // WARNING: since it doesn't throw an exception it CAN return a value // in the surrogate range D800-DFFF, which are not legal unicode values. // //////////////////////////////////////////////////////////////////////// internal static int InternalConvertToUtf32(String s, int index) { BCLDebug.Assert(s != null, "s != null"); BCLDebug.Assert(index >= 0 && index < s.Length, "index < s.Length"); if (index < s.Length - 1) { int temp1 = (int)s[index] - HIGH_SURROGATE_START; if (temp1 >= 0 && temp1 <= 0x3ff) { int temp2 = (int)s[index+1] - LOW_SURROGATE_START; if (temp2 >= 0 && temp2 <= 0x3ff) { // Convert the surrogate to UTF32 and get the result. return ((temp1 * 0x400) + temp2 + UNICODE_PLANE01_START); } } } return ((int)s[index]); } //////////////////////////////////////////////////////////////////////// // // Convert a character or a surrogate pair starting at index of string s // to UTF32 value. // // Parameters: // s The string // index The starting index. It can point to a BMP character or // a surrogate pair. // len The length of the string. // charLength [out] If the index points to a BMP char, charLength // will be 1. If the index points to a surrogate pair, // charLength will be 2. // // WARNING: since it doesn't throw an exception it CAN return a value // in the surrogate range D800-DFFF, which are not legal unicode values. // // Returns: // The UTF32 value // //////////////////////////////////////////////////////////////////////// internal static int InternalConvertToUtf32(String s, int index, out int charLength) { BCLDebug.Assert(s != null, "s != null"); BCLDebug.Assert(s.Length > 0, "s.Length > 0"); BCLDebug.Assert(index >= 0 && index < s.Length, "index >= 0 && index < s.Length"); charLength = 1; if (index < s.Length - 1) { int temp1 = (int)s[index] - HIGH_SURROGATE_START; if (temp1 >= 0 && temp1 <= 0x3ff) { int temp2 = (int)s[index+1] - LOW_SURROGATE_START; if (temp2 >= 0 && temp2 <= 0x3ff) { // Convert the surrogate to UTF32 and get the result. charLength++; return ((temp1 * 0x400) + temp2 + UNICODE_PLANE01_START); } } } return ((int)s[index]); } //////////////////////////////////////////////////////////////////////// // // IsWhiteSpace // // Determines if the given character is a white space character. // //////////////////////////////////////////////////////////////////////// internal static bool IsWhiteSpace(String s, int index) { BCLDebug.Assert(s != null, "s!=null"); BCLDebug.Assert(index >= 0 && index < s.Length, "index >= 0 && index < s.Length"); UnicodeCategory uc = GetUnicodeCategory(s, index); // In Unicode 3.0, U+2028 is the only character which is under the category "LineSeparator". // And U+2029 is th eonly character which is under the category "ParagraphSeparator". switch (uc) { case (UnicodeCategory.SpaceSeparator): case (UnicodeCategory.LineSeparator): case (UnicodeCategory.ParagraphSeparator): return (true); } return (false); } internal static bool IsWhiteSpace(char c) { UnicodeCategory uc = GetUnicodeCategory(c); // In Unicode 3.0, U+2028 is the only character which is under the category "LineSeparator". // And U+2029 is th eonly character which is under the category "ParagraphSeparator". switch (uc) { case (UnicodeCategory.SpaceSeparator): case (UnicodeCategory.LineSeparator): case (UnicodeCategory.ParagraphSeparator): return (true); } return (false); } // // This is called by the public char and string, index versions // // Note that for ch in the range D800-DFFF we just treat it as any other non-numeric character // internal unsafe static double InternalGetNumericValue(int ch) { BCLDebug.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range."); // Get the level 2 item from the highest 12 bit (8 - 19) of ch. ushort index = m_pNumericLevel1Index[ch >> 8]; // Get the level 2 WORD offset from the 4 - 7 bit of ch. This provides the base offset of the level 3 table. // The offset is referred to an float item in m_pNumericFloatData. // Note that & has the lower precedence than addition, so don't forget the parathesis. index = m_pNumericLevel1Index[index + ((ch >> 4) & 0x000f)]; byte* pBytePtr = (byte*)&(m_pNumericLevel1Index[index]); // Get the result from the 0 -3 bit of ch. #if WIN64 // To get around the IA64 alignment issue. Our double data is aligned in 8-byte boundary, but loader loads the embeded table starting // at 4-byte boundary. This cause a alignment issue since double is 8-byte. byte* pSourcePtr = &(m_pNumericValues[pBytePtr[(ch & 0x000f)] * sizeof(double)]); if (((long)pSourcePtr % 8) != 0) { // We are not aligned in 8-byte boundary. Do a copy. double ret; byte* retPtr = (byte*)&ret; Buffer.memcpyimpl(pSourcePtr, retPtr, sizeof(double)); return (ret); } return (((double*)m_pNumericValues)[pBytePtr[(ch & 0x000f)]]); #else return (((double*)m_pNumericValues)[pBytePtr[(ch & 0x000f)]]); #endif } // // This is called by the public char and string, index versions // // Note that for ch in the range D800-DFFF we just treat it as any other non-numeric character // internal unsafe static DigitValues* InternalGetDigitValues(int ch) { BCLDebug.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range."); // Get the level 2 item from the highest 12 bit (8 - 19) of ch. ushort index = m_pNumericLevel1Index[ch >> 8]; // Get the level 2 WORD offset from the 4 - 7 bit of ch. This provides the base offset of the level 3 table. // The offset is referred to an float item in m_pNumericFloatData. // Note that & has the lower precedence than addition, so don't forget the parathesis. index = m_pNumericLevel1Index[index + ((ch >> 4) & 0x000f)]; byte* pBytePtr = (byte*)&(m_pNumericLevel1Index[index]); // Get the result from the 0 -3 bit of ch. return &(m_pDigitValues[pBytePtr[(ch & 0x000f)]]); } internal unsafe static sbyte InternalGetDecimalDigitValue(int ch) { return (InternalGetDigitValues(ch)->decimalDigit); } internal unsafe static sbyte InternalGetDigitValue(int ch) { return (InternalGetDigitValues(ch)->digit); } //////////////////////////////////////////////////////////////////////// // //Returns the numeric value associated with the character c. If the character is a fraction, // the return value will not be an integer. If the character does not have a numeric value, the return value is -1. // //Returns: // the numeric value for the specified Unicode character. If the character does not have a numeric value, the return value is -1. //Arguments: // ch a Unicode character //Exceptions: // ArgumentNullException // ArgumentOutOfRangeException // //////////////////////////////////////////////////////////////////////// public static double GetNumericValue(char ch) { return (InternalGetNumericValue(ch)); } public static double GetNumericValue(String s, int index) { if (s == null) { throw new ArgumentNullException("s"); } if (index < 0 || index >= s.Length) { throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index")); } return (InternalGetNumericValue(InternalConvertToUtf32(s, index))); } //////////////////////////////////////////////////////////////////////// // //Returns the decimal digit value associated with the character c. // // The value should be from 0 ~ 9. // If the character does not have a numeric value, the return value is -1. // From Unicode.org: Decimal Digits. Digits that can be used to form decimal-radix numbers. //Returns: // the decimal digit value for the specified Unicode character. If the character does not have a decimal digit value, the return value is -1. //Arguments: // ch a Unicode character //Exceptions: // ArgumentNullException // ArgumentOutOfRangeException // //////////////////////////////////////////////////////////////////////// public static int GetDecimalDigitValue(char ch) { return (InternalGetDecimalDigitValue(ch)); } public static int GetDecimalDigitValue(String s, int index) { if (s == null) { throw new ArgumentNullException("s"); } if (index < 0 || index >= s.Length) { throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index")); } return (InternalGetDecimalDigitValue(InternalConvertToUtf32(s, index))); } //////////////////////////////////////////////////////////////////////// // //Action: Returns the digit value associated with the character c. // If the character does not have a numeric value, the return value is -1. // From Unicode.org: If the character represents a digit, not necessarily a decimal digit, // the value is here. This covers digits which do not form decimal radix forms, such as the compatibility superscript digits. // // An example is: U+2460 IRCLED DIGIT ONE. This character has digit value 1, but does not have associcated decimal digit value. // //Returns: // the digit value for the specified Unicode character. If the character does not have a digit value, the return value is -1. //Arguments: // ch a Unicode character //Exceptions: // ArgumentNullException // ArgumentOutOfRangeException // //////////////////////////////////////////////////////////////////////// public static int GetDigitValue(char ch) { return (InternalGetDigitValue(ch)); } public static int GetDigitValue(String s, int index) { if (s == null) { throw new ArgumentNullException("s"); } if (index < 0 || index >= s.Length) { throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index")); } return (InternalGetDigitValue(InternalConvertToUtf32(s, index))); } public static UnicodeCategory GetUnicodeCategory(char ch) { return (InternalGetUnicodeCategory(ch)) ; } public static UnicodeCategory GetUnicodeCategory(String s, int index) { if (s==null) throw new ArgumentNullException("s"); if (((uint)index)>=((uint)s.Length)) { throw new ArgumentOutOfRangeException("index"); } return InternalGetUnicodeCategory(s, index); } internal unsafe static UnicodeCategory InternalGetUnicodeCategory(int ch) { return ((UnicodeCategory)InternalGetCategoryValue(ch, UNICODE_CATEGORY_OFFSET)); } //////////////////////////////////////////////////////////////////////// // //Action: Returns the Unicode Category property for the character c. //Returns: // an value in UnicodeCategory enum //Arguments: // ch a Unicode character //Exceptions: // None // //Note that this API will return values for D800-DF00 surrogate halves. // //////////////////////////////////////////////////////////////////////// internal unsafe static byte InternalGetCategoryValue(int ch, int offset) { BCLDebug.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range."); // Get the level 2 item from the highest 12 bit (8 - 19) of ch. ushort index = m_pCategoryLevel1Index[ch >> 8]; // Get the level 2 WORD offset from the 4 - 7 bit of ch. This provides the base offset of the level 3 table. // Note that & has the lower precedence than addition, so don't forget the parathesis. index = m_pCategoryLevel1Index[index + ((ch >> 4) & 0x000f)]; byte* pBytePtr = (byte*)&(m_pCategoryLevel1Index[index]); // Get the result from the 0 -3 bit of ch. byte valueIndex = pBytePtr[(ch & 0x000f)]; byte uc = m_pCategoriesValue[valueIndex * 2 + offset]; // // Make sure that OtherNotAssigned is the last category in UnicodeCategory. // If that changes, change the following assertion as well. // //BCLDebug.Assert(uc >= 0 && uc <= UnicodeCategory.OtherNotAssigned, "Table returns incorrect Unicode category"); return (uc); } // internal static BidiCategory GetBidiCategory(char ch) { // return ((BidiCategory)InternalGetCategoryValue(c, BIDI_CATEGORY_OFFSET)); // } internal static BidiCategory GetBidiCategory(String s, int index) { if (s==null) throw new ArgumentNullException("s"); if (((uint)index)>=((uint)s.Length)) { throw new ArgumentOutOfRangeException("index"); } return ((BidiCategory)InternalGetCategoryValue(InternalConvertToUtf32(s, index), BIDI_CATEGORY_OFFSET)); } //////////////////////////////////////////////////////////////////////// // //Action: Returns the Unicode Category property for the character c. //Returns: // an value in UnicodeCategory enum //Arguments: // value a Unicode String // index Index for the specified string. //Exceptions: // None // //////////////////////////////////////////////////////////////////////// internal static UnicodeCategory InternalGetUnicodeCategory(String value, int index) { BCLDebug.Assert(value != null, "value can not be null"); BCLDebug.Assert(index < value.Length, "index < value.Length"); return (InternalGetUnicodeCategory(InternalConvertToUtf32(value, index))); } //////////////////////////////////////////////////////////////////////// // // Get the Unicode category of the character starting at index. If the character is in BMP, charLength will return 1. // If the character is a valid surrogate pair, charLength will return 2. // //////////////////////////////////////////////////////////////////////// internal static UnicodeCategory InternalGetUnicodeCategory(String str, int index, out int charLength) { BCLDebug.Assert(str != null, "str can not be null"); BCLDebug.Assert(str.Length > 0, "str.Length > 0");; BCLDebug.Assert(index >= 0 && index < str.Length, "index >= 0 && index < str.Length"); return (InternalGetUnicodeCategory(InternalConvertToUtf32(str, index, out charLength))); } internal static bool IsCombiningCategory(UnicodeCategory uc) { BCLDebug.Assert(uc >= 0, "uc >= 0"); return ( uc == UnicodeCategory.NonSpacingMark || uc == UnicodeCategory.SpacingCombiningMark || uc == UnicodeCategory.EnclosingMark ); } [MethodImplAttribute(MethodImplOptions.InternalCall)] private unsafe static extern void nativeInitTable(byte* bytePtr); } } // File provided for Reference Use Only by Microsoft Corporation (c) 2007.
Link Menu

This book is available now!
Buy at Amazon US or
Buy at Amazon UK
- XmlArrayItemAttribute.cs
- WpfPayload.cs
- StreamWriter.cs
- ScriptReference.cs
- EndpointIdentityExtension.cs
- XmlArrayItemAttributes.cs
- SqlXmlStorage.cs
- ProcessHostMapPath.cs
- CodeRegionDirective.cs
- VirtualPathUtility.cs
- DataObject.cs
- AccessDataSourceDesigner.cs
- DeclarativeCatalogPart.cs
- Literal.cs
- CompoundFileStorageReference.cs
- BooleanAnimationBase.cs
- SqlNotificationRequest.cs
- WorkflowDesigner.cs
- PointAnimationClockResource.cs
- TextBreakpoint.cs
- TextProviderWrapper.cs
- ResourceDescriptionAttribute.cs
- DecimalAnimationBase.cs
- Thickness.cs
- XsltInput.cs
- DoubleAnimationBase.cs
- HtmlTableCell.cs
- Utils.cs
- IsolatedStoragePermission.cs
- PrinterResolution.cs
- TypeBrowser.xaml.cs
- EventLogPermission.cs
- HttpServerVarsCollection.cs
- DictionarySectionHandler.cs
- UpnEndpointIdentity.cs
- DictionaryGlobals.cs
- SafeRightsManagementEnvironmentHandle.cs
- TypeConverterHelper.cs
- KeyFrames.cs
- SignedXmlDebugLog.cs
- TitleStyle.cs
- MSHTMLHost.cs
- Label.cs
- ProviderCommandInfoUtils.cs
- VisualStyleRenderer.cs
- IItemContainerGenerator.cs
- QueryResult.cs
- MessageRpc.cs
- InputBindingCollection.cs
- Debug.cs
- ContextMenuStrip.cs
- LOSFormatter.cs
- IOThreadScheduler.cs
- ComponentCollection.cs
- SqlCharStream.cs
- prompt.cs
- ImageField.cs
- ExtenderProvidedPropertyAttribute.cs
- LeafCellTreeNode.cs
- Pair.cs
- TrustSection.cs
- NullableDoubleMinMaxAggregationOperator.cs
- DataGridParentRows.cs
- StaticTextPointer.cs
- TypedDataSetSchemaImporterExtension.cs
- CachedFontFamily.cs
- Listbox.cs
- EncoderNLS.cs
- Internal.cs
- StrokeCollectionDefaultValueFactory.cs
- UiaCoreTypesApi.cs
- rsa.cs
- MeshGeometry3D.cs
- StylusPlugin.cs
- XmlWriterSettings.cs
- StateManagedCollection.cs
- SafeHandles.cs
- FixedPage.cs
- IntegerValidator.cs
- CodeGeneratorOptions.cs
- SelectionHighlightInfo.cs
- Range.cs
- WebRequest.cs
- PresentationAppDomainManager.cs
- OrderedDictionary.cs
- MimeTypePropertyAttribute.cs
- NameValueConfigurationElement.cs
- KeyValueInternalCollection.cs
- WebPageTraceListener.cs
- Container.cs
- PropertyTabAttribute.cs
- TypeReference.cs
- UpdatePanelTrigger.cs
- ProviderConnectionPointCollection.cs
- CoTaskMemUnicodeSafeHandle.cs
- xmlfixedPageInfo.cs
- IgnoreFileBuildProvider.cs
- PointIndependentAnimationStorage.cs
- GroupQuery.cs
- QueryResults.cs