Code:
/ 4.0 / 4.0 / DEVDIV_TFS / Dev10 / Releases / RTMRel / ndp / clr / src / BCL / System / Globalization / CharUnicodeInfo.cs / 1305376 / CharUnicodeInfo.cs
// ==++==
//
//   Copyright (c) Microsoft Corporation.  All rights reserved.
//
// ==--==
////////////////////////////////////////////////////////////////////////////
//
//  Class:    CharacterInfo
//
//  Purpose:  This class implements a set of methods for retrieving
//            character type information.  Character type information is
//            independent of culture and region.
//
//  Date:     August 12, 1998
//
////////////////////////////////////////////////////////////////////////////

namespace System.Globalization {

    // This class has only static members and therefore doesn't need to be serialized.

    using System;
    using System.Threading;
    using System.Runtime.InteropServices;
    using System.Runtime.CompilerServices;
    using System.Runtime.Versioning;
    using System.Reflection;
    using System.Diagnostics.Contracts;

    public static class CharUnicodeInfo {
        //--------------------------------------------------------------------//
        //                        Internal Information                        //
        //--------------------------------------------------------------------//

        //
        // Native methods to access the Unicode category data tables in charinfo.nlp.
        //
        internal const char HIGH_SURROGATE_START = '\ud800';
        internal const char HIGH_SURROGATE_END = '\udbff';
        internal const char LOW_SURROGATE_START = '\udc00';
        internal const char LOW_SURROGATE_END = '\udfff';

        // Byte offsets inside one two-byte record of s_pCategoriesValue
        // (see InternalGetCategoryValue, which reads [valueIndex * 2 + offset]).
        internal const int UNICODE_CATEGORY_OFFSET = 0;
        internal const int BIDI_CATEGORY_OFFSET = 1;

        // The base pointer of the data table.
        unsafe static byte* s_pDataTable;

        // The native pointer to the 12:4:4 index table of the Unicode category data.
        unsafe static ushort* s_pCategoryLevel1Index;
        unsafe static byte* s_pCategoriesValue;

        // The native pointer to the 12:4:4 index table of the Unicode numeric data.
        // The value of this index table is an index into the real value table stored in s_pNumericValues.
        unsafe static ushort* s_pNumericLevel1Index;

        // The numeric value table, which is indexed by s_pNumericLevel1Index.
        // Every item contains the value for numeric value.
        // unsafe static double* s_pNumericValues;
        // To get around the IA64 alignment issue.  Our double data is aligned in 8-byte boundary, but loader loads the embedded table starting
        // at 4-byte boundary.  This causes an alignment issue since double is 8-byte.
        unsafe static byte* s_pNumericValues;

        // The digit value table, which is indexed by s_pNumericLevel1Index.  It shares the same indices as s_pNumericValues.
        // Every item contains the value for decimal digit/digit value.
        unsafe static DigitValues* s_pDigitValues;

        internal const String UNICODE_INFO_FILE_NAME = "charinfo.nlp";

        // The starting codepoint for Unicode plane 1.  Plane 1 contains 0x010000 ~ 0x01ffff.
        internal const int UNICODE_PLANE01_START = 0x10000;

        //
        // This is the header for the native data table that we load from UNICODE_INFO_FILE_NAME.
        //
        // Explicit layout is used here since a syntax like char[16] can not be used in sequential layout.
        [StructLayout(LayoutKind.Explicit)]
        internal unsafe struct UnicodeDataHeader {
            [FieldOffset(0)]
            internal char TableName;                // WCHAR[16]
            [FieldOffset(0x20)]
            internal ushort version;                // WORD[4]
            [FieldOffset(0x28)]
            internal uint OffsetToCategoriesIndex;  // DWORD
            [FieldOffset(0x2c)]
            internal uint OffsetToCategoriesValue;  // DWORD
            [FieldOffset(0x30)]
            internal uint OffsetToNumbericIndex;    // DWORD
            [FieldOffset(0x34)]
            internal uint OffsetToDigitValue;       // DWORD
            [FieldOffset(0x38)]
            internal uint OffsetToNumbericValue;    // DWORD
        }

        // NOTE: It's important to specify pack size here, since the size of the structure is 2 bytes.  Otherwise,
        // the default pack size will be 4.
        [StructLayout(LayoutKind.Sequential, Pack=2)]
        internal struct DigitValues {
            internal sbyte decimalDigit;
            internal sbyte digit;
        }

        //
        // Class initializer.  Obtains the charinfo.nlp table once and caches a pointer
        // to each section named in its header, so the lookup methods below only have
        // to index into those pointers.
        //
        [System.Security.SecuritySafeCritical]  // auto-generated
        [ResourceExposure(ResourceScope.None)]
        [ResourceConsumption(ResourceScope.Process, ResourceScope.Process)]
        unsafe static CharUnicodeInfo() {
            s_pDataTable = GlobalizationAssembly.GetGlobalizationResourceBytePtr(typeof(CharUnicodeInfo).Assembly, UNICODE_INFO_FILE_NAME);
            UnicodeDataHeader* header = (UnicodeDataHeader*)s_pDataTable;

            // Every section offset in the header is relative to the start of the table.
            s_pCategoryLevel1Index = (ushort*)(s_pDataTable + header->OffsetToCategoriesIndex);
            s_pCategoriesValue = (byte*)(s_pDataTable + header->OffsetToCategoriesValue);
            s_pNumericLevel1Index = (ushort*)(s_pDataTable + header->OffsetToNumbericIndex);
            s_pNumericValues = (byte*)(s_pDataTable + header->OffsetToNumbericValue);
            s_pDigitValues = (DigitValues*)(s_pDataTable + header->OffsetToDigitValue);

            // Go to native side to make sure the native CharacterInfoTable pointer in the native side is initialized.
            nativeInitTable(s_pDataTable);
        }

        ////////////////////////////////////////////////////////////////////////
        //
        // Actions:
        //   Convert the BMP character or surrogate pair starting at s[index] to a
        //   UTF32 value.  This is similar to Char.ConvertToUTF32, but it does not
        //   throw when it meets a malformed surrogate sequence: the char at index
        //   is returned unchanged instead.
        //
        // WARNING: since it doesn't throw an exception it CAN return a value
        //          in the surrogate range D800-DFFF, which are not legal unicode values.
        //
        ////////////////////////////////////////////////////////////////////////

        internal static int InternalConvertToUtf32(String s, int index) {
            Contract.Assert(s != null, "s != null");
            Contract.Assert(index >= 0 && index < s.Length, "index < s.Length");

            // A pair needs a following char, so the last char is returned as-is.
            if (index >= s.Length - 1) {
                return (int)s[index];
            }

            int highOffset = (int)s[index] - HIGH_SURROGATE_START;
            if (highOffset < 0 || highOffset > 0x3ff) {
                // Not a high surrogate.
                return (int)s[index];
            }

            int lowOffset = (int)s[index + 1] - LOW_SURROGATE_START;
            if (lowOffset < 0 || lowOffset > 0x3ff) {
                // High surrogate that is not followed by a low surrogate.
                return (int)s[index];
            }

            // Convert the surrogate to UTF32 and get the result.
            return (highOffset * 0x400) + lowOffset + UNICODE_PLANE01_START;
        }

        ////////////////////////////////////////////////////////////////////////
        //
        // Convert a character or a surrogate pair starting at index of string s
        // to UTF32 value.
        //
        //  Parameters:
        //      s           The string
        //      index       The starting index.  It can point to a BMP character or
        //                  a surrogate pair.
        //      charLength  [out] 1 when a single char was consumed, 2 when a
        //                  well-formed surrogate pair was consumed.
        //
        // WARNING: since it doesn't throw an exception it CAN return a value
        //          in the surrogate range D800-DFFF, which are not legal unicode values.
        //
        //  Returns:
        //      The UTF32 value
        //
        ////////////////////////////////////////////////////////////////////////

        internal static int InternalConvertToUtf32(String s, int index, out int charLength) {
            Contract.Assert(s != null, "s != null");
            Contract.Assert(s.Length > 0, "s.Length > 0");
            Contract.Assert(index >= 0 && index < s.Length, "index >= 0 && index < s.Length");

            charLength = 1;

            // A pair needs a following char, so the last char is returned as-is.
            if (index >= s.Length - 1) {
                return (int)s[index];
            }

            int highOffset = (int)s[index] - HIGH_SURROGATE_START;
            if (highOffset < 0 || highOffset > 0x3ff) {
                // Not a high surrogate.
                return (int)s[index];
            }

            int lowOffset = (int)s[index + 1] - LOW_SURROGATE_START;
            if (lowOffset < 0 || lowOffset > 0x3ff) {
                // High surrogate that is not followed by a low surrogate.
                return (int)s[index];
            }

            // Convert the surrogate to UTF32 and get the result.
            charLength = 2;
            return (highOffset * 0x400) + lowOffset + UNICODE_PLANE01_START;
        }

        ////////////////////////////////////////////////////////////////////////
        //
        //  IsWhiteSpace
        //
        //  Determines if the given character is a white space character, i.e. its
        //  Unicode category is SpaceSeparator, LineSeparator or ParagraphSeparator.
        //
        ////////////////////////////////////////////////////////////////////////

        internal static bool IsWhiteSpace(String s, int index) {
            Contract.Assert(s != null, "s!=null");
            Contract.Assert(index >= 0 && index < s.Length, "index >= 0 && index < s.Length");

            UnicodeCategory category = GetUnicodeCategory(s, index);
            // In Unicode 3.0, U+2028 is the only character which is under the category "LineSeparator".
            // And U+2029 is the only character which is under the category "ParagraphSeparator".
            return category == UnicodeCategory.SpaceSeparator
                || category == UnicodeCategory.LineSeparator
                || category == UnicodeCategory.ParagraphSeparator;
        }

        internal static bool IsWhiteSpace(char c) {
            UnicodeCategory category = GetUnicodeCategory(c);
            // In Unicode 3.0, U+2028 is the only character which is under the category "LineSeparator".
            // And U+2029 is the only character which is under the category "ParagraphSeparator".
            return category == UnicodeCategory.SpaceSeparator
                || category == UnicodeCategory.LineSeparator
                || category == UnicodeCategory.ParagraphSeparator;
        }

        //
        // This is called by the public char and string, index versions
        //
        // Note that for ch in the range D800-DFFF we just treat it as any other non-numeric character
        //
        [System.Security.SecuritySafeCritical]  // auto-generated
        internal unsafe static double InternalGetNumericValue(int ch) {
            Contract.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range.");

            // Level 1: the bits of ch above the low 8 select the entry that locates the level 2 block.
            ushort level2Base = s_pNumericLevel1Index[ch >> 8];
            // Level 2: bits 4 - 7 of ch select, within that block, the WORD offset of the level 3 block.
            // Note that & has lower precedence than addition, so don't forget the parentheses.
            ushort level3Base = s_pNumericLevel1Index[level2Base + ((ch >> 4) & 0x000f)];
            // Level 3 is a run of bytes; bits 0 - 3 of ch select the byte holding the value index.
            byte* level3Entries = (byte*)(s_pNumericLevel1Index + level3Base);
            byte valueIndex = level3Entries[ch & 0x000f];

#if WIN64
            // To get around the IA64 alignment issue.  Our double data is aligned in 8-byte boundary, but loader loads the embedded table starting
            // at 4-byte boundary.  This causes an alignment issue since double is 8-byte.
            byte* pSource = s_pNumericValues + (valueIndex * sizeof(double));
            if (((long)pSource % 8) != 0) {
                // We are not aligned in 8-byte boundary.  Do a copy.
                double unaligned;
                byte* pDest = (byte*)&unaligned;
                Buffer.memcpyimpl(pSource, pDest, sizeof(double));
                return unaligned;
            }
#endif
            return ((double*)s_pNumericValues)[valueIndex];
        }

        //
        // This is called by the public char and string, index versions
        //
        // Note that for ch in the range D800-DFFF we just treat it as any other non-numeric character
        //
        [System.Security.SecuritySafeCritical]  // auto-generated
        internal unsafe static DigitValues* InternalGetDigitValues(int ch) {
            Contract.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range.");

            // Same three-level walk as InternalGetNumericValue; the resulting index is
            // applied to the digit table instead of the numeric table.
            ushort level2Base = s_pNumericLevel1Index[ch >> 8];
            // Note that & has lower precedence than addition, so don't forget the parentheses.
            ushort level3Base = s_pNumericLevel1Index[level2Base + ((ch >> 4) & 0x000f)];
            byte* level3Entries = (byte*)(s_pNumericLevel1Index + level3Base);
            byte valueIndex = level3Entries[ch & 0x000f];

            return s_pDigitValues + valueIndex;
        }

        // Returns the decimalDigit member of the digit-table entry for ch.
        [System.Security.SecuritySafeCritical]  // auto-generated
        internal unsafe static sbyte InternalGetDecimalDigitValue(int ch) {
            DigitValues* entry = InternalGetDigitValues(ch);
            return entry->decimalDigit;
        }

        // Returns the digit member of the digit-table entry for ch.
        [System.Security.SecuritySafeCritical]  // auto-generated
        internal unsafe static sbyte InternalGetDigitValue(int ch) {
            DigitValues* entry = InternalGetDigitValues(ch);
            return entry->digit;
        }

        ////////////////////////////////////////////////////////////////////////
        //
        //Returns the numeric value associated with the character c. If the character is a fraction,
        // the return value will not be an integer. If the character does not have a numeric value, the return value is -1.
        //
        //Returns:
        //  the numeric value for the specified Unicode character.  If the character does not have a numeric value, the return value is -1.
        //Arguments:
        //      ch  a Unicode character
        //Exceptions (String/index overload only):
        //      ArgumentNullException        s is null
        //      ArgumentOutOfRangeException  index is outside s
        //
        ////////////////////////////////////////////////////////////////////////

        [System.Security.SecuritySafeCritical]  // auto-generated
        public static double GetNumericValue(char ch) {
            return InternalGetNumericValue(ch);
        }

        [System.Security.SecuritySafeCritical]  // auto-generated
        public static double GetNumericValue(String s, int index) {
            if (s == null) {
                throw new ArgumentNullException("s");
            }
            if (index < 0 || index >= s.Length) {
                throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index"));
            }
            Contract.EndContractBlock();

            int codePoint = InternalConvertToUtf32(s, index);
            return InternalGetNumericValue(codePoint);
        }

        ////////////////////////////////////////////////////////////////////////
        //
        //Returns the decimal digit value associated with the character c.
        //
        // The value should be from 0 ~ 9.
        // If the character does not have a numeric value, the return value is -1.
        // From Unicode.org: Decimal Digits. Digits that can be used to form decimal-radix numbers.
        //Returns:
        //  the decimal digit value for the specified Unicode character.  If the character does not have a decimal digit value, the return value is -1.
        //Arguments:
        //      ch  a Unicode character
        //Exceptions (String/index overload only):
        //      ArgumentNullException        s is null
        //      ArgumentOutOfRangeException  index is outside s
        //
        ////////////////////////////////////////////////////////////////////////

        public static int GetDecimalDigitValue(char ch) {
            return InternalGetDecimalDigitValue(ch);
        }

        public static int GetDecimalDigitValue(String s, int index) {
            if (s == null) {
                throw new ArgumentNullException("s");
            }
            if (index < 0 || index >= s.Length) {
                throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index"));
            }
            Contract.EndContractBlock();

            int codePoint = InternalConvertToUtf32(s, index);
            return InternalGetDecimalDigitValue(codePoint);
        }

        ////////////////////////////////////////////////////////////////////////
        //
        //Action: Returns the digit value associated with the character c.
        // If the character does not have a numeric value, the return value is -1.
        // From Unicode.org: If the character represents a digit, not necessarily a decimal digit,
        // the value is here. This covers digits which do not form decimal radix forms, such as the compatibility superscript digits.
        //
        // An example is: U+2460 CIRCLED DIGIT ONE. This character has digit value 1, but does not have associated decimal digit value.
        //
        //Returns:
        //  the digit value for the specified Unicode character.  If the character does not have a digit value, the return value is -1.
        //Arguments:
        //      ch  a Unicode character
        //Exceptions (String/index overload only):
        //      ArgumentNullException        s is null
        //      ArgumentOutOfRangeException  index is outside s
        //
        ////////////////////////////////////////////////////////////////////////

        public static int GetDigitValue(char ch) {
            return InternalGetDigitValue(ch);
        }

        public static int GetDigitValue(String s, int index) {
            if (s == null) {
                throw new ArgumentNullException("s");
            }
            if (index < 0 || index >= s.Length) {
                throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index"));
            }
            Contract.EndContractBlock();

            int codePoint = InternalConvertToUtf32(s, index);
            return InternalGetDigitValue(codePoint);
        }

        // Returns the Unicode category of a single UTF-16 code unit.
        [System.Security.SecuritySafeCritical]  // auto-generated
        public static UnicodeCategory GetUnicodeCategory(char ch) {
            return InternalGetUnicodeCategory(ch);
        }

        // Returns the Unicode category of the character (or surrogate pair) that
        // starts at s[index].  Throws ArgumentNullException when s is null and
        // ArgumentOutOfRangeException when index is outside s.
        [System.Security.SecuritySafeCritical]  // auto-generated
        public static UnicodeCategory GetUnicodeCategory(String s, int index) {
            if (s == null) {
                throw new ArgumentNullException("s");
            }
            if (index < 0 || index >= s.Length) {
                throw new ArgumentOutOfRangeException("index");
            }
            Contract.EndContractBlock();

            return InternalGetUnicodeCategory(s, index);
        }

        // Reads the category byte for ch and casts it to UnicodeCategory.
        internal unsafe static UnicodeCategory InternalGetUnicodeCategory(int ch) {
            byte rawCategory = InternalGetCategoryValue(ch, UNICODE_CATEGORY_OFFSET);
            return (UnicodeCategory)rawCategory;
        }

        ////////////////////////////////////////////////////////////////////////
        //
        //Action: Returns one byte of the category record for the character ch.
        //Returns:
        //  the byte at the given offset of the record (UNICODE_CATEGORY_OFFSET or
        //  BIDI_CATEGORY_OFFSET)
        //Arguments:
        //      ch      a Unicode code point
        //      offset  which byte of the two-byte record to read
        //Exceptions:
        //      None
        //
        //Note that this API will return values for the D800-DFFF surrogate halves.
        //
        ////////////////////////////////////////////////////////////////////////

        [System.Security.SecuritySafeCritical]  // auto-generated
        internal unsafe static byte InternalGetCategoryValue(int ch, int offset) {
            Contract.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range.");

            // Level 1: the bits of ch above the low 8 select the entry that locates the level 2 block.
            ushort level2Base = s_pCategoryLevel1Index[ch >> 8];
            // Level 2: bits 4 - 7 of ch select, within that block, the WORD offset of the level 3 block.
            // Note that & has lower precedence than addition, so don't forget the parentheses.
            ushort level3Base = s_pCategoryLevel1Index[level2Base + ((ch >> 4) & 0x000f)];
            // Level 3 is a run of bytes; bits 0 - 3 of ch select the byte holding the record index.
            byte* level3Entries = (byte*)(s_pCategoryLevel1Index + level3Base);
            byte valueIndex = level3Entries[ch & 0x000f];

            // Each record in s_pCategoriesValue is two bytes wide; offset picks the byte.
            byte categoryValue = s_pCategoriesValue[valueIndex * 2 + offset];
            //
            // Make sure that OtherNotAssigned is the last category in UnicodeCategory.
            // If that changes, change the following assertion as well.
            //
            //Contract.Assert(uc >= 0 && uc <= UnicodeCategory.OtherNotAssigned, "Table returns incorrect Unicode category");
            return categoryValue;
        }

//      internal static BidiCategory GetBidiCategory(char ch) {
//          return ((BidiCategory)InternalGetCategoryValue(c, BIDI_CATEGORY_OFFSET));
//      }

        // Returns the bidi category of the character (or surrogate pair) that starts
        // at s[index].  Throws ArgumentNullException when s is null and
        // ArgumentOutOfRangeException when index is outside s.
        internal static BidiCategory GetBidiCategory(String s, int index) {
            if (s == null) {
                throw new ArgumentNullException("s");
            }
            if (index < 0 || index >= s.Length) {
                throw new ArgumentOutOfRangeException("index");
            }
            Contract.EndContractBlock();

            int codePoint = InternalConvertToUtf32(s, index);
            return (BidiCategory)InternalGetCategoryValue(codePoint, BIDI_CATEGORY_OFFSET);
        }

        ////////////////////////////////////////////////////////////////////////
        //
        //Action: Returns the Unicode Category property for the character
        //        (or surrogate pair) starting at value[index].
        //Returns:
        //  a value in the UnicodeCategory enum
        //Arguments:
        //      value  a Unicode String
        //      index  Index for the specified string.
        //Exceptions:
        //      None
        //
        ////////////////////////////////////////////////////////////////////////

        internal static UnicodeCategory InternalGetUnicodeCategory(String value, int index) {
            Contract.Assert(value != null, "value can not be null");
            Contract.Assert(index < value.Length, "index < value.Length");

            int codePoint = InternalConvertToUtf32(value, index);
            return InternalGetUnicodeCategory(codePoint);
        }

        ////////////////////////////////////////////////////////////////////////
        //
        // Get the Unicode category of the character starting at index.  If the character is in BMP, charLength will return 1.
        // If the character is a valid surrogate pair, charLength will return 2.
        //
        ////////////////////////////////////////////////////////////////////////

        internal static UnicodeCategory InternalGetUnicodeCategory(String str, int index, out int charLength) {
            Contract.Assert(str != null, "str can not be null");
            Contract.Assert(str.Length > 0, "str.Length > 0");
            Contract.Assert(index >= 0 && index < str.Length, "index >= 0 && index < str.Length");

            int codePoint = InternalConvertToUtf32(str, index, out charLength);
            return InternalGetUnicodeCategory(codePoint);
        }

        // True when uc is one of the three mark categories: NonSpacingMark,
        // SpacingCombiningMark or EnclosingMark.
        internal static bool IsCombiningCategory(UnicodeCategory uc) {
            Contract.Assert(uc >= 0, "uc >= 0");
            switch (uc) {
                case UnicodeCategory.NonSpacingMark:
                case UnicodeCategory.SpacingCombiningMark:
                case UnicodeCategory.EnclosingMark:
                    return true;
                default:
                    return false;
            }
        }

        // Implemented by the runtime (InternalCall); receives the base pointer of the data table.
        [System.Security.SecurityCritical]  // auto-generated
        [ResourceExposure(ResourceScope.Process)]
        [MethodImplAttribute(MethodImplOptions.InternalCall)]
        private unsafe static extern void nativeInitTable(byte* bytePtr);
    }
}

// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
// ==++==
//
//   Copyright (c) Microsoft Corporation.  All rights reserved.
//
// ==--==
////////////////////////////////////////////////////////////////////////////
//
//  Class:    CharacterInfo
//
//  Purpose:  This class implements a set of methods for retrieving
//            character type information.  Character type information is
//            independent of culture and region.
// // Date: August 12, 1998 // //////////////////////////////////////////////////////////////////////////// namespace System.Globalization { //This class has only static members and therefore doesn't need to be serialized. using System; using System.Threading; using System.Runtime.InteropServices; using System.Runtime.CompilerServices; using System.Runtime.Versioning; using System.Reflection; using System.Diagnostics.Contracts; public static class CharUnicodeInfo { //--------------------------------------------------------------------// // Internal Information // //-------------------------------------------------------------------// // // Native methods to access the Unicode category data tables in charinfo.nlp. // internal const char HIGH_SURROGATE_START = '\ud800'; internal const char HIGH_SURROGATE_END = '\udbff'; internal const char LOW_SURROGATE_START = '\udc00'; internal const char LOW_SURROGATE_END = '\udfff'; internal const int UNICODE_CATEGORY_OFFSET = 0; internal const int BIDI_CATEGORY_OFFSET = 1; // The base pointer of the data table unsafe static byte* s_pDataTable; // The native pointer to the 12:4:4 index table of the Unicode cateogry data. unsafe static ushort* s_pCategoryLevel1Index; unsafe static byte* s_pCategoriesValue; // The native pointer to the 12:4:4 index table of the Unicode numeric data. // The value of this index table is an index into the real value table stored in s_pNumericValues. unsafe static ushort* s_pNumericLevel1Index; // The numeric value table, which is indexed by s_pNumericLevel1Index. // Every item contains the value for numeric value. // unsafe static double* s_pNumericValues; // To get around the IA64 alignment issue. Our double data is aligned in 8-byte boundary, but loader loads the embeded table starting // at 4-byte boundary. This cause a alignment issue since double is 8-byte. unsafe static byte* s_pNumericValues; // The digit value table, which is indexed by s_pNumericLevel1Index. 
// It shares the same indices as s_pNumericValues.
        // Every item contains the value for decimal digit/digit value.
        unsafe static DigitValues* s_pDigitValues;

        internal const String UNICODE_INFO_FILE_NAME = "charinfo.nlp";

        // The starting codepoint for Unicode plane 1.  Plane 1 contains 0x010000 ~ 0x01ffff.
        internal const int UNICODE_PLANE01_START = 0x10000;

        //
        // This is the header for the native data table that we load from UNICODE_INFO_FILE_NAME.
        //
        // Explicit layout is used here since a syntax like char[16] can not be used in sequential layout.
        [StructLayout(LayoutKind.Explicit)]
        internal unsafe struct UnicodeDataHeader {
            [FieldOffset(0)]
            internal char TableName;                // WCHAR[16]
            [FieldOffset(0x20)]
            internal ushort version;                // WORD[4]
            [FieldOffset(0x28)]
            internal uint OffsetToCategoriesIndex;  // DWORD
            [FieldOffset(0x2c)]
            internal uint OffsetToCategoriesValue;  // DWORD
            [FieldOffset(0x30)]
            internal uint OffsetToNumbericIndex;    // DWORD
            [FieldOffset(0x34)]
            internal uint OffsetToDigitValue;       // DWORD
            [FieldOffset(0x38)]
            internal uint OffsetToNumbericValue;    // DWORD
        }

        // NOTE: It's important to specify pack size here, since the size of the structure is 2 bytes.  Otherwise,
        // the default pack size will be 4.
        [StructLayout(LayoutKind.Sequential, Pack=2)]
        internal struct DigitValues {
            internal sbyte decimalDigit;
            internal sbyte digit;
        }

        //
        // Class initializer.  Obtains the charinfo.nlp table once and caches a pointer
        // to each section named in its header, so the lookup methods only have to
        // index into those pointers.
        //
        [System.Security.SecuritySafeCritical]  // auto-generated
        [ResourceExposure(ResourceScope.None)]
        [ResourceConsumption(ResourceScope.Process, ResourceScope.Process)]
        unsafe static CharUnicodeInfo() {
            s_pDataTable = GlobalizationAssembly.GetGlobalizationResourceBytePtr(typeof(CharUnicodeInfo).Assembly, UNICODE_INFO_FILE_NAME);
            UnicodeDataHeader* header = (UnicodeDataHeader*)s_pDataTable;

            // Every section offset in the header is relative to the start of the table.
            s_pCategoryLevel1Index = (ushort*)(s_pDataTable + header->OffsetToCategoriesIndex);
            s_pCategoriesValue = (byte*)(s_pDataTable + header->OffsetToCategoriesValue);
            s_pNumericLevel1Index = (ushort*)(s_pDataTable + header->OffsetToNumbericIndex);
            s_pNumericValues = (byte*)(s_pDataTable + header->OffsetToNumbericValue);
            s_pDigitValues = (DigitValues*)(s_pDataTable + header->OffsetToDigitValue);

            // Go to native side to make sure the native CharacterInfoTable pointer in the native side is initialized.
            nativeInitTable(s_pDataTable);
        }

        ////////////////////////////////////////////////////////////////////////
        //
        // Actions:
        //   Convert the BMP character or surrogate pair starting at s[index] to a
        //   UTF32 value.  This is similar to Char.ConvertToUTF32, but it does not
        //   throw when it meets a malformed surrogate sequence: the char at index
        //   is returned unchanged instead.
        //
        // WARNING: since it doesn't throw an exception it CAN return a value
        //          in the surrogate range D800-DFFF, which are not legal unicode values.
        //
        ////////////////////////////////////////////////////////////////////////

        internal static int InternalConvertToUtf32(String s, int index) {
            Contract.Assert(s != null, "s != null");
            Contract.Assert(index >= 0 && index < s.Length, "index < s.Length");

            // A pair needs a following char, so the last char is returned as-is.
            if (index >= s.Length - 1) {
                return (int)s[index];
            }

            int highOffset = (int)s[index] - HIGH_SURROGATE_START;
            if (highOffset < 0 || highOffset > 0x3ff) {
                // Not a high surrogate.
                return (int)s[index];
            }

            int lowOffset = (int)s[index + 1] - LOW_SURROGATE_START;
            if (lowOffset < 0 || lowOffset > 0x3ff) {
                // High surrogate that is not followed by a low surrogate.
                return (int)s[index];
            }

            // Convert the surrogate to UTF32 and get the result.
            return (highOffset * 0x400) + lowOffset + UNICODE_PLANE01_START;
        }

        ////////////////////////////////////////////////////////////////////////
        //
        // Convert a character or a surrogate pair starting at index of string s
        // to UTF32 value.
        //
        //  Parameters:
        //      s           The string
        //      index       The starting index.  It can point to a BMP character or
        //                  a surrogate pair.
        //      len         The length of the string.
//      charLength  [out] If the index points to a BMP char, charLength
        //                  will be 1.  If the index points to a surrogate pair,
        //                  charLength will be 2.
        //
        // WARNING: since it doesn't throw an exception it CAN return a value
        //          in the surrogate range D800-DFFF, which are not legal unicode values.
        //
        //  Returns:
        //      The UTF32 value
        //
        ////////////////////////////////////////////////////////////////////////

        internal static int InternalConvertToUtf32(String s, int index, out int charLength) {
            Contract.Assert(s != null, "s != null");
            Contract.Assert(s.Length > 0, "s.Length > 0");
            Contract.Assert(index >= 0 && index < s.Length, "index >= 0 && index < s.Length");

            charLength = 1;

            // A pair needs a following char, so the last char is returned as-is.
            if (index >= s.Length - 1) {
                return (int)s[index];
            }

            int highOffset = (int)s[index] - HIGH_SURROGATE_START;
            if (highOffset < 0 || highOffset > 0x3ff) {
                // Not a high surrogate.
                return (int)s[index];
            }

            int lowOffset = (int)s[index + 1] - LOW_SURROGATE_START;
            if (lowOffset < 0 || lowOffset > 0x3ff) {
                // High surrogate that is not followed by a low surrogate.
                return (int)s[index];
            }

            // Convert the surrogate to UTF32 and get the result.
            charLength = 2;
            return (highOffset * 0x400) + lowOffset + UNICODE_PLANE01_START;
        }

        ////////////////////////////////////////////////////////////////////////
        //
        //  IsWhiteSpace
        //
        //  Determines if the given character is a white space character, i.e. its
        //  Unicode category is SpaceSeparator, LineSeparator or ParagraphSeparator.
        //
        ////////////////////////////////////////////////////////////////////////

        internal static bool IsWhiteSpace(String s, int index) {
            Contract.Assert(s != null, "s!=null");
            Contract.Assert(index >= 0 && index < s.Length, "index >= 0 && index < s.Length");

            UnicodeCategory category = GetUnicodeCategory(s, index);
            // In Unicode 3.0, U+2028 is the only character which is under the category "LineSeparator".
            // And U+2029 is the only character which is under the category "ParagraphSeparator".
            return category == UnicodeCategory.SpaceSeparator
                || category == UnicodeCategory.LineSeparator
                || category == UnicodeCategory.ParagraphSeparator;
        }

        internal static bool IsWhiteSpace(char c) {
            UnicodeCategory category = GetUnicodeCategory(c);
            // In Unicode 3.0, U+2028 is the only character which is under the category "LineSeparator".
            // And U+2029 is the only character which is under the category "ParagraphSeparator".
            return category == UnicodeCategory.SpaceSeparator
                || category == UnicodeCategory.LineSeparator
                || category == UnicodeCategory.ParagraphSeparator;
        }

        //
        // This is called by the public char and string, index versions
        //
        // Note that for ch in the range D800-DFFF we just treat it as any other non-numeric character
        //
        [System.Security.SecuritySafeCritical]  // auto-generated
        internal unsafe static double InternalGetNumericValue(int ch) {
            Contract.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range.");

            // Level 1: the bits of ch above the low 8 select the entry that locates the level 2 block.
            ushort level2Base = s_pNumericLevel1Index[ch >> 8];
            // Level 2: bits 4 - 7 of ch select, within that block, the WORD offset of the level 3 block.
            // Note that & has lower precedence than addition, so don't forget the parentheses.
            ushort level3Base = s_pNumericLevel1Index[level2Base + ((ch >> 4) & 0x000f)];
            // Level 3 is a run of bytes; bits 0 - 3 of ch select the byte holding the value index.
            byte* level3Entries = (byte*)(s_pNumericLevel1Index + level3Base);
            byte valueIndex = level3Entries[ch & 0x000f];

#if WIN64
            // To get around the IA64 alignment issue.  Our double data is aligned in 8-byte boundary, but loader loads the embedded table starting
            // at 4-byte boundary.  This causes an alignment issue since double is 8-byte.
            byte* pSource = s_pNumericValues + (valueIndex * sizeof(double));
            if (((long)pSource % 8) != 0) {
                // We are not aligned in 8-byte boundary.  Do a copy.
                double unaligned;
                byte* pDest = (byte*)&unaligned;
                Buffer.memcpyimpl(pSource, pDest, sizeof(double));
                return unaligned;
            }
#endif
            return ((double*)s_pNumericValues)[valueIndex];
        }

        //
        // This is called by the public char and string, index versions
        //
        // Note that for ch in the range D800-DFFF we just treat it as any other non-numeric character
        //
        [System.Security.SecuritySafeCritical]  // auto-generated
        internal unsafe static DigitValues* InternalGetDigitValues(int ch) {
            Contract.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range.");

            // Same three-level walk as InternalGetNumericValue; the resulting index is
            // applied to the digit table instead of the numeric table.
            ushort level2Base = s_pNumericLevel1Index[ch >> 8];
            // Note that & has lower precedence than addition, so don't forget the parentheses.
            ushort level3Base = s_pNumericLevel1Index[level2Base + ((ch >> 4) & 0x000f)];
            byte* level3Entries = (byte*)(s_pNumericLevel1Index + level3Base);
            byte valueIndex = level3Entries[ch & 0x000f];

            return s_pDigitValues + valueIndex;
        }

        // Returns the decimalDigit member of the digit-table entry for ch.
        [System.Security.SecuritySafeCritical]  // auto-generated
        internal unsafe static sbyte InternalGetDecimalDigitValue(int ch) {
            DigitValues* entry = InternalGetDigitValues(ch);
            return entry->decimalDigit;
        }

        // Returns the digit member of the digit-table entry for ch.
        [System.Security.SecuritySafeCritical]  // auto-generated
        internal unsafe static sbyte InternalGetDigitValue(int ch) {
            DigitValues* entry = InternalGetDigitValues(ch);
            return entry->digit;
        }

        ////////////////////////////////////////////////////////////////////////
        //
        //Returns the numeric value associated with the character c. If the character is a fraction,
        // the return value will not be an integer. If the character does not have a numeric value, the return value is -1.
        //
        //Returns:
        //  the numeric value for the specified Unicode character.  If the character does not have a numeric value, the return value is -1.
//Arguments:
        //      ch  a Unicode character
        //Exceptions:
        //      ArgumentNullException
        //      ArgumentOutOfRangeException
        //
        ////////////////////////////////////////////////////////////////////////

        [System.Security.SecuritySafeCritical]  // auto-generated
        public static double GetNumericValue(char ch) {
            return InternalGetNumericValue(ch);
        }

        // String/index overload: validates its arguments, decodes the character (or
        // surrogate pair) starting at s[index], then performs the same lookup.
        [System.Security.SecuritySafeCritical]  // auto-generated
        public static double GetNumericValue(String s, int index) {
            if (s == null) {
                throw new ArgumentNullException("s");
            }
            if (index < 0 || index >= s.Length) {
                throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index"));
            }
            Contract.EndContractBlock();

            int codePoint = InternalConvertToUtf32(s, index);
            return InternalGetNumericValue(codePoint);
        }

        ////////////////////////////////////////////////////////////////////////
        //
        //Returns the decimal digit value associated with the character c.
        //
        // The value should be from 0 ~ 9.
        // If the character does not have a numeric value, the return value is -1.
        // From Unicode.org: Decimal Digits. Digits that can be used to form decimal-radix numbers.
        //Returns:
        //  the decimal digit value for the specified Unicode character.  If the character does not have a decimal digit value, the return value is -1.
//Arguments:
        //      ch  a Unicode character
        //Exceptions:
        //      ArgumentNullException
        //      ArgumentOutOfRangeException
        //
        ////////////////////////////////////////////////////////////////////////

        // Single-char overload: forwards ch to InternalGetDecimalDigitValue and
        // widens the sbyte result to int. No argument validation happens here.
        public static int GetDecimalDigitValue(char ch)
        {
            return InternalGetDecimalDigitValue(ch);
        }

        // (string, index) overload.
        //   Throws ArgumentNullException when s is null.
        //   Throws ArgumentOutOfRangeException when index is outside [0, s.Length).
        // The character at index is first passed through InternalConvertToUtf32
        // (defined elsewhere in this class; presumably it folds a surrogate pair
        // into a single code point) and the result is looked up.
        public static int GetDecimalDigitValue(String s, int index)
        {
            if (s == null)
            {
                throw new ArgumentNullException("s");
            }

            if (index >= s.Length || index < 0)
            {
                throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index"));
            }

            Contract.EndContractBlock();

            return InternalGetDecimalDigitValue(InternalConvertToUtf32(s, index));
        }

        ////////////////////////////////////////////////////////////////////////
        //
        //Action: Returns the digit value associated with the character c.
        // If the character does not have a numeric value, the return value is -1.
        // From Unicode.org: If the character represents a digit, not necessarily a decimal digit,
        // the value is here. This covers digits which do not form decimal radix forms, such as the compatibility superscript digits.
        //
        // An example is: U+2460 CIRCLED DIGIT ONE. This character has digit value 1, but does not have associated decimal digit value.
        //
        //Returns:
        //      the digit value for the specified Unicode character.  If the character does not have a digit value, the return value is -1.
//Arguments:
        //      ch  a Unicode character
        //Exceptions:
        //      ArgumentNullException
        //      ArgumentOutOfRangeException
        //
        ////////////////////////////////////////////////////////////////////////

        // Single-char overload: forwards ch to InternalGetDigitValue and widens
        // the sbyte result to int. No argument validation happens here.
        public static int GetDigitValue(char ch)
        {
            return InternalGetDigitValue(ch);
        }

        // (string, index) overload.
        //   Throws ArgumentNullException when s is null.
        //   Throws ArgumentOutOfRangeException when index is outside [0, s.Length).
        // The character at index is first passed through InternalConvertToUtf32
        // (defined elsewhere in this class) and the result is looked up.
        public static int GetDigitValue(String s, int index)
        {
            if (s == null)
            {
                throw new ArgumentNullException("s");
            }

            if (index >= s.Length || index < 0)
            {
                throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index"));
            }

            Contract.EndContractBlock();

            return InternalGetDigitValue(InternalConvertToUtf32(s, index));
        }

        // Returns the UnicodeCategory of a single UTF-16 code unit.
        [System.Security.SecuritySafeCritical]  // auto-generated
        public static UnicodeCategory GetUnicodeCategory(char ch)
        {
            return InternalGetUnicodeCategory(ch);
        }

        // Returns the UnicodeCategory of the character that starts at s[index].
        //   Throws ArgumentNullException when s is null.
        //   Throws ArgumentOutOfRangeException when index is outside [0, s.Length).
        [System.Security.SecuritySafeCritical]  // auto-generated
        public static UnicodeCategory GetUnicodeCategory(String s, int index)
        {
            if (s == null)
            {
                throw new ArgumentNullException("s");
            }

            // One unsigned comparison covers both bounds: a negative index
            // becomes a very large uint and therefore fails the test as well.
            if ((uint)index >= (uint)s.Length)
            {
                throw new ArgumentOutOfRangeException("index");
            }

            Contract.EndContractBlock();

            return InternalGetUnicodeCategory(s, index);
        }

        // Looks up the category byte for ch (UNICODE_CATEGORY_OFFSET slot) and
        // casts it to UnicodeCategory.
        internal unsafe static UnicodeCategory InternalGetUnicodeCategory(int ch)
        {
            return (UnicodeCategory)InternalGetCategoryValue(ch, UNICODE_CATEGORY_OFFSET);
        }

        ////////////////////////////////////////////////////////////////////////
        //
        //Action: Returns one category byte for the character ch, read from the
        //        category data tables.
        //Returns:
        //      the raw byte stored for ch at the requested slot; callers cast it
        //      to UnicodeCategory or BidiCategory.
        //Arguments:
        //      ch      a Unicode code point (asserted to be 0 - 0x10ffff)
        //      offset  which byte of the two-byte value entry to read
        //              (UNICODE_CATEGORY_OFFSET or BIDI_CATEGORY_OFFSET)
        //Exceptions:
        //      None
        //
        //Note that this API will return values for D800-DFFF surrogate halves.
        //
        ////////////////////////////////////////////////////////////////////////
        [System.Security.SecuritySafeCritical]  // auto-generated
        internal unsafe static byte InternalGetCategoryValue(int ch, int offset)
        {
            Contract.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range.");

            // Level 1: everything above the low 8 bits of ch selects the base of a level 2 block.
            ushort level2Base = s_pCategoryLevel1Index[ch >> 8];

            // Level 2: bits 4 - 7 of ch select a WORD inside that block. The WORD is the
            // offset (in ushort units, from the start of the same table) of the level 3 block.
            ushort level3Offset = s_pCategoryLevel1Index[level2Base + ((ch >> 4) & 0x0f)];

            // Level 3 is read byte-wise, so reinterpret the table position as a byte pointer.
            byte* level3Bytes = (byte*)(s_pCategoryLevel1Index + level3Offset);

            // Bits 0 - 3 of ch select the byte that indexes the value table.
            byte valueIndex = level3Bytes[ch & 0x0f];

            // Every value entry is two bytes wide; offset picks the byte inside the entry.
            //
            // Make sure that OtherNotAssigned is the last category in UnicodeCategory.
            // If that changes, change the following assertion as well.
            //
            //Contract.Assert(uc >= 0 && uc <= UnicodeCategory.OtherNotAssigned, "Table returns incorrect Unicode category");
            return s_pCategoriesValue[valueIndex * 2 + offset];
        }

//      internal static BidiCategory GetBidiCategory(char ch) {
//          return ((BidiCategory)InternalGetCategoryValue(c, BIDI_CATEGORY_OFFSET));
//      }

        // Returns the bidi category of the character that starts at s[index].
        //   Throws ArgumentNullException when s is null.
        //   Throws ArgumentOutOfRangeException when index is outside [0, s.Length).
        internal static BidiCategory GetBidiCategory(String s, int index)
        {
            if (s == null)
            {
                throw new ArgumentNullException("s");
            }

            // Unsigned comparison rejects negative indices together with too-large ones.
            if ((uint)index >= (uint)s.Length)
            {
                throw new ArgumentOutOfRangeException("index");
            }

            Contract.EndContractBlock();

            return (BidiCategory)InternalGetCategoryValue(InternalConvertToUtf32(s, index), BIDI_CATEGORY_OFFSET);
        }

        ////////////////////////////////////////////////////////////////////////
        //
        //Action: Returns the Unicode Category property for the character c.
        //Returns:
        //      an value in UnicodeCategory enum
        //Arguments:
        //      value  a Unicode String
        //      index  Index for the specified string.
//Exceptions:
        //      None
        //
        ////////////////////////////////////////////////////////////////////////

        // Callers are expected to have validated the arguments already; the
        // checks below are assertions only.
        internal static UnicodeCategory InternalGetUnicodeCategory(String value, int index)
        {
            Contract.Assert(value != null, "value can not be null");
            Contract.Assert(index < value.Length, "index < value.Length");

            return InternalGetUnicodeCategory(InternalConvertToUtf32(value, index));
        }

        ////////////////////////////////////////////////////////////////////////
        //
        // Get the Unicode category of the character starting at index.  If the character is in BMP, charLength will return 1.
        // If the character is a valid surrogate pair, charLength will return 2.
        //
        // charLength is filled in by InternalConvertToUtf32 (defined elsewhere in this class).
        //
        ////////////////////////////////////////////////////////////////////////
        internal static UnicodeCategory InternalGetUnicodeCategory(String str, int index, out int charLength)
        {
            Contract.Assert(str != null, "str can not be null");
            Contract.Assert(str.Length > 0, "str.Length > 0");
            Contract.Assert(index >= 0 && index < str.Length, "index >= 0 && index < str.Length");

            return InternalGetUnicodeCategory(InternalConvertToUtf32(str, index, out charLength));
        }

        // True when uc is one of the three mark categories:
        // NonSpacingMark, SpacingCombiningMark or EnclosingMark.
        internal static bool IsCombiningCategory(UnicodeCategory uc)
        {
            Contract.Assert(uc >= 0, "uc >= 0");

            switch (uc)
            {
                case UnicodeCategory.NonSpacingMark:
                case UnicodeCategory.SpacingCombiningMark:
                case UnicodeCategory.EnclosingMark:
                    return true;

                default:
                    return false;
            }
        }

        // Implemented inside the runtime (InternalCall); takes a byte pointer.
        // NOTE(review): per the name, presumably hands the native side the base
        // of the data table for initialization -- confirm against the caller.
        [System.Security.SecurityCritical]  // auto-generated
        [ResourceExposure(ResourceScope.Process)]
        [MethodImplAttribute(MethodImplOptions.InternalCall)]
        private unsafe static extern void nativeInitTable(byte* bytePtr);
    }
}

// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
Link Menu

This book is available now!
Buy at Amazon US or
Buy at Amazon UK
- ReadOnlyDataSource.cs
- FilteredAttributeCollection.cs
- ContextProperty.cs
- ToolstripProfessionalRenderer.cs
- DataGridViewRowContextMenuStripNeededEventArgs.cs
- JpegBitmapEncoder.cs
- TreeNodeCollectionEditorDialog.cs
- TrustManager.cs
- DefaultBindingPropertyAttribute.cs
- TemplateKey.cs
- Point3DCollection.cs
- SqlDataSourceConfigureSelectPanel.cs
- DataSourceXmlAttributeAttribute.cs
- Atom10FeedFormatter.cs
- GetFileNameResult.cs
- DataGridViewTextBoxEditingControl.cs
- XmlDomTextWriter.cs
- Vector3DKeyFrameCollection.cs
- MenuScrollingVisibilityConverter.cs
- PenThreadWorker.cs
- InProcStateClientManager.cs
- DiagnosticTrace.cs
- Separator.cs
- NavigateEvent.cs
- ProfileSection.cs
- ButtonBaseDesigner.cs
- ResXFileRef.cs
- ZoomPercentageConverter.cs
- DataRowCollection.cs
- WebServiceEnumData.cs
- EncoderParameters.cs
- XPathExpr.cs
- CharStorage.cs
- GacUtil.cs
- OperationBehaviorAttribute.cs
- ReaderWriterLock.cs
- ZipIOBlockManager.cs
- SQLInt16Storage.cs
- DataGridTablesFactory.cs
- formatter.cs
- WebDisplayNameAttribute.cs
- Ray3DHitTestResult.cs
- AuthenticationModulesSection.cs
- AppDomainGrammarProxy.cs
- GroupBox.cs
- TrackPoint.cs
- WSTrust.cs
- Style.cs
- SqlXmlStorage.cs
- XmlTextReaderImpl.cs
- FileLoadException.cs
- CreateBookmarkScope.cs
- ListControlConvertEventArgs.cs
- VisualBrush.cs
- VectorCollection.cs
- ClientConfigurationHost.cs
- RegionData.cs
- ImageCollectionCodeDomSerializer.cs
- TimeSpanValidator.cs
- TextTreePropertyUndoUnit.cs
- WorkflowDebuggerSteppingAttribute.cs
- QuotedPairReader.cs
- ComplexObject.cs
- XmlValueConverter.cs
- AssemblyNameUtility.cs
- DataGridViewCellStyleConverter.cs
- PieceDirectory.cs
- FileVersion.cs
- VisualStyleElement.cs
- XmlCollation.cs
- HTMLTextWriter.cs
- XmlSchemaDocumentation.cs
- PointCollectionConverter.cs
- EncoderExceptionFallback.cs
- Point3DAnimationBase.cs
- ProxyFragment.cs
- DefaultBindingPropertyAttribute.cs
- PropertyGridDesigner.cs
- ClientSettingsStore.cs
- StorageTypeMapping.cs
- IsolatedStorageFile.cs
- TextRange.cs
- RegistryExceptionHelper.cs
- XmlRootAttribute.cs
- SafeBitVector32.cs
- ThemeDirectoryCompiler.cs
- DataRecordObjectView.cs
- ToolStripControlHost.cs
- TextRangeAdaptor.cs
- MaskedTextProvider.cs
- DataListItemCollection.cs
- DataServiceBuildProvider.cs
- Fx.cs
- ColorBlend.cs
- ListViewSortEventArgs.cs
- DefaultSection.cs
- Constraint.cs
- XsltConvert.cs
- ItemCollection.cs
- FlowDocumentPageViewerAutomationPeer.cs