Code:
/ FX-1434 / FX-1434 / 1.0 / untmp / whidbey / REDBITS / ndp / fx / src / Regex / System / Text / RegularExpressions / RegexCharClass.cs / 1 / RegexCharClass.cs
//------------------------------------------------------------------------------ //// Copyright (c) Microsoft Corporation. All rights reserved. // //----------------------------------------------------------------------------- // This RegexCharClass class provides the "set of Unicode chars" functionality // used by the regexp engine. // The main function of RegexCharClass is as a builder to turn ranges, characters and // Unicode categories into a single string. This string is used as a black box // representation of a character class by the rest of Regex. The format is as follows. // // Char index Use // 0 Flags - currently this only holds the "negate" flag // 1 length of the string representing the "set" portion, eg [a-z0-9] only has a "set" // 2 length of the string representing the "category" portion, eg [\p{Lu}] only has a "category" // 3...m The set. These are a series of ranges which define the characters included in the set. // To determine if a given character is in the set, we binary search over this set of ranges // and see where the character should go. Based on whether the ending index is odd or even, // we know if the character is in the set. // m+1...n The categories. This is a list of UnicodeCategory enum values which describe categories // included in this class. namespace System.Text.RegularExpressions { using System.Collections; using System.Globalization; using System.Diagnostics; internal sealed class RegexCharClass { // instance data private ArrayList _rangelist; private StringBuilder _categories; private bool _canonical; private bool _negate; private RegexCharClass _subtractor; // Constants private const int FLAGS = 0; private const int SETLENGTH = 1; private const int CATEGORYLENGTH = 2; private const int SETSTART = 3; private const char Nullchar = '\0'; private const char Lastchar = '\uFFFF'; private const char GroupChar = (char) 0; private const short SpaceConst = 100; private const short NotSpaceConst = -100; private static readonly String Space = "\x64"; private static readonly String NotSpace = NegateCategory(Space); private static readonly String Word; private static readonly String NotWord; internal static readonly String SpaceClass; internal static readonly String NotSpaceClass; internal static readonly String WordClass; internal static readonly String NotWordClass; internal static readonly String DigitClass; internal static readonly String NotDigitClass; private const String ECMASpaceSet = "\u0009\u000E\u0020\u0021"; private const String NotECMASpaceSet = "\0\u0009\u000E\u0020\u0021"; private const String ECMAWordSet = "\u0030\u003A\u0041\u005B\u005F\u0060\u0061\u007B\u0130\u0131"; private const String NotECMAWordSet = "\0\u0030\u003A\u0041\u005B\u005F\u0060\u0061\u007B\u0130\u0131"; private const String ECMADigitSet = "\u0030\u003A"; private const String NotECMADigitSet = "\0\u0030\u003A"; internal const String ECMASpaceClass = "\x00\x04\x00" + ECMASpaceSet; internal const String NotECMASpaceClass = "\x01\x04\x00" + ECMASpaceSet; internal const String ECMAWordClass = "\x00\x0A\x00" + ECMAWordSet; internal const String NotECMAWordClass = "\x01\x0A\x00" + ECMAWordSet; internal const String ECMADigitClass = "\x00\x02\x00" + ECMADigitSet; internal const String NotECMADigitClass = "\x01\x02\x00" + ECMADigitSet; internal const String AnyClass = "\x00\x01\x00\x00"; internal const String EmptyClass = "\x00\x00\x00"; private static Hashtable _definedCategories; /* * The property table contains all the block definitions defined in the * XML schema spec (http://www.w3.org/TR/2001/PR-xmlschema-2-20010316/#charcter-classes), Unicode 4.0 spec (www.unicode.org), * and Perl 5.6 (see Programming Perl, 3rd edition page 167). Three blocks defined by Perl (and here) may * not be in the Unicode: IsHighPrivateUseSurrogates, IsHighSurrogates, and IsLowSurrogates. * **/ // Has to be sorted by the first column private static readonly String[,] _propTable = { {"IsAlphabeticPresentationForms", "\uFB00\uFB50"}, {"IsArabic", "\u0600\u0700"}, {"IsArabicPresentationForms-A", "\uFB50\uFE00"}, {"IsArabicPresentationForms-B", "\uFE70\uFF00"}, {"IsArmenian", "\u0530\u0590"}, {"IsArrows", "\u2190\u2200"}, {"IsBasicLatin", "\u0000\u0080"}, {"IsBengali", "\u0980\u0A00"}, {"IsBlockElements", "\u2580\u25A0"}, {"IsBopomofo", "\u3100\u3130"}, {"IsBopomofoExtended", "\u31A0\u31C0"}, {"IsBoxDrawing", "\u2500\u2580"}, {"IsBraillePatterns", "\u2800\u2900"}, {"IsBuhid", "\u1740\u1760"}, {"IsCJKCompatibility", "\u3300\u3400"}, {"IsCJKCompatibilityForms", "\uFE30\uFE50"}, {"IsCJKCompatibilityIdeographs", "\uF900\uFB00"}, {"IsCJKRadicalsSupplement", "\u2E80\u2F00"}, {"IsCJKSymbolsandPunctuation", "\u3000\u3040"}, {"IsCJKUnifiedIdeographs", "\u4E00\uA000"}, {"IsCJKUnifiedIdeographsExtensionA", "\u3400\u4DC0"}, {"IsCherokee", "\u13A0\u1400"}, {"IsCombiningDiacriticalMarks", "\u0300\u0370"}, {"IsCombiningDiacriticalMarksforSymbols","\u20D0\u2100"}, {"IsCombiningHalfMarks", "\uFE20\uFE30"}, {"IsCombiningMarksforSymbols", "\u20D0\u2100"}, {"IsControlPictures", "\u2400\u2440"}, {"IsCurrencySymbols", "\u20A0\u20D0"}, {"IsCyrillic", "\u0400\u0500"}, {"IsCyrillicSupplement", "\u0500\u0530"}, {"IsDevanagari", "\u0900\u0980"}, {"IsDingbats", "\u2700\u27C0"}, {"IsEnclosedAlphanumerics", "\u2460\u2500"}, {"IsEnclosedCJKLettersandMonths", "\u3200\u3300"}, {"IsEthiopic", "\u1200\u1380"}, {"IsGeneralPunctuation", "\u2000\u2070"}, {"IsGeometricShapes", "\u25A0\u2600"}, {"IsGeorgian", "\u10A0\u1100"}, {"IsGreek", "\u0370\u0400"}, {"IsGreekExtended", "\u1F00\u2000"}, {"IsGreekandCoptic", "\u0370\u0400"}, {"IsGujarati", "\u0A80\u0B00"}, {"IsGurmukhi", "\u0A00\u0A80"}, {"IsHalfwidthandFullwidthForms", "\uFF00\uFFF0"}, {"IsHangulCompatibilityJamo", "\u3130\u3190"}, {"IsHangulJamo", "\u1100\u1200"}, {"IsHangulSyllables", "\uAC00\uD7B0"}, {"IsHanunoo", "\u1720\u1740"}, {"IsHebrew", "\u0590\u0600"}, {"IsHighPrivateUseSurrogates", "\uDB80\uDC00"}, {"IsHighSurrogates", "\uD800\uDB80"}, {"IsHiragana", "\u3040\u30A0"}, {"IsIPAExtensions", "\u0250\u02B0"}, {"IsIdeographicDescriptionCharacters", "\u2FF0\u3000"}, {"IsKanbun", "\u3190\u31A0"}, {"IsKangxiRadicals", "\u2F00\u2FE0"}, {"IsKannada", "\u0C80\u0D00"}, {"IsKatakana", "\u30A0\u3100"}, {"IsKatakanaPhoneticExtensions", "\u31F0\u3200"}, {"IsKhmer", "\u1780\u1800"}, {"IsKhmerSymbols", "\u19E0\u1A00"}, {"IsLao", "\u0E80\u0F00"}, {"IsLatin-1Supplement", "\u0080\u0100"}, {"IsLatinExtended-A", "\u0100\u0180"}, {"IsLatinExtended-B", "\u0180\u0250"}, {"IsLatinExtendedAdditional", "\u1E00\u1F00"}, {"IsLetterlikeSymbols", "\u2100\u2150"}, {"IsLimbu", "\u1900\u1950"}, {"IsLowSurrogates", "\uDC00\uE000"}, {"IsMalayalam", "\u0D00\u0D80"}, {"IsMathematicalOperators", "\u2200\u2300"}, {"IsMiscellaneousMathematicalSymbols-A","\u27C0\u27F0"}, {"IsMiscellaneousMathematicalSymbols-B","\u2980\u2A00"}, {"IsMiscellaneousSymbols", "\u2600\u2700"}, {"IsMiscellaneousSymbolsandArrows", "\u2B00\u2C00"}, {"IsMiscellaneousTechnical", "\u2300\u2400"}, {"IsMongolian", "\u1800\u18B0"}, {"IsMyanmar", "\u1000\u10A0"}, {"IsNumberForms", "\u2150\u2190"}, {"IsOgham", "\u1680\u16A0"}, {"IsOpticalCharacterRecognition", "\u2440\u2460"}, {"IsOriya", "\u0B00\u0B80"}, {"IsPhoneticExtensions", "\u1D00\u1D80"}, {"IsPrivateUse", "\uE000\uF900"}, {"IsPrivateUseArea", "\uE000\uF900"}, {"IsRunic", "\u16A0\u1700"}, {"IsSinhala", "\u0D80\u0E00"}, {"IsSmallFormVariants", "\uFE50\uFE70"}, {"IsSpacingModifierLetters", "\u02B0\u0300"}, {"IsSpecials", "\uFFF0"}, {"IsSuperscriptsandSubscripts", "\u2070\u20A0"}, {"IsSupplementalArrows-A", "\u27F0\u2800"}, {"IsSupplementalArrows-B", "\u2900\u2980"}, {"IsSupplementalMathematicalOperators", "\u2A00\u2B00"}, {"IsSyriac", "\u0700\u0750"}, {"IsTagalog", "\u1700\u1720"}, {"IsTagbanwa", "\u1760\u1780"}, {"IsTaiLe", "\u1950\u1980"}, {"IsTamil", "\u0B80\u0C00"}, {"IsTelugu", "\u0C00\u0C80"}, {"IsThaana", "\u0780\u07C0"}, {"IsThai", "\u0E00\u0E80"}, {"IsTibetan", "\u0F00\u1000"}, {"IsUnifiedCanadianAboriginalSyllabics","\u1400\u1680"}, {"IsVariationSelectors", "\uFE00\uFE10"}, {"IsYiRadicals", "\uA490\uA4D0"}, {"IsYiSyllables", "\uA000\uA490"}, {"IsYijingHexagramSymbols", "\u4DC0\u4E00"}, {"_xmlC", /* Name Char */ "\u002D\u002F\u0030\u003B\u0041\u005B\u005F\u0060\u0061\u007B\u00B7\u00B8\u00C0\u00D7\u00D8\u00F7\u00F8\u0132\u0134\u013F\u0141\u0149\u014A\u017F\u0180\u01C4\u01CD\u01F1\u01F4\u01F6\u01FA\u0218\u0250\u02A9\u02BB\u02C2\u02D0\u02D2\u0300\u0346\u0360\u0362\u0386\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03D7\u03DA\u03DB\u03DC\u03DD\u03DE\u03DF\u03E0\u03E1\u03E2\u03F4\u0401\u040D\u040E\u0450\u0451\u045D\u045E\u0482\u0483\u0487\u0490\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04EC\u04EE\u04F6\u04F8\u04FA\u0531\u0557\u0559\u055A\u0561\u0587\u0591\u05A2\u05A3\u05BA\u05BB\u05BE\u05BF\u05C0\u05C1\u05C3\u05C4\u05C5\u05D0\u05EB\u05F0\u05F3\u0621\u063B\u0640\u0653\u0660\u066A\u0670\u06B8\u06BA\u06BF\u06C0\u06CF\u06D0\u06D4\u06D5\u06E9\u06EA\u06EE\u06F0\u06FA\u0901\u0904\u0905\u093A\u093C\u094E\u0951\u0955\u0958\u0964\u0966\u0970\u0981\u0984\u0985\u098D\u098F\u0991\u0993\u09A9\u09AA\u09B1\u09B2\u09B3\u09B6\u09BA\u09BC\u09BD\u09BE\u09C5\u09C7\u09C9\u09CB\u09CE\u09D7\u09D8\u09DC" +"\u09DE\u09DF\u09E4\u09E6\u09F2\u0A02\u0A03\u0A05\u0A0B\u0A0F\u0A11\u0A13\u0A29\u0A2A\u0A31\u0A32\u0A34\u0A35\u0A37\u0A38\u0A3A\u0A3C\u0A3D\u0A3E\u0A43\u0A47\u0A49\u0A4B\u0A4E\u0A59\u0A5D\u0A5E\u0A5F\u0A66\u0A75\u0A81\u0A84\u0A85\u0A8C\u0A8D\u0A8E\u0A8F\u0A92\u0A93\u0AA9\u0AAA\u0AB1\u0AB2\u0AB4\u0AB5\u0ABA\u0ABC\u0AC6\u0AC7\u0ACA\u0ACB\u0ACE\u0AE0\u0AE1\u0AE6\u0AF0\u0B01\u0B04\u0B05\u0B0D\u0B0F\u0B11\u0B13\u0B29\u0B2A\u0B31\u0B32\u0B34\u0B36\u0B3A\u0B3C\u0B44\u0B47\u0B49\u0B4B\u0B4E\u0B56\u0B58\u0B5C\u0B5E\u0B5F\u0B62\u0B66\u0B70\u0B82\u0B84\u0B85\u0B8B\u0B8E\u0B91\u0B92\u0B96\u0B99\u0B9B\u0B9C\u0B9D\u0B9E\u0BA0\u0BA3\u0BA5\u0BA8\u0BAB\u0BAE\u0BB6\u0BB7\u0BBA\u0BBE\u0BC3\u0BC6\u0BC9\u0BCA\u0BCE\u0BD7\u0BD8\u0BE7\u0BF0\u0C01\u0C04\u0C05\u0C0D\u0C0E\u0C11\u0C12\u0C29\u0C2A\u0C34\u0C35\u0C3A\u0C3E\u0C45\u0C46\u0C49\u0C4A\u0C4E\u0C55\u0C57\u0C60\u0C62\u0C66\u0C70\u0C82\u0C84\u0C85\u0C8D\u0C8E\u0C91\u0C92\u0CA9\u0CAA\u0CB4\u0CB5\u0CBA\u0CBE\u0CC5\u0CC6\u0CC9\u0CCA\u0CCE\u0CD5\u0CD7\u0CDE\u0CDF\u0CE0\u0CE2" +"\u0CE6\u0CF0\u0D02\u0D04\u0D05\u0D0D\u0D0E\u0D11\u0D12\u0D29\u0D2A\u0D3A\u0D3E\u0D44\u0D46\u0D49\u0D4A\u0D4E\u0D57\u0D58\u0D60\u0D62\u0D66\u0D70\u0E01\u0E2F\u0E30\u0E3B\u0E40\u0E4F\u0E50\u0E5A\u0E81\u0E83\u0E84\u0E85\u0E87\u0E89\u0E8A\u0E8B\u0E8D\u0E8E\u0E94\u0E98\u0E99\u0EA0\u0EA1\u0EA4\u0EA5\u0EA6\u0EA7\u0EA8\u0EAA\u0EAC\u0EAD\u0EAF\u0EB0\u0EBA\u0EBB\u0EBE\u0EC0\u0EC5\u0EC6\u0EC7\u0EC8\u0ECE\u0ED0\u0EDA\u0F18\u0F1A\u0F20\u0F2A\u0F35\u0F36\u0F37\u0F38\u0F39\u0F3A\u0F3E\u0F48\u0F49\u0F6A\u0F71\u0F85\u0F86\u0F8C\u0F90\u0F96\u0F97\u0F98\u0F99\u0FAE\u0FB1\u0FB8\u0FB9\u0FBA\u10A0\u10C6\u10D0\u10F7\u1100\u1101\u1102\u1104\u1105\u1108\u1109\u110A\u110B\u110D\u110E\u1113\u113C\u113D\u113E\u113F\u1140\u1141\u114C\u114D\u114E\u114F\u1150\u1151\u1154\u1156\u1159\u115A\u115F\u1162\u1163\u1164\u1165\u1166\u1167\u1168\u1169\u116A\u116D\u116F\u1172\u1174\u1175\u1176\u119E\u119F\u11A8\u11A9\u11AB\u11AC\u11AE\u11B0\u11B7\u11B9\u11BA\u11BB\u11BC\u11C3\u11EB\u11EC\u11F0\u11F1\u11F9\u11FA\u1E00\u1E9C\u1EA0\u1EFA\u1F00" +"\u1F16\u1F18\u1F1E\u1F20\u1F46\u1F48\u1F4E\u1F50\u1F58\u1F59\u1F5A\u1F5B\u1F5C\u1F5D\u1F5E\u1F5F\u1F7E\u1F80\u1FB5\u1FB6\u1FBD\u1FBE\u1FBF\u1FC2\u1FC5\u1FC6\u1FCD\u1FD0\u1FD4\u1FD6\u1FDC\u1FE0\u1FED\u1FF2\u1FF5\u1FF6\u1FFD\u20D0\u20DD\u20E1\u20E2\u2126\u2127\u212A\u212C\u212E\u212F\u2180\u2183\u3005\u3006\u3007\u3008\u3021\u3030\u3031\u3036\u3041\u3095\u3099\u309B\u309D\u309F\u30A1\u30FB\u30FC\u30FF\u3105\u312D\u4E00\u9FA6\uAC00\uD7A4"}, {"_xmlD", "\u0030\u003A\u0660\u066A\u06F0\u06FA\u0966\u0970\u09E6\u09F0\u0A66\u0A70\u0AE6\u0AF0\u0B66\u0B70\u0BE7\u0BF0\u0C66\u0C70\u0CE6\u0CF0\u0D66\u0D70\u0E50\u0E5A\u0ED0\u0EDA\u0F20\u0F2A\u1040\u104A\u1369\u1372\u17E0\u17EA\u1810\u181A\uFF10\uFF1A"}, {"_xmlI", /* Start Name Char */ "\u003A\u003B\u0041\u005B\u005F\u0060\u0061\u007B\u00C0\u00D7\u00D8\u00F7\u00F8\u0132\u0134\u013F\u0141\u0149\u014A\u017F\u0180\u01C4\u01CD\u01F1\u01F4\u01F6\u01FA\u0218\u0250\u02A9\u02BB\u02C2\u0386\u0387\u0388\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03D7\u03DA\u03DB\u03DC\u03DD\u03DE\u03DF\u03E0\u03E1\u03E2\u03F4\u0401\u040D\u040E\u0450\u0451\u045D\u045E\u0482\u0490\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04EC\u04EE\u04F6\u04F8\u04FA\u0531\u0557\u0559\u055A\u0561\u0587\u05D0\u05EB\u05F0\u05F3\u0621\u063B\u0641\u064B\u0671\u06B8\u06BA\u06BF\u06C0\u06CF\u06D0\u06D4\u06D5\u06D6\u06E5\u06E7\u0905\u093A\u093D\u093E\u0958\u0962\u0985\u098D\u098F\u0991\u0993\u09A9\u09AA\u09B1\u09B2\u09B3\u09B6\u09BA\u09DC\u09DE\u09DF\u09E2\u09F0\u09F2\u0A05\u0A0B\u0A0F\u0A11\u0A13\u0A29\u0A2A\u0A31\u0A32\u0A34\u0A35\u0A37\u0A38\u0A3A\u0A59\u0A5D\u0A5E\u0A5F\u0A72\u0A75\u0A85\u0A8C\u0A8D\u0A8E\u0A8F\u0A92\u0A93\u0AA9\u0AAA\u0AB1\u0AB2\u0AB4\u0AB5\u0ABA\u0ABD\u0ABE\u0AE0\u0AE1\u0B05\u0B0D\u0B0F" +"\u0B11\u0B13\u0B29\u0B2A\u0B31\u0B32\u0B34\u0B36\u0B3A\u0B3D\u0B3E\u0B5C\u0B5E\u0B5F\u0B62\u0B85\u0B8B\u0B8E\u0B91\u0B92\u0B96\u0B99\u0B9B\u0B9C\u0B9D\u0B9E\u0BA0\u0BA3\u0BA5\u0BA8\u0BAB\u0BAE\u0BB6\u0BB7\u0BBA\u0C05\u0C0D\u0C0E\u0C11\u0C12\u0C29\u0C2A\u0C34\u0C35\u0C3A\u0C60\u0C62\u0C85\u0C8D\u0C8E\u0C91\u0C92\u0CA9\u0CAA\u0CB4\u0CB5\u0CBA\u0CDE\u0CDF\u0CE0\u0CE2\u0D05\u0D0D\u0D0E\u0D11\u0D12\u0D29\u0D2A\u0D3A\u0D60\u0D62\u0E01\u0E2F\u0E30\u0E31\u0E32\u0E34\u0E40\u0E46\u0E81\u0E83\u0E84\u0E85\u0E87\u0E89\u0E8A\u0E8B\u0E8D\u0E8E\u0E94\u0E98\u0E99\u0EA0\u0EA1\u0EA4\u0EA5\u0EA6\u0EA7\u0EA8\u0EAA\u0EAC\u0EAD\u0EAF\u0EB0\u0EB1\u0EB2\u0EB4\u0EBD\u0EBE\u0EC0\u0EC5\u0F40\u0F48\u0F49\u0F6A\u10A0\u10C6\u10D0\u10F7\u1100\u1101\u1102\u1104\u1105\u1108\u1109\u110A\u110B\u110D\u110E\u1113\u113C\u113D\u113E\u113F\u1140\u1141\u114C\u114D\u114E\u114F\u1150\u1151\u1154\u1156\u1159\u115A\u115F\u1162\u1163\u1164\u1165\u1166\u1167\u1168\u1169\u116A\u116D\u116F\u1172\u1174\u1175\u1176\u119E\u119F\u11A8\u11A9\u11AB\u11AC" +"\u11AE\u11B0\u11B7\u11B9\u11BA\u11BB\u11BC\u11C3\u11EB\u11EC\u11F0\u11F1\u11F9\u11FA\u1E00\u1E9C\u1EA0\u1EFA\u1F00\u1F16\u1F18\u1F1E\u1F20\u1F46\u1F48\u1F4E\u1F50\u1F58\u1F59\u1F5A\u1F5B\u1F5C\u1F5D\u1F5E\u1F5F\u1F7E\u1F80\u1FB5\u1FB6\u1FBD\u1FBE\u1FBF\u1FC2\u1FC5\u1FC6\u1FCD\u1FD0\u1FD4\u1FD6\u1FDC\u1FE0\u1FED\u1FF2\u1FF5\u1FF6\u1FFD\u2126\u2127\u212A\u212C\u212E\u212F\u2180\u2183\u3007\u3008\u3021\u302A\u3041\u3095\u30A1\u30FB\u3105\u312D\u4E00\u9FA6\uAC00\uD7A4"}, {"_xmlW", "\u0024\u0025\u002B\u002C\u0030\u003A\u003C\u003F\u0041\u005B\u005E\u005F\u0060\u007B\u007C\u007D\u007E\u007F\u00A2\u00AB\u00AC\u00AD\u00AE\u00B7\u00B8\u00BB\u00BC\u00BF\u00C0\u0221\u0222\u0234\u0250\u02AE\u02B0\u02EF\u0300\u0350\u0360\u0370\u0374\u0376\u037A\u037B\u0384\u0387\u0388\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03F7\u0400\u0487\u0488\u04CF\u04D0\u04F6\u04F8\u04FA\u0500\u0510\u0531\u0557\u0559\u055A\u0561\u0588\u0591\u05A2\u05A3\u05BA\u05BB\u05BE\u05BF\u05C0\u05C1\u05C3\u05C4\u05C5\u05D0\u05EB\u05F0\u05F3\u0621\u063B\u0640\u0656\u0660\u066A\u066E\u06D4\u06D5\u06DD\u06DE\u06EE\u06F0\u06FF\u0710\u072D\u0730\u074B\u0780\u07B2\u0901\u0904\u0905\u093A\u093C\u094E\u0950\u0955\u0958\u0964\u0966\u0970\u0981\u0984\u0985\u098D\u098F\u0991\u0993\u09A9\u09AA\u09B1\u09B2\u09B3\u09B6\u09BA\u09BC\u09BD\u09BE\u09C5\u09C7\u09C9\u09CB\u09CE\u09D7\u09D8\u09DC\u09DE\u09DF\u09E4\u09E6\u09FB\u0A02\u0A03\u0A05\u0A0B\u0A0F\u0A11\u0A13\u0A29\u0A2A\u0A31\u0A32\u0A34\u0A35" +"\u0A37\u0A38\u0A3A\u0A3C\u0A3D\u0A3E\u0A43\u0A47\u0A49\u0A4B\u0A4E\u0A59\u0A5D\u0A5E\u0A5F\u0A66\u0A75\u0A81\u0A84\u0A85\u0A8C\u0A8D\u0A8E\u0A8F\u0A92\u0A93\u0AA9\u0AAA\u0AB1\u0AB2\u0AB4\u0AB5\u0ABA\u0ABC\u0AC6\u0AC7\u0ACA\u0ACB\u0ACE\u0AD0\u0AD1\u0AE0\u0AE1\u0AE6\u0AF0\u0B01\u0B04\u0B05\u0B0D\u0B0F\u0B11\u0B13\u0B29\u0B2A\u0B31\u0B32\u0B34\u0B36\u0B3A\u0B3C\u0B44\u0B47\u0B49\u0B4B\u0B4E\u0B56\u0B58\u0B5C\u0B5E\u0B5F\u0B62\u0B66\u0B71\u0B82\u0B84\u0B85\u0B8B\u0B8E\u0B91\u0B92\u0B96\u0B99\u0B9B\u0B9C\u0B9D\u0B9E\u0BA0\u0BA3\u0BA5\u0BA8\u0BAB\u0BAE\u0BB6\u0BB7\u0BBA\u0BBE\u0BC3\u0BC6\u0BC9\u0BCA\u0BCE\u0BD7\u0BD8\u0BE7\u0BF3\u0C01\u0C04\u0C05\u0C0D\u0C0E\u0C11\u0C12\u0C29\u0C2A\u0C34\u0C35\u0C3A\u0C3E\u0C45\u0C46\u0C49\u0C4A\u0C4E\u0C55\u0C57\u0C60\u0C62\u0C66\u0C70\u0C82\u0C84\u0C85\u0C8D\u0C8E\u0C91\u0C92\u0CA9\u0CAA\u0CB4\u0CB5\u0CBA\u0CBE\u0CC5\u0CC6\u0CC9\u0CCA\u0CCE\u0CD5\u0CD7\u0CDE\u0CDF\u0CE0\u0CE2\u0CE6\u0CF0\u0D02\u0D04\u0D05\u0D0D\u0D0E\u0D11\u0D12\u0D29\u0D2A\u0D3A\u0D3E\u0D44\u0D46\u0D49" +"\u0D4A\u0D4E\u0D57\u0D58\u0D60\u0D62\u0D66\u0D70\u0D82\u0D84\u0D85\u0D97\u0D9A\u0DB2\u0DB3\u0DBC\u0DBD\u0DBE\u0DC0\u0DC7\u0DCA\u0DCB\u0DCF\u0DD5\u0DD6\u0DD7\u0DD8\u0DE0\u0DF2\u0DF4\u0E01\u0E3B\u0E3F\u0E4F\u0E50\u0E5A\u0E81\u0E83\u0E84\u0E85\u0E87\u0E89\u0E8A\u0E8B\u0E8D\u0E8E\u0E94\u0E98\u0E99\u0EA0\u0EA1\u0EA4\u0EA5\u0EA6\u0EA7\u0EA8\u0EAA\u0EAC\u0EAD\u0EBA\u0EBB\u0EBE\u0EC0\u0EC5\u0EC6\u0EC7\u0EC8\u0ECE\u0ED0\u0EDA\u0EDC\u0EDE\u0F00\u0F04\u0F13\u0F3A\u0F3E\u0F48\u0F49\u0F6B\u0F71\u0F85\u0F86\u0F8C\u0F90\u0F98\u0F99\u0FBD\u0FBE\u0FCD\u0FCF\u0FD0\u1000\u1022\u1023\u1028\u1029\u102B\u102C\u1033\u1036\u103A\u1040\u104A\u1050\u105A\u10A0\u10C6\u10D0\u10F9\u1100\u115A\u115F\u11A3\u11A8\u11FA\u1200\u1207\u1208\u1247\u1248\u1249\u124A\u124E\u1250\u1257\u1258\u1259\u125A\u125E\u1260\u1287\u1288\u1289\u128A\u128E\u1290\u12AF\u12B0\u12B1\u12B2\u12B6\u12B8\u12BF\u12C0\u12C1\u12C2\u12C6\u12C8\u12CF\u12D0\u12D7\u12D8\u12EF\u12F0\u130F\u1310\u1311\u1312\u1316\u1318\u131F\u1320\u1347\u1348\u135B\u1369\u137D\u13A0" +"\u13F5\u1401\u166D\u166F\u1677\u1681\u169B\u16A0\u16EB\u16EE\u16F1\u1700\u170D\u170E\u1715\u1720\u1735\u1740\u1754\u1760\u176D\u176E\u1771\u1772\u1774\u1780\u17D4\u17D7\u17D8\u17DB\u17DD\u17E0\u17EA\u180B\u180E\u1810\u181A\u1820\u1878\u1880\u18AA\u1E00\u1E9C\u1EA0\u1EFA\u1F00\u1F16\u1F18\u1F1E\u1F20\u1F46\u1F48\u1F4E\u1F50\u1F58\u1F59\u1F5A\u1F5B\u1F5C\u1F5D\u1F5E\u1F5F\u1F7E\u1F80\u1FB5\u1FB6\u1FC5\u1FC6\u1FD4\u1FD6\u1FDC\u1FDD\u1FF0\u1FF2\u1FF5\u1FF6\u1FFF\u2044\u2045\u2052\u2053\u2070\u2072\u2074\u207D\u207F\u208D\u20A0\u20B2\u20D0\u20EB\u2100\u213B\u213D\u214C\u2153\u2184\u2190\u2329\u232B\u23B4\u23B7\u23CF\u2400\u2427\u2440\u244B\u2460\u24FF\u2500\u2614\u2616\u2618\u2619\u267E\u2680\u268A\u2701\u2705\u2706\u270A\u270C\u2728\u2729\u274C\u274D\u274E\u274F\u2753\u2756\u2757\u2758\u275F\u2761\u2768\u2776\u2795\u2798\u27B0\u27B1\u27BF\u27D0\u27E6\u27F0\u2983\u2999\u29D8\u29DC\u29FC\u29FE\u2B00\u2E80\u2E9A\u2E9B\u2EF4\u2F00\u2FD6\u2FF0\u2FFC\u3004\u3008\u3012\u3014\u3020\u3030\u3031\u303D\u303E\u3040" +"\u3041\u3097\u3099\u30A0\u30A1\u30FB\u30FC\u3100\u3105\u312D\u3131\u318F\u3190\u31B8\u31F0\u321D\u3220\u3244\u3251\u327C\u327F\u32CC\u32D0\u32FF\u3300\u3377\u337B\u33DE\u33E0\u33FF\u3400\u4DB6\u4E00\u9FA6\uA000\uA48D\uA490\uA4C7\uAC00\uD7A4\uF900\uFA2E\uFA30\uFA6B\uFB00\uFB07\uFB13\uFB18\uFB1D\uFB37\uFB38\uFB3D\uFB3E\uFB3F\uFB40\uFB42\uFB43\uFB45\uFB46\uFBB2\uFBD3\uFD3E\uFD50\uFD90\uFD92\uFDC8\uFDF0\uFDFD\uFE00\uFE10\uFE20\uFE24\uFE62\uFE63\uFE64\uFE67\uFE69\uFE6A\uFE70\uFE75\uFE76\uFEFD\uFF04\uFF05\uFF0B\uFF0C\uFF10\uFF1A\uFF1C\uFF1F\uFF21\uFF3B\uFF3E\uFF3F\uFF40\uFF5B\uFF5C\uFF5D\uFF5E\uFF5F\uFF66\uFFBF\uFFC2\uFFC8\uFFCA\uFFD0\uFFD2\uFFD8\uFFDA\uFFDD\uFFE0\uFFE7\uFFE8\uFFEF\uFFFC\uFFFE"}, }; /************************************************************************* Let U be the set of Unicode character values and let L be the lowercase function, mapping from U to U. To perform case insensitive matching of character sets, we need to be able to map an interval I in U, say I = [chMin, chMax] = { ch : chMin <= ch <= chMax } to a set A such that A contains L(I) and A is contained in the union of I and L(I). The table below partitions U into intervals on which L is non-decreasing. Thus, for any interval J = [a, b] contained in one of these intervals, L(J) is contained in [L(a), L(b)]. It is also true that for any such J, [L(a), L(b)] is contained in the union of J and L(J). This does not follow from L being non-decreasing on these intervals. It follows from the nature of the L on each interval. On each interval, L has one of the following forms: (1) L(ch) = constant (LowercaseSet) (2) L(ch) = ch + offset (LowercaseAdd) (3) L(ch) = ch | 1 (LowercaseBor) (4) L(ch) = ch + (ch & 1) (LowercaseBad) It is easy to verify that for any of these forms [L(a), L(b)] is contained in the union of [a, b] and L([a, b]). ***************************************************************************/ private const int LowercaseSet = 0; // Set to arg. private const int LowercaseAdd = 1; // Add arg. private const int LowercaseBor = 2; // Bitwise or with 1. private const int LowercaseBad = 3; // Bitwise and with 1 and add original. private static readonly LowerCaseMapping[] _lcTable = new LowerCaseMapping[] { new LowerCaseMapping('\u0041', '\u005A', LowercaseAdd, 32), new LowerCaseMapping('\u00C0', '\u00DE', LowercaseAdd, 32), new LowerCaseMapping('\u0100', '\u012E', LowercaseBor, 0), new LowerCaseMapping('\u0130', '\u0130', LowercaseSet, 0x0069), new LowerCaseMapping('\u0132', '\u0136', LowercaseBor, 0), new LowerCaseMapping('\u0139', '\u0147', LowercaseBad, 0), new LowerCaseMapping('\u014A', '\u0176', LowercaseBor, 0), new LowerCaseMapping('\u0178', '\u0178', LowercaseSet, 0x00FF), new LowerCaseMapping('\u0179', '\u017D', LowercaseBad, 0), new LowerCaseMapping('\u0181', '\u0181', LowercaseSet, 0x0253), new LowerCaseMapping('\u0182', '\u0184', LowercaseBor, 0), new LowerCaseMapping('\u0186', '\u0186', LowercaseSet, 0x0254), new LowerCaseMapping('\u0187', '\u0187', LowercaseSet, 0x0188), new LowerCaseMapping('\u0189', '\u018A', LowercaseAdd, 205), new LowerCaseMapping('\u018B', '\u018B', LowercaseSet, 0x018C), new LowerCaseMapping('\u018E', '\u018E', LowercaseSet, 0x01DD), new LowerCaseMapping('\u018F', '\u018F', LowercaseSet, 0x0259), new LowerCaseMapping('\u0190', '\u0190', LowercaseSet, 0x025B), new LowerCaseMapping('\u0191', '\u0191', LowercaseSet, 0x0192), new LowerCaseMapping('\u0193', '\u0193', LowercaseSet, 0x0260), new LowerCaseMapping('\u0194', '\u0194', LowercaseSet, 0x0263), new LowerCaseMapping('\u0196', '\u0196', LowercaseSet, 0x0269), new LowerCaseMapping('\u0197', '\u0197', LowercaseSet, 0x0268), new LowerCaseMapping('\u0198', '\u0198', LowercaseSet, 0x0199), new LowerCaseMapping('\u019C', '\u019C', LowercaseSet, 0x026F), new LowerCaseMapping('\u019D', '\u019D', LowercaseSet, 0x0272), new LowerCaseMapping('\u019F', '\u019F', LowercaseSet, 0x0275), new LowerCaseMapping('\u01A0', '\u01A4', LowercaseBor, 0), new LowerCaseMapping('\u01A7', '\u01A7', LowercaseSet, 0x01A8), new LowerCaseMapping('\u01A9', '\u01A9', LowercaseSet, 0x0283), new LowerCaseMapping('\u01AC', '\u01AC', LowercaseSet, 0x01AD), new LowerCaseMapping('\u01AE', '\u01AE', LowercaseSet, 0x0288), new LowerCaseMapping('\u01AF', '\u01AF', LowercaseSet, 0x01B0), new LowerCaseMapping('\u01B1', '\u01B2', LowercaseAdd, 217), new LowerCaseMapping('\u01B3', '\u01B5', LowercaseBad, 0), new LowerCaseMapping('\u01B7', '\u01B7', LowercaseSet, 0x0292), new LowerCaseMapping('\u01B8', '\u01B8', LowercaseSet, 0x01B9), new LowerCaseMapping('\u01BC', '\u01BC', LowercaseSet, 0x01BD), new LowerCaseMapping('\u01C4', '\u01C5', LowercaseSet, 0x01C6), new LowerCaseMapping('\u01C7', '\u01C8', LowercaseSet, 0x01C9), new LowerCaseMapping('\u01CA', '\u01CB', LowercaseSet, 0x01CC), new LowerCaseMapping('\u01CD', '\u01DB', LowercaseBad, 0), new LowerCaseMapping('\u01DE', '\u01EE', LowercaseBor, 0), new LowerCaseMapping('\u01F1', '\u01F2', LowercaseSet, 0x01F3), new LowerCaseMapping('\u01F4', '\u01F4', LowercaseSet, 0x01F5), new LowerCaseMapping('\u01FA', '\u0216', LowercaseBor, 0), new LowerCaseMapping('\u0386', '\u0386', LowercaseSet, 0x03AC), new LowerCaseMapping('\u0388', '\u038A', LowercaseAdd, 37), new LowerCaseMapping('\u038C', '\u038C', LowercaseSet, 0x03CC), new LowerCaseMapping('\u038E', '\u038F', LowercaseAdd, 63), new LowerCaseMapping('\u0391', '\u03AB', LowercaseAdd, 32), new LowerCaseMapping('\u03E2', '\u03EE', LowercaseBor, 0), new LowerCaseMapping('\u0401', '\u040F', LowercaseAdd, 80), new LowerCaseMapping('\u0410', '\u042F', LowercaseAdd, 32), new LowerCaseMapping('\u0460', '\u0480', LowercaseBor, 0), new LowerCaseMapping('\u0490', '\u04BE', LowercaseBor, 0), new LowerCaseMapping('\u04C1', '\u04C3', LowercaseBad, 0), new LowerCaseMapping('\u04C7', '\u04C7', LowercaseSet, 0x04C8), new LowerCaseMapping('\u04CB', '\u04CB', LowercaseSet, 0x04CC), new LowerCaseMapping('\u04D0', '\u04EA', LowercaseBor, 0), new LowerCaseMapping('\u04EE', '\u04F4', LowercaseBor, 0), new LowerCaseMapping('\u04F8', '\u04F8', LowercaseSet, 0x04F9), new LowerCaseMapping('\u0531', '\u0556', LowercaseAdd, 48), new LowerCaseMapping('\u10A0', '\u10C5', LowercaseAdd, 48), new LowerCaseMapping('\u1E00', '\u1EF8', LowercaseBor, 0), new LowerCaseMapping('\u1F08', '\u1F0F', LowercaseAdd, -8), new LowerCaseMapping('\u1F18', '\u1F1F', LowercaseAdd, -8), new LowerCaseMapping('\u1F28', '\u1F2F', LowercaseAdd, -8), new LowerCaseMapping('\u1F38', '\u1F3F', LowercaseAdd, -8), new LowerCaseMapping('\u1F48', '\u1F4D', LowercaseAdd, -8), new LowerCaseMapping('\u1F59', '\u1F59', LowercaseSet, 0x1F51), new LowerCaseMapping('\u1F5B', '\u1F5B', LowercaseSet, 0x1F53), new LowerCaseMapping('\u1F5D', '\u1F5D', LowercaseSet, 0x1F55), new LowerCaseMapping('\u1F5F', '\u1F5F', LowercaseSet, 0x1F57), new LowerCaseMapping('\u1F68', '\u1F6F', LowercaseAdd, -8), new LowerCaseMapping('\u1F88', '\u1F8F', LowercaseAdd, -8), new LowerCaseMapping('\u1F98', '\u1F9F', LowercaseAdd, -8), new LowerCaseMapping('\u1FA8', '\u1FAF', LowercaseAdd, -8), new LowerCaseMapping('\u1FB8', '\u1FB9', LowercaseAdd, -8), new LowerCaseMapping('\u1FBA', '\u1FBB', LowercaseAdd, -74), new LowerCaseMapping('\u1FBC', '\u1FBC', LowercaseSet, 0x1FB3), new LowerCaseMapping('\u1FC8', '\u1FCB', LowercaseAdd, -86), new LowerCaseMapping('\u1FCC', '\u1FCC', LowercaseSet, 0x1FC3), new LowerCaseMapping('\u1FD8', '\u1FD9', LowercaseAdd, -8), new LowerCaseMapping('\u1FDA', '\u1FDB', LowercaseAdd, -100), new LowerCaseMapping('\u1FE8', '\u1FE9', LowercaseAdd, -8), new LowerCaseMapping('\u1FEA', '\u1FEB', LowercaseAdd, -112), new LowerCaseMapping('\u1FEC', '\u1FEC', LowercaseSet, 0x1FE5), new LowerCaseMapping('\u1FF8', '\u1FF9', LowercaseAdd, -128), new LowerCaseMapping('\u1FFA', '\u1FFB', LowercaseAdd, -126), new LowerCaseMapping('\u1FFC', '\u1FFC', LowercaseSet, 0x1FF3), new LowerCaseMapping('\u2160', '\u216F', LowercaseAdd, 16), new LowerCaseMapping('\u24B6', '\u24D0', LowercaseAdd, 26), new LowerCaseMapping('\uFF21', '\uFF3A', LowercaseAdd, 32), }; static RegexCharClass() { _definedCategories = new Hashtable(31); char[] groups = new char[9]; StringBuilder word = new StringBuilder(11); word.Append(GroupChar); groups[0] = GroupChar; // We need the UnicodeCategory enum values as a char so we can put them in a string // in the hashtable. In order to get there, we first must cast to an int, // then cast to a char // Also need to distinguish between positive and negative values. UnicodeCategory is zero // based, so we add one to each value and subtract it off later // Others groups[1] = (char) ((int) UnicodeCategory.Control + 1); _definedCategories["Cc"] = groups[1].ToString(); // Control groups[2] = (char) ((int) UnicodeCategory.Format + 1); _definedCategories["Cf"] = groups[2].ToString(); // Format groups[3] = (char) ((int) UnicodeCategory.OtherNotAssigned + 1); _definedCategories["Cn"] = groups[3].ToString(); // Not assigned groups[4] = (char) ((int) UnicodeCategory.PrivateUse + 1); _definedCategories["Co"] = groups[4].ToString(); // Private use groups[5] = (char) ((int) UnicodeCategory.Surrogate + 1); _definedCategories["Cs"] = groups[5].ToString(); // Surrogate groups[6] = GroupChar; _definedCategories["C"] = new String(groups, 0, 7); // Letters groups[1] = (char) ((int) UnicodeCategory.LowercaseLetter + 1); _definedCategories["Ll"] = groups[1].ToString(); // Lowercase groups[2] = (char) ((int) UnicodeCategory.ModifierLetter + 1); _definedCategories["Lm"] = groups[2].ToString(); // Modifier groups[3] = (char) ((int) UnicodeCategory.OtherLetter + 1); _definedCategories["Lo"] = groups[3].ToString(); // Other groups[4] = (char) ((int) UnicodeCategory.TitlecaseLetter + 1); _definedCategories["Lt"] = groups[4].ToString(); // Titlecase groups[5] = (char) ((int) UnicodeCategory.UppercaseLetter + 1); _definedCategories["Lu"] = groups[5].ToString(); // Uppercase //groups[6] = GroupChar; _definedCategories["L"] = new String(groups, 0, 7); word.Append(new String(groups, 1, 5)); // Marks groups[1] = (char) ((int) UnicodeCategory.SpacingCombiningMark + 1); _definedCategories["Mc"] = groups[1].ToString(); // Spacing combining groups[2] = (char) ((int) UnicodeCategory.EnclosingMark + 1); _definedCategories["Me"] = groups[2].ToString(); // Enclosing groups[3] = (char) ((int) UnicodeCategory.NonSpacingMark + 1); _definedCategories["Mn"] = groups[3].ToString(); // Non-spacing groups[4] = GroupChar; _definedCategories["M"] = new String(groups, 0, 5); //word.Append(groups[1]); //word.Append(groups[3]); // Numbers groups[1] = (char) ((int) UnicodeCategory.DecimalDigitNumber + 1); _definedCategories["Nd"] = groups[1].ToString(); // Decimal digit groups[2] = (char) ((int) UnicodeCategory.LetterNumber + 1); _definedCategories["Nl"] = groups[2].ToString(); // Letter groups[3] = (char) ((int) UnicodeCategory.OtherNumber + 1); _definedCategories["No"] = groups[3].ToString(); // Other //groups[4] = GroupChar; _definedCategories["N"] = new String(groups, 0, 5); word.Append(groups[1]); //word.Append(new String(groups, 1, 3)); // Punctuation groups[1] = (char) ((int) UnicodeCategory.ConnectorPunctuation + 1); _definedCategories["Pc"] = groups[1].ToString(); // Connector groups[2] = (char) ((int) UnicodeCategory.DashPunctuation + 1); _definedCategories["Pd"] = groups[2].ToString(); // Dash groups[3] = (char) ((int) UnicodeCategory.ClosePunctuation + 1); _definedCategories["Pe"] = groups[3].ToString(); // Close groups[4] = (char) ((int) UnicodeCategory.OtherPunctuation + 1); _definedCategories["Po"] = groups[4].ToString(); // Other groups[5] = (char) ((int) UnicodeCategory.OpenPunctuation + 1); _definedCategories["Ps"] = groups[5].ToString(); // Open groups[6] = (char) ((int) UnicodeCategory.FinalQuotePunctuation + 1); _definedCategories["Pf"] = groups[6].ToString(); // Inital quote groups[7] = (char) ((int) UnicodeCategory.InitialQuotePunctuation + 1); _definedCategories["Pi"] = groups[7].ToString(); // Final quote groups[8] = GroupChar; _definedCategories["P"] = new String(groups, 0, 9); word.Append(groups[1]); // Symbols groups[1] = (char) ((int) UnicodeCategory.CurrencySymbol + 1); _definedCategories["Sc"] = groups[1].ToString(); // Currency groups[2] = (char) ((int) UnicodeCategory.ModifierSymbol + 1); _definedCategories["Sk"] = groups[2].ToString(); // Modifier groups[3] = (char) ((int) UnicodeCategory.MathSymbol + 1); _definedCategories["Sm"] = groups[3].ToString(); // Math groups[4] = (char) ((int) UnicodeCategory.OtherSymbol + 1); _definedCategories["So"] = groups[4].ToString(); // Other groups[5] = GroupChar; _definedCategories["S"] = new String(groups, 0, 6); // Separators groups[1] = (char) ((int) UnicodeCategory.LineSeparator + 1); _definedCategories["Zl"] = groups[1].ToString(); // Line groups[2] = (char) ((int) UnicodeCategory.ParagraphSeparator + 1); _definedCategories["Zp"] = groups[2].ToString(); // Paragraph groups[3] = (char) ((int) UnicodeCategory.SpaceSeparator + 1); _definedCategories["Zs"] = groups[3].ToString(); // Space groups[4] = GroupChar; _definedCategories["Z"] = new String(groups, 0, 5); word.Append(GroupChar); Word = word.ToString(); NotWord = NegateCategory(Word); SpaceClass = "\x00\x00\x01" + Space; NotSpaceClass = "\x01\x00\x01" + Space; WordClass = "\x00\x00" + (char) Word.Length + Word; NotWordClass = "\x01\x00" + (char) Word.Length + Word;; DigitClass = "\x00\x00\x01" + (char) ((int) UnicodeCategory.DecimalDigitNumber + 1); NotDigitClass = "\x00\x00\x01" + unchecked ((char) (- ((int) UnicodeCategory.DecimalDigitNumber + 1)) ); #if DBG // make sure the _propTable is correctly ordered int len = _propTable.GetLength(0); for (int i=0; i0 && cc.RangeCount() > 0 && cc.GetRangeAt(0)._first <= GetRangeAt(RangeCount() - 1)._last) _canonical = false; for (i = 0; i < cc.RangeCount(); i += 1) { _rangelist.Add(cc.GetRangeAt(i)); } _categories.Append(cc._categories.ToString()); } /* * AddSet() * * Adds a set (specified by its string represenation) to the class. */ private void AddSet(String set) { int i; if (_canonical && RangeCount() > 0 && set.Length > 0 && set[0] <= GetRangeAt(RangeCount() - 1)._last) _canonical = false; for (i = 0; i < set.Length - 1; i += 2) { _rangelist.Add(new SingleRange(set[i], (char)(set[i + 1] - 1))); } if (i < set.Length) { _rangelist.Add(new SingleRange(set[i], Lastchar)); } } internal void AddSubtraction(RegexCharClass sub) { Debug.Assert(_subtractor == null, "Can't add two subtractions to a char class. "); _subtractor = sub; } /* * AddRange() * * Adds a single range of characters to the class. */ internal void AddRange(char first, char last) { _rangelist.Add(new SingleRange(first, last)); if (_canonical && _rangelist.Count > 0 && first <= ((SingleRange)_rangelist[_rangelist.Count - 1])._last) { _canonical = false; } } internal void AddCategoryFromName(string categoryName, bool invert, bool caseInsensitive, string pattern) { object cat = _definedCategories[categoryName]; if (cat != null) { string catstr = (string) cat; if (caseInsensitive) { if (categoryName.Equals("Lu") || categoryName.Equals("Lt")) catstr = /*catstr +*/ (string) _definedCategories["Ll"]; } if (invert) catstr = NegateCategory(catstr); // negate the category _categories.Append((string) catstr); } else AddSet(SetFromProperty(categoryName, invert, pattern)); } private void AddCategory(string category) { _categories.Append(category); } /* * AddLowerCase() * * Adds to the class any lowercase versions of characters already * in the class. Used for case-insensitivity. */ internal void AddLowercase(CultureInfo culture) { int i; int origSize; SingleRange range; _canonical = false; for (i = 0, origSize = _rangelist.Count; i < origSize; i++) { range = (SingleRange)_rangelist[i]; if (range._first == range._last) range._first = range._last = Char.ToLower(range._first, culture); else AddLowercaseRange(range._first, range._last, culture); } } /* * AddLowercaseRange() * * For a single range that's in the set, adds any additional ranges * necessary to ensure that lowercase equivalents are also included. */ private void AddLowercaseRange(char chMin, char chMax, CultureInfo culture) { int i, iMax, iMid; char chMinT, chMaxT; LowerCaseMapping lc; for (i = 0, iMax = _lcTable.Length; i < iMax; ) { iMid = (i + iMax) / 2; if (_lcTable[iMid]._chMax < chMin) i = iMid + 1; else iMax = iMid; } if (i >= _lcTable.Length) return; for ( ; i < _lcTable.Length && (lc = _lcTable[i])._chMin <= chMax; i++) { if ((chMinT = lc._chMin) < chMin) chMinT = chMin; if ((chMaxT = lc._chMax) > chMax) chMaxT = chMax; switch (lc._lcOp) { case LowercaseSet: chMinT = (char)lc._data; chMaxT = (char)lc._data; break; case LowercaseAdd: chMinT += (char)lc._data; chMaxT += (char)lc._data; break; case LowercaseBor: chMinT |= (char)1; chMaxT |= (char)1; break; case LowercaseBad: chMinT += (char)(chMinT & 1); chMaxT += (char)(chMaxT & 1); break; } if (chMinT < chMin || chMaxT > chMax) AddRange(chMinT, chMaxT); } } internal void AddWord(bool ecma, bool negate) { if (negate) { if (ecma) AddSet(RegexCharClass.NotECMAWordSet); else AddCategory(RegexCharClass.NotWord); } else { if (ecma) AddSet(RegexCharClass.ECMAWordSet); else AddCategory(RegexCharClass.Word); } } internal void AddSpace(bool ecma, bool negate) { if (negate) { if (ecma) AddSet(RegexCharClass.NotECMASpaceSet); else AddCategory(RegexCharClass.NotSpace); } else { if (ecma) AddSet(RegexCharClass.ECMASpaceSet); else AddCategory(RegexCharClass.Space); } } internal void AddDigit(bool ecma, bool negate, string pattern) { if (ecma) { if (negate) AddSet(RegexCharClass.NotECMADigitSet); else AddSet(RegexCharClass.ECMADigitSet); } else AddCategoryFromName("Nd", negate, false, pattern); } internal static string ConvertOldStringsToClass(string set, string category) { StringBuilder sb = new StringBuilder(set.Length + category.Length + 3); if (set.Length >= 2 && set[0] =='\0' && set[1] == '\0') { sb.Append((char) 0x1); sb.Append((char) (set.Length - 2)); sb.Append((char) category.Length); sb.Append(set.Substring(2)); } else { sb.Append((char) 0x0); sb.Append((char) set.Length); sb.Append((char) category.Length); sb.Append(set); } sb.Append(category); return sb.ToString(); } /* * SingletonChar() * * Returns the char */ internal static char SingletonChar(String set) { Debug.Assert(IsSingleton(set) || IsSingletonInverse(set), "Tried to get the singleton char out of a non singleton character class"); return set[SETSTART]; } internal static bool IsMergeable(string charClass) { return (!IsNegated(charClass) && !IsSubtraction(charClass)); } internal static bool IsEmpty(String charClass) { if (charClass[CATEGORYLENGTH] == 0 && charClass[FLAGS] == 0 && charClass[SETLENGTH] == 0 && !IsSubtraction(charClass)) return true; else return false; } /* * IsSingleton() * * True if the set contains a single character only */ internal static bool IsSingleton(String set) { if (set[FLAGS] == 0 && set[CATEGORYLENGTH] == 0 && set[SETLENGTH] == 2 && !IsSubtraction(set) && (set[SETSTART] == Lastchar || set[SETSTART]+1 == set[SETSTART+1])) return true; else return false; } internal static bool IsSingletonInverse(String set) { if (set[FLAGS] == 1 && set[CATEGORYLENGTH] == 0 && set[SETLENGTH] == 2 && !IsSubtraction(set) && (set[SETSTART] == Lastchar || set[SETSTART]+1 == set[SETSTART+1])) return true; else return false; } private static bool IsSubtraction(string charClass) { return (charClass.Length > SETSTART + charClass[SETLENGTH] + charClass[CATEGORYLENGTH]); } internal static bool IsNegated(string set) { return (set != null && set[FLAGS] == 1); } internal static bool IsECMAWordChar(char ch) { return CharInClass(ch, ECMAWordClass); } internal static bool IsWordChar(char ch) { return CharInClass(ch, WordClass); } internal static bool CharInClass(char ch, String set) { return CharInClassRecursive(ch, set, 0); } internal static bool CharInClassRecursive(char ch, String set, int start) { int mySetLength = set[start+SETLENGTH]; int myCategoryLength = set[start+CATEGORYLENGTH]; int myEndPosition = start + SETSTART + mySetLength + myCategoryLength; bool subtracted = false; if (set.Length > myEndPosition) { subtracted = CharInClassRecursive(ch, set, myEndPosition); } bool b = CharInClassInternal(ch, set, start, mySetLength, myCategoryLength); // Note that we apply the negation *before* performing the subtraction. This is because // the negation only applies to the first char class, not the entire subtraction. if (set[start+FLAGS] == 1) b = !b; return b && !subtracted; } /* * CharInClass() * * Determines a character's membership in a character class (via the * string representation of the class). */ private static bool CharInClassInternal(char ch, string set, int start, int mySetLength, int myCategoryLength) { int min; int max; int mid; min = start + SETSTART; max = min + mySetLength; while (min != max) { mid = (min + max) / 2; if (ch < set[mid]) max = mid; else min = mid + 1; } // The starting position of the set within the character class determines // whether what an odd or even ending position means. If the start is odd, // an *even* ending position means the character was in the set. With recursive // subtractions in the mix, the starting position = start+SETSTART. Since we know that // SETSTART is odd, we can simplify it out of the equation. But if it changes we need to // reverse this check. Debug.Assert((SETSTART & 0x1) == 1, "If SETSTART is not odd, the calculation below this will be reversed"); if ((min & 0x1) == (start & 0x1)) return true; else { if (myCategoryLength == 0) return false; return CharInCategory(ch, set, start, mySetLength, myCategoryLength); } } private static bool CharInCategory(char ch, string set, int start, int mySetLength, int myCategoryLength) { UnicodeCategory chcategory = char.GetUnicodeCategory(ch); int i=start + SETSTART + mySetLength; int end = i + myCategoryLength; while (i 0) { // greater than zero is a positive case if (curcat == SpaceConst) { if (Char.IsWhiteSpace(ch)) return true; else { i++; continue; } } --curcat; if (chcategory == (UnicodeCategory) curcat) return true; } else { // less than zero is a negative case if (curcat == NotSpaceConst) { if (!Char.IsWhiteSpace(ch)) return true; else { i++; continue; } } //curcat = -curcat; //--curcat; curcat = -1 - curcat; if (chcategory != (UnicodeCategory) curcat) return true; } i++; } return false; } /* * CharInCategoryGroup * This is used for categories which are composed of other categories - L, N, Z, W... * These groups need special treatment when they are negated */ private static bool CharInCategoryGroup(char ch, UnicodeCategory chcategory, string category, ref int i) { i++; int curcat = (short) category[i]; if (curcat > 0) { // positive case - the character must be in ANY of the categories in the group bool answer = false; while (curcat != 0) { if (!answer) { --curcat; if (chcategory == (UnicodeCategory) curcat) answer = true; } i++; curcat = (short) category[i]; } return answer; } else { // negative case - the character must be in NONE of the categories in the group bool answer = true; while (curcat != 0) { if (answer) { //curcat = -curcat; //--curcat; curcat = -1 - curcat; if (chcategory == (UnicodeCategory) curcat) answer = false; } i++; curcat = (short) category[i]; } return answer; } } private static string NegateCategory(string category) { if (category == null) return null; StringBuilder sb = new StringBuilder(category.Length); for (int i=0; i myEndPosition) sub = ParseRecursive(charClass, myEndPosition); return new RegexCharClass(charClass[start+FLAGS] == 1, ranges, new StringBuilder(charClass.Substring(end, myCategoryLength)), sub); } /* * RangeCount() * * The number of single ranges that have been accumulated so far. */ private int RangeCount() { return _rangelist.Count; } /* * ToString() * * Constructs the string representation of the class. */ internal String ToStringClass() { if (!_canonical) Canonicalize(); // make a guess about the length of the ranges. We'll update this at the end. // This is important because if the last range ends in LastChar, we won't append // LastChar to the list. int rangeLen = _rangelist.Count * 2 ; StringBuilder sb = new StringBuilder(rangeLen + _categories.Length + 3); int flags; if (_negate) flags = 1; else flags = 0; sb.Append((char) flags); sb.Append((char) rangeLen); sb.Append((char) _categories.Length); for (int i = 0; i < _rangelist.Count; i++) { SingleRange currentRange = (SingleRange)_rangelist[i]; sb.Append(currentRange._first); if (currentRange._last != Lastchar) sb.Append((char)(currentRange._last + 1)); } sb[SETLENGTH] = (char) (sb.Length - SETSTART); sb.Append(_categories); if (_subtractor != null) sb.Append(_subtractor.ToStringClass()); return sb.ToString(); } /* * GetRangeAt(int i) * * The ith range. */ private SingleRange GetRangeAt(int i) { return(SingleRange)_rangelist[i]; } /* * Canonicalize() * * Logic to reduce a character class to a unique, sorted form. */ private void Canonicalize() { SingleRange CurrentRange; int i; int j; char last; bool Done; _canonical = true; _rangelist.Sort(0, _rangelist.Count, new SingleRangeComparer()); // // Find and eliminate overlapping or abutting ranges // if (_rangelist.Count > 1) { Done = false; for (i = 1, j = 0; ; i++) { for (last = ((SingleRange)_rangelist[j])._last; ; i++) { if (i == _rangelist.Count || last == Lastchar) { Done = true; break; } if ((CurrentRange = (SingleRange)_rangelist[i])._first > last + 1) break; if (last < CurrentRange._last) last = CurrentRange._last; } ((SingleRange)_rangelist[j])._last = last; j++; if (Done) break; if (j < i) _rangelist[j] = _rangelist[i]; } _rangelist.RemoveRange(j, _rangelist.Count - j); } } private static String SetFromProperty(String capname, bool invert, string pattern) { int min = 0; int max = _propTable.GetLength(0); while (min != max) { int mid = (min + max) / 2; int res = String.Compare(capname, _propTable[mid,0], StringComparison.Ordinal); if (res < 0) max = mid; else if (res > 0) min = mid + 1; else { String set = _propTable[mid,1]; Debug.Assert(!String.IsNullOrEmpty(set), "Found a null/empty element in RegexCharClass prop table"); if (invert) { if (set[0] == Nullchar) { return set.Substring(1); } return Nullchar + set; } else { return set; } } } throw new ArgumentException(SR.GetString(SR.MakeException, pattern, SR.GetString(SR.UnknownProperty, capname))); } #if DBG /* * SetDescription() * * Produces a human-readable description for a set string. */ internal static String SetDescription(String set) { int mySetLength = set[SETLENGTH]; int myCategoryLength = set[CATEGORYLENGTH]; int myEndPosition = SETSTART + mySetLength + myCategoryLength; StringBuilder desc = new StringBuilder("["); int index = SETSTART; char ch1; char ch2; if (IsNegated(set)) desc.Append('^'); while (index < SETSTART + set[SETLENGTH]) { ch1 = set[index]; if (index + 1 < set.Length) ch2 = (char)(set[index + 1] - 1); else ch2 = Lastchar; desc.Append(CharDescription(ch1)); if (ch2 != ch1) { if (ch1 + 1 != ch2) desc.Append('-'); desc.Append(CharDescription(ch2)); } index += 2; } while (index < SETSTART + set[SETLENGTH] + set[CATEGORYLENGTH]) { ch1 = set[index]; if (ch1 == 0) { bool found = false; int lastindex = set.IndexOf(GroupChar, index+1); string group = set.Substring(index,lastindex-index + 1); IDictionaryEnumerator en = _definedCategories.GetEnumerator(); while(en.MoveNext()) { if (group.Equals(en.Value)) { if ((short) set[index+1] > 0) desc.Append("\\p{" + en.Key + "}"); else desc.Append("\\P{" + en.Key + "}"); found = true; break; } } if (!found) { if (group.Equals(Word)) desc.Append("\\w"); else if (group.Equals(NotWord)) desc.Append("\\W"); else Debug.Assert(false, "Couldn't find a goup to match '" + group + "'"); } index = lastindex; } else { desc.Append(CategoryDescription(ch1)); } index++; } if (set.Length > myEndPosition) { desc.Append('-'); desc.Append(SetDescription(set.Substring(myEndPosition))); } desc.Append(']'); return desc.ToString(); } internal static readonly char [] Hex = new char [] {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; internal static readonly string[] Categories = new string[] {"Lu", "Ll", "Lt", "Lm", "Lo", "Mn", "Mc", "Me", "Nd", "Nl", "No", "Zs", "Zl", "Zp", "Cc", "Cf", "Cs", "Co", "Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po", "Sm", "Sc", "Sk", "So", "Cn" }; /* * CharDescription() * * Produces a human-readable description for a single character. */ internal static String CharDescription(char ch) { StringBuilder sb = new StringBuilder(); int shift; if (ch == '\\') return "\\\\"; if (ch >= ' ' && ch <= '~') { sb.Append(ch); return sb.ToString(); } if (ch < 256) { sb.Append("\\x"); shift = 8; } else { sb.Append("\\u"); shift = 16; } while (shift > 0) { shift -= 4; sb.Append(Hex[(ch >> shift) & 0xF]); } return sb.ToString(); } private static String CategoryDescription(char ch) { if (ch == SpaceConst) return "\\s"; else if ((short) ch == NotSpaceConst) return "\\S"; else if ((short) ch < 0) { return "\\P{" + Categories[(- ((short)ch) - 1)] + "}"; } else { return "\\p{" + Categories[(ch - 1)] + "}"; } } #endif // Lower case mapping descriptor. private struct LowerCaseMapping { internal LowerCaseMapping(char chMin, char chMax, int lcOp, int data) { _chMin = chMin; _chMax = chMax; _lcOp = lcOp; _data = data; } internal char _chMin; internal char _chMax; internal int _lcOp; internal int _data; } /* * SingleRangeComparer * * For sorting ranges; compare based on the first char in the range. */ private sealed class SingleRangeComparer : IComparer { public int Compare(Object x, Object y) { return(((SingleRange)x)._first < ((SingleRange)y)._first ? -1 : (((SingleRange)x)._first > ((SingleRange)y)._first ? 1 : 0)); } } /* * SingleRange * * A first/last pair representing a single range of characters. */ private sealed class SingleRange { internal SingleRange(char first, char last) { _first = first; _last = last; } internal char _first; internal char _last; } } }
Link Menu
This book is available now!
Buy at Amazon US or
Buy at Amazon UK
- RawStylusActions.cs
- ResolveMatches11.cs
- Misc.cs
- DataReceivedEventArgs.cs
- SafeNativeMethodsOther.cs
- ZoneIdentityPermission.cs
- HtmlLiteralTextAdapter.cs
- Deflater.cs
- WindowsFont.cs
- GridViewSortEventArgs.cs
- Events.cs
- ComponentEditorPage.cs
- ByteArrayHelperWithString.cs
- BezierSegment.cs
- TextFormatterImp.cs
- InsufficientMemoryException.cs
- HttpListenerContext.cs
- HttpWebResponse.cs
- DynamicRouteExpression.cs
- MexBindingElement.cs
- IncrementalHitTester.cs
- XmlReaderSettings.cs
- namescope.cs
- RotateTransform3D.cs
- XmlTextAttribute.cs
- CodeAttributeDeclaration.cs
- ListViewGroup.cs
- NonPrimarySelectionGlyph.cs
- Thread.cs
- DbParameterCollectionHelper.cs
- ValidationResult.cs
- CategoryGridEntry.cs
- DbConnectionClosed.cs
- SamlAssertionKeyIdentifierClause.cs
- InputReportEventArgs.cs
- webbrowsersite.cs
- SecUtil.cs
- StorageAssociationTypeMapping.cs
- ResourceDisplayNameAttribute.cs
- BufferedWebEventProvider.cs
- ExceptionHandler.cs
- DataListItemCollection.cs
- RijndaelManaged.cs
- MLangCodePageEncoding.cs
- GridViewUpdatedEventArgs.cs
- PrintDialog.cs
- BufferModesCollection.cs
- ToggleProviderWrapper.cs
- CryptoStream.cs
- ImmutableClientRuntime.cs
- JavaScriptString.cs
- CategoryNameCollection.cs
- ValueTypeFixupInfo.cs
- DayRenderEvent.cs
- IList.cs
- DocumentsTrace.cs
- HtmlImageAdapter.cs
- CodeGen.cs
- CallSiteHelpers.cs
- HyperlinkAutomationPeer.cs
- GlobalItem.cs
- Directory.cs
- SessionStateUtil.cs
- CalendarDay.cs
- ContainerParaClient.cs
- ShutDownListener.cs
- KeyboardDevice.cs
- Privilege.cs
- SimpleHandlerFactory.cs
- PassportAuthentication.cs
- IsolatedStorageException.cs
- SqlRemoveConstantOrderBy.cs
- SignalGate.cs
- UnhandledExceptionEventArgs.cs
- MenuCommands.cs
- DataIdProcessor.cs
- DataRelationPropertyDescriptor.cs
- DesignerHelpers.cs
- As.cs
- AdornedElementPlaceholder.cs
- ControlPropertyNameConverter.cs
- BindingSource.cs
- PolyQuadraticBezierSegment.cs
- UrlAuthFailedErrorFormatter.cs
- RemoveStoryboard.cs
- InfoCardRSAPKCS1SignatureDeformatter.cs
- GeneralTransform.cs
- SqlDataSource.cs
- DesigntimeLicenseContext.cs
- List.cs
- arclist.cs
- StylusLogic.cs
- ComPlusSynchronizationContext.cs
- ResourcePermissionBaseEntry.cs
- DefaultWorkflowLoaderService.cs
- PropertyConverter.cs
- NewItemsContextMenuStrip.cs
- ChangeTracker.cs
- PrimitiveCodeDomSerializer.cs
- RepeatEnumerable.cs