Code:
/ 4.0 / 4.0 / DEVDIV_TFS / Dev10 / Releases / RTMRel / ndp / fx / src / Services / IO / System / IO / PatternMatcher.cs / 1305376 / PatternMatcher.cs
//------------------------------------------------------------------------------ //// Copyright (c) Microsoft Corporation. All rights reserved. // //----------------------------------------------------------------------------- /* */ namespace System.IO { using System.Text; using System.Diagnostics; using System; using Microsoft.Win32; using System.Globalization; internal static class PatternMatcher { ////// Private constants (directly from C header files) /// private const int MATCHES_ARRAY_SIZE = 16; private const char ANSI_DOS_STAR = '>'; private const char ANSI_DOS_QM = '<'; private const char DOS_DOT = '"'; ////// Tells whether a given name matches the expression given with a strict (i.e. UNIX like) /// semantics. This code is a port of unmanaged code. Original code comment follows: /// /// Routine Description: /// /// This routine compares a Dbcs name and an expression and tells the caller /// if the name is in the language defined by the expression. The input name /// cannot contain wildcards, while the expression may contain wildcards. /// /// Expression wild cards are evaluated as shown in the nondeterministic /// finite automatons below. Note that ~* and ~? are DOS_STAR and DOS_QM. /// /// /// ~* is DOS_STAR, ~? is DOS_QM, and ~. is DOS_DOT /// /// /// S /// <-----< /// X | | e Y /// X * Y == (0)----->-(1)->-----(2)-----(3) /// /// /// S-. /// <-----< /// X | | e Y /// X ~* Y == (0)----->-(1)->-----(2)-----(3) /// /// /// /// X S S Y /// X ?? Y == (0)---(1)---(2)---(3)---(4) /// /// /// /// X . . Y /// X ~.~. Y == (0)---(1)----(2)------(3)---(4) /// | |________| /// | ^ | /// |_______________| /// ^EOF or .^ /// /// /// X S-. S-. Y /// X ~?~? Y == (0)---(1)-----(2)-----(3)---(4) /// | |________| /// | ^ | /// |_______________| /// ^EOF or .^ /// /// /// /// where S is any single character /// /// S-. is any single character except the final . /// /// e is a null character transition /// /// EOF is the end of the name string /// /// In words: /// /// * matches 0 or more characters. /// /// ? matches exactly 1 character. /// /// DOS_STAR matches 0 or more characters until encountering and matching /// the final . in the name. /// /// DOS_QM matches any single character, or upon encountering a period or /// end of name string, advances the expression to the end of the /// set of contiguous DOS_QMs. /// /// DOS_DOT matches either a . or zero characters beyond name string. /// /// Arguments: /// /// Expression - Supplies the input expression to check against /// /// Name - Supplies the input name to check for. /// /// Return Value: /// /// BOOLEAN - TRUE if Name is an element in the set of strings denoted /// by the input Expression and FALSE otherwise. /// /// public static bool StrictMatchPattern(string expression, string name) { int nameOffset; int exprOffset; int length; int srcCount; int destCount; int previousDestCount; int matchesCount; char nameChar = '\0'; char exprChar = '\0'; int[] previousMatches = new int[MATCHES_ARRAY_SIZE]; int[] currentMatches = new int[MATCHES_ARRAY_SIZE]; int maxState; int currentState; bool nameFinished = false; // // The idea behind the algorithm is pretty simple. We keep track of // all possible locations in the regular expression that are matching // the name. If when the name has been exhausted one of the locations // in the expression is also just exhausted, the name is in the language // defined by the regular expression. // if (name == null || name.Length == 0 || expression == null || expression.Length == 0) { return false; } // // Special case by far the most common wild card search of * or *.* // if (expression.Equals("*") || expression.Equals("*.*")) { return true; } // If this class is ever exposed for generic use, // we need to make sure that name doesn't contain wildcards. Currently // the only component that calls this method is FileSystemWatcher and // it will never pass a name that contains a wildcard. // // Also special case expressions of the form *X. With this and the prior // case we have covered virtually all normal queries. // if (expression[0] == '*' && expression.IndexOf('*', 1) == -1) { int rightLength = expression.Length - 1; // if name is shorter that the stuff to the right of * in expression, we don't // need to do the string compare, otherwise we compare rightlength characters // and the end of both strings. if (name.Length >= rightLength && String.Compare(expression, 1, name, name.Length - rightLength, rightLength, StringComparison.OrdinalIgnoreCase) == 0) { return true; } } // // Walk through the name string, picking off characters. We go one // character beyond the end because some wild cards are able to match // zero characters beyond the end of the string. // // With each new name character we determine a new set of states that // match the name so far. We use two arrays that we swap back and forth // for this purpose. One array lists the possible expression states for // all name characters up to but not including the current one, and other // array is used to build up the list of states considering the current // name character as well. The arrays are then switched and the process // repeated. // // There is not a one-to-one correspondence between state number and // offset into the expression. This is evident from the NFAs in the // initial comment to this function. State numbering is not continuous. // This allows a simple conversion between state number and expression // offset. Each character in the expression can represent one or two // states. * and DOS_STAR generate two states: ExprOffset*2 and // ExprOffset*2 + 1. All other expreesion characters can produce only // a single state. Thus ExprOffset = State/2. // // // Here is a short description of the variables involved: // // NameOffset - The offset of the current name char being processed. // // ExprOffset - The offset of the current expression char being processed. // // SrcCount - Prior match being investigated with current name char // // DestCount - Next location to put a matching assuming current name char // // NameFinished - Allows one more itteration through the Matches array // after the name is exhusted (to come *s for example) // // PreviousDestCount - This is used to prevent entry duplication, see coment // // PreviousMatches - Holds the previous set of matches (the Src array) // // CurrentMatches - Holds the current set of matches (the Dest array) // // AuxBuffer, LocalBuffer - the storage for the Matches arrays // // // Set up the initial variables // previousMatches[0] = 0; matchesCount = 1; nameOffset = 0; maxState = expression.Length * 2; while (! nameFinished) { if (nameOffset < name.Length) { nameChar = name[nameOffset]; length = 1; nameOffset++; } else { nameFinished = true; // // if we have already exhasted the expression, C#. Don't // continue. // if (previousMatches[matchesCount - 1] == maxState) { break; } } // // Now, for each of the previous stored expression matches, see what // we can do with this name character. // srcCount = 0; destCount = 0; previousDestCount = 0; while (srcCount < matchesCount) { // // We have to carry on our expression analysis as far as possible // for each character of name, so we loop here until the // expression stops matching. A clue here is that expression // cases that can match zero or more characters end with a // continue, while those that can accept only a single character // end with a break. // exprOffset = ((previousMatches[srcCount++] + 1) / 2); length = 0; while (true) { if ( exprOffset == expression.Length ) { break; } // // The first time through the loop we don't want // to increment ExprOffset. // exprOffset += length; currentState = exprOffset * 2; if (exprOffset == expression.Length) { currentMatches[destCount++] = maxState; break; } exprChar = expression[exprOffset]; length = 1; // // Before we get started, we have to check for something // really gross. We may be about to exhaust the local // space for ExpressionMatches[][], so we have to allocate // some pool if this is the case. Yuk! // if (destCount >= MATCHES_ARRAY_SIZE - 2) { int newSize = currentMatches.Length * 2; int [] tmp = new int[newSize]; Array.Copy(currentMatches, tmp, currentMatches.Length); currentMatches = tmp; tmp = new int[newSize]; Array.Copy(previousMatches, tmp, previousMatches.Length); previousMatches = tmp; } // // * matches any character zero or more times. // if (exprChar == '*') { currentMatches[destCount++] = currentState; currentMatches[destCount++] = (currentState + 1); continue; } // // DOS_STAR matches any character except . zero or more times. // if (exprChar == ANSI_DOS_STAR) { bool iCanEatADot = false; // // If we are at a period, determine if we are allowed to // consume it, ie. make sure it is not the last one. // if (!nameFinished && (nameChar == '.') ) { char tmpChar; int offset; int nameLength = name.Length; for (offset = nameOffset; offset < nameLength; offset ++ ) { tmpChar = name[offset]; length = 1; if (tmpChar == '.') { iCanEatADot = true; break; } } } if (nameFinished || (nameChar != '.') || iCanEatADot) { currentMatches[destCount++] = currentState; currentMatches[destCount++] = (currentState + 1); continue; } else { // // We are at a period. We can only match zero // characters (ie. the epsilon transition). // currentMatches[destCount++] = (currentState + 1); continue; } } // // The following expreesion characters all match by consuming // a character, thus force the expression, and thus state // forward. // currentState += length * 2; // // DOS_QM is the most complicated. If the name is finished, // we can match zero characters. If this name is a '.', we // don't match, but look at the next expression. Otherwise // we match a single character. // if (exprChar == ANSI_DOS_QM) { if (nameFinished || (nameChar == '.') ) { continue; } currentMatches[destCount++] = currentState; break; } // // A DOS_DOT can match either a period, or zero characters // beyond the end of name. // if (exprChar == DOS_DOT) { if (nameFinished) { continue; } if (nameChar == '.') { currentMatches[destCount++] = currentState; break; } } // // From this point on a name character is required to even // continue, let alone make a match. // if (nameFinished) { break; } // // If this expression was a '?' we can match it once. // if (exprChar == '?') { currentMatches[destCount++] = currentState; break; } // // Finally, check if the expression char matches the name char // if (exprChar == nameChar) { currentMatches[destCount++] = currentState; break; } // // The expression didn't match so go look at the next // previous match. // break; } // // Prevent duplication in the destination array. // // Each of the arrays is montonically increasing and non- // duplicating, thus we skip over any source element in the src // array if we just added the same element to the destination // array. This guarentees non-duplication in the dest. array. // if ((srcCount < matchesCount) && (previousDestCount < destCount) ) { while (previousDestCount < destCount) { int previousLength = previousMatches.Length; while ((srcCount < previousLength) && (previousMatches[srcCount] < currentMatches[previousDestCount])) { srcCount += 1; } previousDestCount += 1; } } } // // If we found no matches in the just finished itteration, it's time // to bail. // if ( destCount == 0 ) { return false; } // // Swap the meaning the two arrays // { int[] tmp; tmp = previousMatches; previousMatches = currentMatches; currentMatches = tmp; } matchesCount = destCount; } currentState = previousMatches[matchesCount - 1]; return currentState == maxState; } } } // File provided for Reference Use Only by Microsoft Corporation (c) 2007. //------------------------------------------------------------------------------ //// Copyright (c) Microsoft Corporation. All rights reserved. // //----------------------------------------------------------------------------- /* */ namespace System.IO { using System.Text; using System.Diagnostics; using System; using Microsoft.Win32; using System.Globalization; internal static class PatternMatcher { ////// Private constants (directly from C header files) /// private const int MATCHES_ARRAY_SIZE = 16; private const char ANSI_DOS_STAR = '>'; private const char ANSI_DOS_QM = '<'; private const char DOS_DOT = '"'; ////// Tells whether a given name matches the expression given with a strict (i.e. UNIX like) /// semantics. This code is a port of unmanaged code. Original code comment follows: /// /// Routine Description: /// /// This routine compares a Dbcs name and an expression and tells the caller /// if the name is in the language defined by the expression. The input name /// cannot contain wildcards, while the expression may contain wildcards. /// /// Expression wild cards are evaluated as shown in the nondeterministic /// finite automatons below. Note that ~* and ~? are DOS_STAR and DOS_QM. /// /// /// ~* is DOS_STAR, ~? is DOS_QM, and ~. is DOS_DOT /// /// /// S /// <-----< /// X | | e Y /// X * Y == (0)----->-(1)->-----(2)-----(3) /// /// /// S-. /// <-----< /// X | | e Y /// X ~* Y == (0)----->-(1)->-----(2)-----(3) /// /// /// /// X S S Y /// X ?? Y == (0)---(1)---(2)---(3)---(4) /// /// /// /// X . . Y /// X ~.~. Y == (0)---(1)----(2)------(3)---(4) /// | |________| /// | ^ | /// |_______________| /// ^EOF or .^ /// /// /// X S-. S-. Y /// X ~?~? Y == (0)---(1)-----(2)-----(3)---(4) /// | |________| /// | ^ | /// |_______________| /// ^EOF or .^ /// /// /// /// where S is any single character /// /// S-. is any single character except the final . /// /// e is a null character transition /// /// EOF is the end of the name string /// /// In words: /// /// * matches 0 or more characters. /// /// ? matches exactly 1 character. /// /// DOS_STAR matches 0 or more characters until encountering and matching /// the final . in the name. /// /// DOS_QM matches any single character, or upon encountering a period or /// end of name string, advances the expression to the end of the /// set of contiguous DOS_QMs. /// /// DOS_DOT matches either a . or zero characters beyond name string. /// /// Arguments: /// /// Expression - Supplies the input expression to check against /// /// Name - Supplies the input name to check for. /// /// Return Value: /// /// BOOLEAN - TRUE if Name is an element in the set of strings denoted /// by the input Expression and FALSE otherwise. /// /// public static bool StrictMatchPattern(string expression, string name) { int nameOffset; int exprOffset; int length; int srcCount; int destCount; int previousDestCount; int matchesCount; char nameChar = '\0'; char exprChar = '\0'; int[] previousMatches = new int[MATCHES_ARRAY_SIZE]; int[] currentMatches = new int[MATCHES_ARRAY_SIZE]; int maxState; int currentState; bool nameFinished = false; // // The idea behind the algorithm is pretty simple. We keep track of // all possible locations in the regular expression that are matching // the name. If when the name has been exhausted one of the locations // in the expression is also just exhausted, the name is in the language // defined by the regular expression. // if (name == null || name.Length == 0 || expression == null || expression.Length == 0) { return false; } // // Special case by far the most common wild card search of * or *.* // if (expression.Equals("*") || expression.Equals("*.*")) { return true; } // If this class is ever exposed for generic use, // we need to make sure that name doesn't contain wildcards. Currently // the only component that calls this method is FileSystemWatcher and // it will never pass a name that contains a wildcard. // // Also special case expressions of the form *X. With this and the prior // case we have covered virtually all normal queries. // if (expression[0] == '*' && expression.IndexOf('*', 1) == -1) { int rightLength = expression.Length - 1; // if name is shorter that the stuff to the right of * in expression, we don't // need to do the string compare, otherwise we compare rightlength characters // and the end of both strings. if (name.Length >= rightLength && String.Compare(expression, 1, name, name.Length - rightLength, rightLength, StringComparison.OrdinalIgnoreCase) == 0) { return true; } } // // Walk through the name string, picking off characters. We go one // character beyond the end because some wild cards are able to match // zero characters beyond the end of the string. // // With each new name character we determine a new set of states that // match the name so far. We use two arrays that we swap back and forth // for this purpose. One array lists the possible expression states for // all name characters up to but not including the current one, and other // array is used to build up the list of states considering the current // name character as well. The arrays are then switched and the process // repeated. // // There is not a one-to-one correspondence between state number and // offset into the expression. This is evident from the NFAs in the // initial comment to this function. State numbering is not continuous. // This allows a simple conversion between state number and expression // offset. Each character in the expression can represent one or two // states. * and DOS_STAR generate two states: ExprOffset*2 and // ExprOffset*2 + 1. All other expreesion characters can produce only // a single state. Thus ExprOffset = State/2. // // // Here is a short description of the variables involved: // // NameOffset - The offset of the current name char being processed. // // ExprOffset - The offset of the current expression char being processed. // // SrcCount - Prior match being investigated with current name char // // DestCount - Next location to put a matching assuming current name char // // NameFinished - Allows one more itteration through the Matches array // after the name is exhusted (to come *s for example) // // PreviousDestCount - This is used to prevent entry duplication, see coment // // PreviousMatches - Holds the previous set of matches (the Src array) // // CurrentMatches - Holds the current set of matches (the Dest array) // // AuxBuffer, LocalBuffer - the storage for the Matches arrays // // // Set up the initial variables // previousMatches[0] = 0; matchesCount = 1; nameOffset = 0; maxState = expression.Length * 2; while (! nameFinished) { if (nameOffset < name.Length) { nameChar = name[nameOffset]; length = 1; nameOffset++; } else { nameFinished = true; // // if we have already exhasted the expression, C#. Don't // continue. // if (previousMatches[matchesCount - 1] == maxState) { break; } } // // Now, for each of the previous stored expression matches, see what // we can do with this name character. // srcCount = 0; destCount = 0; previousDestCount = 0; while (srcCount < matchesCount) { // // We have to carry on our expression analysis as far as possible // for each character of name, so we loop here until the // expression stops matching. A clue here is that expression // cases that can match zero or more characters end with a // continue, while those that can accept only a single character // end with a break. // exprOffset = ((previousMatches[srcCount++] + 1) / 2); length = 0; while (true) { if ( exprOffset == expression.Length ) { break; } // // The first time through the loop we don't want // to increment ExprOffset. // exprOffset += length; currentState = exprOffset * 2; if (exprOffset == expression.Length) { currentMatches[destCount++] = maxState; break; } exprChar = expression[exprOffset]; length = 1; // // Before we get started, we have to check for something // really gross. We may be about to exhaust the local // space for ExpressionMatches[][], so we have to allocate // some pool if this is the case. Yuk! // if (destCount >= MATCHES_ARRAY_SIZE - 2) { int newSize = currentMatches.Length * 2; int [] tmp = new int[newSize]; Array.Copy(currentMatches, tmp, currentMatches.Length); currentMatches = tmp; tmp = new int[newSize]; Array.Copy(previousMatches, tmp, previousMatches.Length); previousMatches = tmp; } // // * matches any character zero or more times. // if (exprChar == '*') { currentMatches[destCount++] = currentState; currentMatches[destCount++] = (currentState + 1); continue; } // // DOS_STAR matches any character except . zero or more times. // if (exprChar == ANSI_DOS_STAR) { bool iCanEatADot = false; // // If we are at a period, determine if we are allowed to // consume it, ie. make sure it is not the last one. // if (!nameFinished && (nameChar == '.') ) { char tmpChar; int offset; int nameLength = name.Length; for (offset = nameOffset; offset < nameLength; offset ++ ) { tmpChar = name[offset]; length = 1; if (tmpChar == '.') { iCanEatADot = true; break; } } } if (nameFinished || (nameChar != '.') || iCanEatADot) { currentMatches[destCount++] = currentState; currentMatches[destCount++] = (currentState + 1); continue; } else { // // We are at a period. We can only match zero // characters (ie. the epsilon transition). // currentMatches[destCount++] = (currentState + 1); continue; } } // // The following expreesion characters all match by consuming // a character, thus force the expression, and thus state // forward. // currentState += length * 2; // // DOS_QM is the most complicated. If the name is finished, // we can match zero characters. If this name is a '.', we // don't match, but look at the next expression. Otherwise // we match a single character. // if (exprChar == ANSI_DOS_QM) { if (nameFinished || (nameChar == '.') ) { continue; } currentMatches[destCount++] = currentState; break; } // // A DOS_DOT can match either a period, or zero characters // beyond the end of name. // if (exprChar == DOS_DOT) { if (nameFinished) { continue; } if (nameChar == '.') { currentMatches[destCount++] = currentState; break; } } // // From this point on a name character is required to even // continue, let alone make a match. // if (nameFinished) { break; } // // If this expression was a '?' we can match it once. // if (exprChar == '?') { currentMatches[destCount++] = currentState; break; } // // Finally, check if the expression char matches the name char // if (exprChar == nameChar) { currentMatches[destCount++] = currentState; break; } // // The expression didn't match so go look at the next // previous match. // break; } // // Prevent duplication in the destination array. // // Each of the arrays is montonically increasing and non- // duplicating, thus we skip over any source element in the src // array if we just added the same element to the destination // array. This guarentees non-duplication in the dest. array. // if ((srcCount < matchesCount) && (previousDestCount < destCount) ) { while (previousDestCount < destCount) { int previousLength = previousMatches.Length; while ((srcCount < previousLength) && (previousMatches[srcCount] < currentMatches[previousDestCount])) { srcCount += 1; } previousDestCount += 1; } } } // // If we found no matches in the just finished itteration, it's time // to bail. // if ( destCount == 0 ) { return false; } // // Swap the meaning the two arrays // { int[] tmp; tmp = previousMatches; previousMatches = currentMatches; currentMatches = tmp; } matchesCount = destCount; } currentState = previousMatches[matchesCount - 1]; return currentState == maxState; } } } // File provided for Reference Use Only by Microsoft Corporation (c) 2007.
Link Menu
This book is available now!
Buy at Amazon US or
Buy at Amazon UK
- BulletedListEventArgs.cs
- WpfXamlMember.cs
- DataGridViewColumnStateChangedEventArgs.cs
- VisualBasicHelper.cs
- FixUpCollection.cs
- ReliableSessionElement.cs
- SHA384.cs
- WebHttpBindingCollectionElement.cs
- PrintControllerWithStatusDialog.cs
- Vector3DCollectionValueSerializer.cs
- CheckBox.cs
- sortedlist.cs
- SetterBase.cs
- PeerPresenceInfo.cs
- SplitContainer.cs
- LongValidator.cs
- PeerTransportListenAddressValidator.cs
- DataGridBeginningEditEventArgs.cs
- TypeElement.cs
- FilterFactory.cs
- Popup.cs
- QuotaExceededException.cs
- NetworkInformationException.cs
- SqlErrorCollection.cs
- OleDbTransaction.cs
- AuthenticationManager.cs
- RoleManagerSection.cs
- RangeBaseAutomationPeer.cs
- SimpleMailWebEventProvider.cs
- InstanceDataCollection.cs
- ThicknessAnimationBase.cs
- FutureFactory.cs
- XamlStyleSerializer.cs
- TextParagraph.cs
- FormsIdentity.cs
- MailAddressCollection.cs
- SQLCharsStorage.cs
- WebZone.cs
- MemberRelationshipService.cs
- SecureStringHasher.cs
- wgx_render.cs
- ArrangedElementCollection.cs
- ValidatorCompatibilityHelper.cs
- StorageMappingItemLoader.cs
- Exception.cs
- ResXBuildProvider.cs
- LeaseManager.cs
- SimpleBitVector32.cs
- EntityContainerEntitySet.cs
- DebuggerService.cs
- HtmlValidatorAdapter.cs
- TemplateBindingExtension.cs
- EntityViewGenerator.cs
- BorderGapMaskConverter.cs
- UpDownBase.cs
- SymbolEqualComparer.cs
- GuidConverter.cs
- ZipIOLocalFileBlock.cs
- Label.cs
- NameTable.cs
- Track.cs
- SafeSystemMetrics.cs
- SelectedCellsCollection.cs
- QilList.cs
- ReferenceEqualityComparer.cs
- BindingList.cs
- UserPreferenceChangingEventArgs.cs
- EventRoute.cs
- DockPattern.cs
- SyncOperationState.cs
- ClipboardProcessor.cs
- BinaryWriter.cs
- BrowserCapabilitiesCodeGenerator.cs
- DataListCommandEventArgs.cs
- x509utils.cs
- RestClientProxyHandler.cs
- ToolTipAutomationPeer.cs
- OpenFileDialog.cs
- Substitution.cs
- GenerateScriptTypeAttribute.cs
- PageBuildProvider.cs
- SamlAction.cs
- TypeConverterHelper.cs
- HttpApplicationFactory.cs
- XmlC14NWriter.cs
- TempFiles.cs
- ProcessManager.cs
- EntitySqlQueryState.cs
- AnnotationHighlightLayer.cs
- ChildDocumentBlock.cs
- ContentTextAutomationPeer.cs
- DockingAttribute.cs
- DataControlPagerLinkButton.cs
- CodeAttributeArgument.cs
- SHA1Managed.cs
- WebPartHeaderCloseVerb.cs
- ComAdminInterfaces.cs
- DisplayNameAttribute.cs
- GridItemCollection.cs
- CredentialCache.cs