Code:
/ FX-1434 / FX-1434 / 1.0 / untmp / whidbey / REDBITS / ndp / fx / src / Services / IO / System / IO / PatternMatcher.cs / 1 / PatternMatcher.cs
//------------------------------------------------------------------------------ //// Copyright (c) Microsoft Corporation. All rights reserved. // //----------------------------------------------------------------------------- /* */ namespace System.IO { using System.Text; using System.Diagnostics; using System; using Microsoft.Win32; using System.Globalization; internal static class PatternMatcher { ////// Private constants (directly from C header files) /// private const int MATCHES_ARRAY_SIZE = 16; private const char ANSI_DOS_STAR = '>'; private const char ANSI_DOS_QM = '<'; private const char DOS_DOT = '"'; ////// Tells whether a given name matches the expression given with a strict (i.e. UNIX like) /// semantics. This code is a port of unmanaged code. Original code comment follows: /// /// Routine Description: /// /// This routine compares a Dbcs name and an expression and tells the caller /// if the name is in the language defined by the expression. The input name /// cannot contain wildcards, while the expression may contain wildcards. /// /// Expression wild cards are evaluated as shown in the nondeterministic /// finite automatons below. Note that ~* and ~? are DOS_STAR and DOS_QM. /// /// /// ~* is DOS_STAR, ~? is DOS_QM, and ~. is DOS_DOT /// /// /// S /// <-----< /// X | | e Y /// X * Y == (0)----->-(1)->-----(2)-----(3) /// /// /// S-. /// <-----< /// X | | e Y /// X ~* Y == (0)----->-(1)->-----(2)-----(3) /// /// /// /// X S S Y /// X ?? Y == (0)---(1)---(2)---(3)---(4) /// /// /// /// X . . Y /// X ~.~. Y == (0)---(1)----(2)------(3)---(4) /// | |________| /// | ^ | /// |_______________| /// ^EOF or .^ /// /// /// X S-. S-. Y /// X ~?~? Y == (0)---(1)-----(2)-----(3)---(4) /// | |________| /// | ^ | /// |_______________| /// ^EOF or .^ /// /// /// /// where S is any single character /// /// S-. is any single character except the final . /// /// e is a null character transition /// /// EOF is the end of the name string /// /// In words: /// /// * matches 0 or more characters. /// /// ? matches exactly 1 character. /// /// DOS_STAR matches 0 or more characters until encountering and matching /// the final . in the name. /// /// DOS_QM matches any single character, or upon encountering a period or /// end of name string, advances the expression to the end of the /// set of contiguous DOS_QMs. /// /// DOS_DOT matches either a . or zero characters beyond name string. /// /// Arguments: /// /// Expression - Supplies the input expression to check against /// /// Name - Supplies the input name to check for. /// /// Return Value: /// /// BOOLEAN - TRUE if Name is an element in the set of strings denoted /// by the input Expression and FALSE otherwise. /// /// public static bool StrictMatchPattern(string expression, string name) { int nameOffset; int exprOffset; int length; int srcCount; int destCount; int previousDestCount; int matchesCount; char nameChar = '\0'; char exprChar = '\0'; int[] previousMatches = new int[MATCHES_ARRAY_SIZE]; int[] currentMatches = new int[MATCHES_ARRAY_SIZE]; int maxState; int currentState; bool nameFinished = false; // // The idea behind the algorithm is pretty simple. We keep track of // all possible locations in the regular expression that are matching // the name. If when the name has been exhausted one of the locations // in the expression is also just exhausted, the name is in the language // defined by the regular expression. // if (name == null || name.Length == 0 || expression == null || expression.Length == 0) { return false; } // // Special case by far the most common wild card search of * or *.* // if (expression.Equals("*") || expression.Equals("*.*")) { return true; } // If this class is ever exposed for generic use, // we need to make sure that name doesn't contain wildcards. Currently // the only component that calls this method is FileSystemWatcher and // it will never pass a name that contains a wildcard. // // Also special case expressions of the form *X. With this and the prior // case we have covered virtually all normal queries. // if (expression[0] == '*' && expression.IndexOf('*', 1) == -1) { int rightLength = expression.Length - 1; // if name is shorter that the stuff to the right of * in expression, we don't // need to do the string compare, otherwise we compare rightlength characters // and the end of both strings. if (name.Length >= rightLength && String.Compare(expression, 1, name, name.Length - rightLength, rightLength, StringComparison.OrdinalIgnoreCase) == 0) { return true; } } // // Walk through the name string, picking off characters. We go one // character beyond the end because some wild cards are able to match // zero characters beyond the end of the string. // // With each new name character we determine a new set of states that // match the name so far. We use two arrays that we swap back and forth // for this purpose. One array lists the possible expression states for // all name characters up to but not including the current one, and other // array is used to build up the list of states considering the current // name character as well. The arrays are then switched and the process // repeated. // // There is not a one-to-one correspondence between state number and // offset into the expression. This is evident from the NFAs in the // initial comment to this function. State numbering is not continuous. // This allows a simple conversion between state number and expression // offset. Each character in the expression can represent one or two // states. * and DOS_STAR generate two states: ExprOffset*2 and // ExprOffset*2 + 1. All other expreesion characters can produce only // a single state. Thus ExprOffset = State/2. // // // Here is a short description of the variables involved: // // NameOffset - The offset of the current name char being processed. // // ExprOffset - The offset of the current expression char being processed. // // SrcCount - Prior match being investigated with current name char // // DestCount - Next location to put a matching assuming current name char // // NameFinished - Allows one more itteration through the Matches array // after the name is exhusted (to come *s for example) // // PreviousDestCount - This is used to prevent entry duplication, see coment // // PreviousMatches - Holds the previous set of matches (the Src array) // // CurrentMatches - Holds the current set of matches (the Dest array) // // AuxBuffer, LocalBuffer - the storage for the Matches arrays // // // Set up the initial variables // previousMatches[0] = 0; matchesCount = 1; nameOffset = 0; maxState = expression.Length * 2; while (! nameFinished) { if (nameOffset < name.Length) { nameChar = name[nameOffset]; length = 1; nameOffset++; } else { nameFinished = true; // // if we have already exhasted the expression, C#. Don't // continue. // if (previousMatches[matchesCount - 1] == maxState) { break; } } // // Now, for each of the previous stored expression matches, see what // we can do with this name character. // srcCount = 0; destCount = 0; previousDestCount = 0; while (srcCount < matchesCount) { // // We have to carry on our expression analysis as far as possible // for each character of name, so we loop here until the // expression stops matching. A clue here is that expression // cases that can match zero or more characters end with a // continue, while those that can accept only a single character // end with a break. // exprOffset = ((previousMatches[srcCount++] + 1) / 2); length = 0; while (true) { if ( exprOffset == expression.Length ) { break; } // // The first time through the loop we don't want // to increment ExprOffset. // exprOffset += length; currentState = exprOffset * 2; if (exprOffset == expression.Length) { currentMatches[destCount++] = maxState; break; } exprChar = expression[exprOffset]; length = 1; // // Before we get started, we have to check for something // really gross. We may be about to exhaust the local // space for ExpressionMatches[][], so we have to allocate // some pool if this is the case. Yuk! // if (destCount >= MATCHES_ARRAY_SIZE - 2) { int newSize = currentMatches.Length * 2; int [] tmp = new int[newSize]; Array.Copy(currentMatches, tmp, currentMatches.Length); currentMatches = tmp; tmp = new int[newSize]; Array.Copy(previousMatches, tmp, previousMatches.Length); previousMatches = tmp; } // // * matches any character zero or more times. // if (exprChar == '*') { currentMatches[destCount++] = currentState; currentMatches[destCount++] = (currentState + 1); continue; } // // DOS_STAR matches any character except . zero or more times. // if (exprChar == ANSI_DOS_STAR) { bool iCanEatADot = false; // // If we are at a period, determine if we are allowed to // consume it, ie. make sure it is not the last one. // if (!nameFinished && (nameChar == '.') ) { char tmpChar; int offset; int nameLength = name.Length; for (offset = nameOffset; offset < nameLength; offset ++ ) { tmpChar = name[offset]; length = 1; if (tmpChar == '.') { iCanEatADot = true; break; } } } if (nameFinished || (nameChar != '.') || iCanEatADot) { currentMatches[destCount++] = currentState; currentMatches[destCount++] = (currentState + 1); continue; } else { // // We are at a period. We can only match zero // characters (ie. the epsilon transition). // currentMatches[destCount++] = (currentState + 1); continue; } } // // The following expreesion characters all match by consuming // a character, thus force the expression, and thus state // forward. // currentState += length * 2; // // DOS_QM is the most complicated. If the name is finished, // we can match zero characters. If this name is a '.', we // don't match, but look at the next expression. Otherwise // we match a single character. // if (exprChar == ANSI_DOS_QM) { if (nameFinished || (nameChar == '.') ) { continue; } currentMatches[destCount++] = currentState; break; } // // A DOS_DOT can match either a period, or zero characters // beyond the end of name. // if (exprChar == DOS_DOT) { if (nameFinished) { continue; } if (nameChar == '.') { currentMatches[destCount++] = currentState; break; } } // // From this point on a name character is required to even // continue, let alone make a match. // if (nameFinished) { break; } // // If this expression was a '?' we can match it once. // if (exprChar == '?') { currentMatches[destCount++] = currentState; break; } // // Finally, check if the expression char matches the name char // if (exprChar == nameChar) { currentMatches[destCount++] = currentState; break; } // // The expression didn't match so go look at the next // previous match. // break; } // // Prevent duplication in the destination array. // // Each of the arrays is montonically increasing and non- // duplicating, thus we skip over any source element in the src // array if we just added the same element to the destination // array. This guarentees non-duplication in the dest. array. // if ((srcCount < matchesCount) && (previousDestCount < destCount) ) { while (previousDestCount < destCount) { int previousLength = previousMatches.Length; while ((srcCount < previousLength) && (previousMatches[srcCount] < currentMatches[previousDestCount])) { srcCount += 1; } previousDestCount += 1; } } } // // If we found no matches in the just finished itteration, it's time // to bail. // if ( destCount == 0 ) { return false; } // // Swap the meaning the two arrays // { int[] tmp; tmp = previousMatches; previousMatches = currentMatches; currentMatches = tmp; } matchesCount = destCount; } currentState = previousMatches[matchesCount - 1]; return currentState == maxState; } } }
Link Menu
This book is available now!
Buy at Amazon US or
Buy at Amazon UK
- ThrowHelper.cs
- JsonReaderDelegator.cs
- SqlNodeAnnotation.cs
- ProcessHostMapPath.cs
- Matrix3D.cs
- CqlIdentifiers.cs
- PermissionSetEnumerator.cs
- Menu.cs
- Visual3DCollection.cs
- FormViewPageEventArgs.cs
- NavigationWindow.cs
- ObjectStateFormatter.cs
- ProviderConnectionPointCollection.cs
- WebPartHeaderCloseVerb.cs
- UserInitiatedRoutedEventPermissionAttribute.cs
- ExpanderAutomationPeer.cs
- WebPartExportVerb.cs
- NavigationWindowAutomationPeer.cs
- TypeConverterMarkupExtension.cs
- ModuleConfigurationInfo.cs
- recordstatefactory.cs
- X509Certificate2Collection.cs
- ArrangedElement.cs
- MachineSettingsSection.cs
- BrowserCapabilitiesCompiler.cs
- DataGridViewRowPrePaintEventArgs.cs
- RoutedPropertyChangedEventArgs.cs
- MethodSignatureGenerator.cs
- CacheEntry.cs
- FlowPosition.cs
- RtfControlWordInfo.cs
- DataSetMappper.cs
- MsmqTransportBindingElement.cs
- SpellerHighlightLayer.cs
- ScriptingProfileServiceSection.cs
- OnOperation.cs
- WebAdminConfigurationHelper.cs
- HideDisabledControlAdapter.cs
- EraserBehavior.cs
- SqlDataReader.cs
- ListViewInsertionMark.cs
- PasswordDeriveBytes.cs
- VisualStyleElement.cs
- ThrowHelper.cs
- _NtlmClient.cs
- ContainerActivationHelper.cs
- PartitionedStream.cs
- EventSinkActivityDesigner.cs
- MultipleViewProviderWrapper.cs
- StylusEventArgs.cs
- WebServiceTypeData.cs
- MenuScrollingVisibilityConverter.cs
- XPathAxisIterator.cs
- SoapMessage.cs
- PersonalizationStateInfo.cs
- CompensateDesigner.cs
- ConfigurationSettings.cs
- Bitmap.cs
- WindowsRichEdit.cs
- DescendantBaseQuery.cs
- UnsafeNativeMethodsCLR.cs
- ZipPackagePart.cs
- SqlComparer.cs
- XmlSchemaSimpleTypeRestriction.cs
- ConfigXmlElement.cs
- HttpApplicationStateWrapper.cs
- TreeViewHitTestInfo.cs
- CreateUserWizardAutoFormat.cs
- Interop.cs
- CodeCastExpression.cs
- NativeMethods.cs
- Italic.cs
- Set.cs
- LinearKeyFrames.cs
- VirtualizingStackPanel.cs
- CollectionBase.cs
- WsiProfilesElementCollection.cs
- ServiceHostingEnvironment.cs
- COAUTHIDENTITY.cs
- TranslateTransform.cs
- XmlObjectSerializerReadContextComplex.cs
- CollectionBuilder.cs
- StringResourceManager.cs
- AnimatedTypeHelpers.cs
- GenericNameHandler.cs
- ConnectionStringsSection.cs
- ContainerControl.cs
- UInt32Converter.cs
- Trace.cs
- WmlCommandAdapter.cs
- GeneralTransform3DCollection.cs
- XmlObjectSerializer.cs
- ColorTranslator.cs
- SelectedGridItemChangedEvent.cs
- CodeGeneratorOptions.cs
- TransformGroup.cs
- GenericUI.cs
- SafeRightsManagementQueryHandle.cs
- LinqToSqlWrapper.cs
- CanonicalFontFamilyReference.cs