Code:
/ DotNET / DotNET / 8.0 / untmp / whidbey / REDBITS / ndp / fx / src / Services / IO / System / IO / PatternMatcher.cs / 1 / PatternMatcher.cs
//------------------------------------------------------------------------------ //// Copyright (c) Microsoft Corporation. All rights reserved. // //----------------------------------------------------------------------------- /* */ namespace System.IO { using System.Text; using System.Diagnostics; using System; using Microsoft.Win32; using System.Globalization; internal static class PatternMatcher { ////// Private constants (directly from C header files) /// private const int MATCHES_ARRAY_SIZE = 16; private const char ANSI_DOS_STAR = '>'; private const char ANSI_DOS_QM = '<'; private const char DOS_DOT = '"'; ////// Tells whether a given name matches the expression given with a strict (i.e. UNIX like) /// semantics. This code is a port of unmanaged code. Original code comment follows: /// /// Routine Description: /// /// This routine compares a Dbcs name and an expression and tells the caller /// if the name is in the language defined by the expression. The input name /// cannot contain wildcards, while the expression may contain wildcards. /// /// Expression wild cards are evaluated as shown in the nondeterministic /// finite automatons below. Note that ~* and ~? are DOS_STAR and DOS_QM. /// /// /// ~* is DOS_STAR, ~? is DOS_QM, and ~. is DOS_DOT /// /// /// S /// <-----< /// X | | e Y /// X * Y == (0)----->-(1)->-----(2)-----(3) /// /// /// S-. /// <-----< /// X | | e Y /// X ~* Y == (0)----->-(1)->-----(2)-----(3) /// /// /// /// X S S Y /// X ?? Y == (0)---(1)---(2)---(3)---(4) /// /// /// /// X . . Y /// X ~.~. Y == (0)---(1)----(2)------(3)---(4) /// | |________| /// | ^ | /// |_______________| /// ^EOF or .^ /// /// /// X S-. S-. Y /// X ~?~? Y == (0)---(1)-----(2)-----(3)---(4) /// | |________| /// | ^ | /// |_______________| /// ^EOF or .^ /// /// /// /// where S is any single character /// /// S-. is any single character except the final . /// /// e is a null character transition /// /// EOF is the end of the name string /// /// In words: /// /// * matches 0 or more characters. /// /// ? matches exactly 1 character. /// /// DOS_STAR matches 0 or more characters until encountering and matching /// the final . in the name. /// /// DOS_QM matches any single character, or upon encountering a period or /// end of name string, advances the expression to the end of the /// set of contiguous DOS_QMs. /// /// DOS_DOT matches either a . or zero characters beyond name string. /// /// Arguments: /// /// Expression - Supplies the input expression to check against /// /// Name - Supplies the input name to check for. /// /// Return Value: /// /// BOOLEAN - TRUE if Name is an element in the set of strings denoted /// by the input Expression and FALSE otherwise. /// /// public static bool StrictMatchPattern(string expression, string name) { int nameOffset; int exprOffset; int length; int srcCount; int destCount; int previousDestCount; int matchesCount; char nameChar = '\0'; char exprChar = '\0'; int[] previousMatches = new int[MATCHES_ARRAY_SIZE]; int[] currentMatches = new int[MATCHES_ARRAY_SIZE]; int maxState; int currentState; bool nameFinished = false; // // The idea behind the algorithm is pretty simple. We keep track of // all possible locations in the regular expression that are matching // the name. If when the name has been exhausted one of the locations // in the expression is also just exhausted, the name is in the language // defined by the regular expression. // if (name == null || name.Length == 0 || expression == null || expression.Length == 0) { return false; } // // Special case by far the most common wild card search of * or *.* // if (expression.Equals("*") || expression.Equals("*.*")) { return true; } // If this class is ever exposed for generic use, // we need to make sure that name doesn't contain wildcards. Currently // the only component that calls this method is FileSystemWatcher and // it will never pass a name that contains a wildcard. // // Also special case expressions of the form *X. With this and the prior // case we have covered virtually all normal queries. // if (expression[0] == '*' && expression.IndexOf('*', 1) == -1) { int rightLength = expression.Length - 1; // if name is shorter that the stuff to the right of * in expression, we don't // need to do the string compare, otherwise we compare rightlength characters // and the end of both strings. if (name.Length >= rightLength && String.Compare(expression, 1, name, name.Length - rightLength, rightLength, StringComparison.OrdinalIgnoreCase) == 0) { return true; } } // // Walk through the name string, picking off characters. We go one // character beyond the end because some wild cards are able to match // zero characters beyond the end of the string. // // With each new name character we determine a new set of states that // match the name so far. We use two arrays that we swap back and forth // for this purpose. One array lists the possible expression states for // all name characters up to but not including the current one, and other // array is used to build up the list of states considering the current // name character as well. The arrays are then switched and the process // repeated. // // There is not a one-to-one correspondence between state number and // offset into the expression. This is evident from the NFAs in the // initial comment to this function. State numbering is not continuous. // This allows a simple conversion between state number and expression // offset. Each character in the expression can represent one or two // states. * and DOS_STAR generate two states: ExprOffset*2 and // ExprOffset*2 + 1. All other expreesion characters can produce only // a single state. Thus ExprOffset = State/2. // // // Here is a short description of the variables involved: // // NameOffset - The offset of the current name char being processed. // // ExprOffset - The offset of the current expression char being processed. // // SrcCount - Prior match being investigated with current name char // // DestCount - Next location to put a matching assuming current name char // // NameFinished - Allows one more itteration through the Matches array // after the name is exhusted (to come *s for example) // // PreviousDestCount - This is used to prevent entry duplication, see coment // // PreviousMatches - Holds the previous set of matches (the Src array) // // CurrentMatches - Holds the current set of matches (the Dest array) // // AuxBuffer, LocalBuffer - the storage for the Matches arrays // // // Set up the initial variables // previousMatches[0] = 0; matchesCount = 1; nameOffset = 0; maxState = expression.Length * 2; while (! nameFinished) { if (nameOffset < name.Length) { nameChar = name[nameOffset]; length = 1; nameOffset++; } else { nameFinished = true; // // if we have already exhasted the expression, C#. Don't // continue. // if (previousMatches[matchesCount - 1] == maxState) { break; } } // // Now, for each of the previous stored expression matches, see what // we can do with this name character. // srcCount = 0; destCount = 0; previousDestCount = 0; while (srcCount < matchesCount) { // // We have to carry on our expression analysis as far as possible // for each character of name, so we loop here until the // expression stops matching. A clue here is that expression // cases that can match zero or more characters end with a // continue, while those that can accept only a single character // end with a break. // exprOffset = ((previousMatches[srcCount++] + 1) / 2); length = 0; while (true) { if ( exprOffset == expression.Length ) { break; } // // The first time through the loop we don't want // to increment ExprOffset. // exprOffset += length; currentState = exprOffset * 2; if (exprOffset == expression.Length) { currentMatches[destCount++] = maxState; break; } exprChar = expression[exprOffset]; length = 1; // // Before we get started, we have to check for something // really gross. We may be about to exhaust the local // space for ExpressionMatches[][], so we have to allocate // some pool if this is the case. Yuk! // if (destCount >= MATCHES_ARRAY_SIZE - 2) { int newSize = currentMatches.Length * 2; int [] tmp = new int[newSize]; Array.Copy(currentMatches, tmp, currentMatches.Length); currentMatches = tmp; tmp = new int[newSize]; Array.Copy(previousMatches, tmp, previousMatches.Length); previousMatches = tmp; } // // * matches any character zero or more times. // if (exprChar == '*') { currentMatches[destCount++] = currentState; currentMatches[destCount++] = (currentState + 1); continue; } // // DOS_STAR matches any character except . zero or more times. // if (exprChar == ANSI_DOS_STAR) { bool iCanEatADot = false; // // If we are at a period, determine if we are allowed to // consume it, ie. make sure it is not the last one. // if (!nameFinished && (nameChar == '.') ) { char tmpChar; int offset; int nameLength = name.Length; for (offset = nameOffset; offset < nameLength; offset ++ ) { tmpChar = name[offset]; length = 1; if (tmpChar == '.') { iCanEatADot = true; break; } } } if (nameFinished || (nameChar != '.') || iCanEatADot) { currentMatches[destCount++] = currentState; currentMatches[destCount++] = (currentState + 1); continue; } else { // // We are at a period. We can only match zero // characters (ie. the epsilon transition). // currentMatches[destCount++] = (currentState + 1); continue; } } // // The following expreesion characters all match by consuming // a character, thus force the expression, and thus state // forward. // currentState += length * 2; // // DOS_QM is the most complicated. If the name is finished, // we can match zero characters. If this name is a '.', we // don't match, but look at the next expression. Otherwise // we match a single character. // if (exprChar == ANSI_DOS_QM) { if (nameFinished || (nameChar == '.') ) { continue; } currentMatches[destCount++] = currentState; break; } // // A DOS_DOT can match either a period, or zero characters // beyond the end of name. // if (exprChar == DOS_DOT) { if (nameFinished) { continue; } if (nameChar == '.') { currentMatches[destCount++] = currentState; break; } } // // From this point on a name character is required to even // continue, let alone make a match. // if (nameFinished) { break; } // // If this expression was a '?' we can match it once. // if (exprChar == '?') { currentMatches[destCount++] = currentState; break; } // // Finally, check if the expression char matches the name char // if (exprChar == nameChar) { currentMatches[destCount++] = currentState; break; } // // The expression didn't match so go look at the next // previous match. // break; } // // Prevent duplication in the destination array. // // Each of the arrays is montonically increasing and non- // duplicating, thus we skip over any source element in the src // array if we just added the same element to the destination // array. This guarentees non-duplication in the dest. array. // if ((srcCount < matchesCount) && (previousDestCount < destCount) ) { while (previousDestCount < destCount) { int previousLength = previousMatches.Length; while ((srcCount < previousLength) && (previousMatches[srcCount] < currentMatches[previousDestCount])) { srcCount += 1; } previousDestCount += 1; } } } // // If we found no matches in the just finished itteration, it's time // to bail. // if ( destCount == 0 ) { return false; } // // Swap the meaning the two arrays // { int[] tmp; tmp = previousMatches; previousMatches = currentMatches; currentMatches = tmp; } matchesCount = destCount; } currentState = previousMatches[matchesCount - 1]; return currentState == maxState; } } }
Link Menu
This book is available now!
Buy at Amazon US or
Buy at Amazon UK
- BitmapEffectDrawingContextWalker.cs
- EmptyStringExpandableObjectConverter.cs
- PeerApplicationLaunchInfo.cs
- HtmlProps.cs
- SQLGuid.cs
- FusionWrap.cs
- IgnoreSectionHandler.cs
- XmlReaderDelegator.cs
- SqlCacheDependencySection.cs
- TableDesigner.cs
- CrossSiteScriptingValidation.cs
- UpdateProgress.cs
- DefaultAutoFieldGenerator.cs
- NumberSubstitution.cs
- AnchoredBlock.cs
- DataServices.cs
- UIntPtr.cs
- ColumnWidthChangedEvent.cs
- Point3DAnimationUsingKeyFrames.cs
- ClientBuildManager.cs
- DynamicPhysicalDiscoSearcher.cs
- Inline.cs
- DatePickerAutomationPeer.cs
- FormsAuthenticationUser.cs
- ResizeGrip.cs
- CodeAttributeDeclaration.cs
- ProjectionPruner.cs
- DocumentViewerConstants.cs
- CalendarTable.cs
- TdsParserSessionPool.cs
- ProcessHost.cs
- FilteredAttributeCollection.cs
- XmlSortKeyAccumulator.cs
- TextEffect.cs
- peersecuritysettings.cs
- Geometry3D.cs
- DecoderFallback.cs
- TcpHostedTransportConfiguration.cs
- DataError.cs
- HTMLTextWriter.cs
- PageWrapper.cs
- DependencySource.cs
- HttpRawResponse.cs
- _SslStream.cs
- ToolStripScrollButton.cs
- SizeValueSerializer.cs
- QilXmlWriter.cs
- ListSortDescriptionCollection.cs
- PerspectiveCamera.cs
- CalendarTable.cs
- TypeEnumerableViewSchema.cs
- SortedDictionary.cs
- TTSEngineProxy.cs
- EditBehavior.cs
- TargetFrameworkAttribute.cs
- XslException.cs
- ScrollProviderWrapper.cs
- Label.cs
- HttpModuleActionCollection.cs
- VisualStates.cs
- WindowsBrush.cs
- RequestDescription.cs
- UrlPath.cs
- ExpandSegment.cs
- FunctionDetailsReader.cs
- DiscoveryEndpointElement.cs
- DataAccessException.cs
- AutoSizeComboBox.cs
- JavaScriptObjectDeserializer.cs
- IApplicationTrustManager.cs
- CodeParameterDeclarationExpressionCollection.cs
- StrokeIntersection.cs
- HttpChannelHelpers.cs
- SettingsContext.cs
- XhtmlTextWriter.cs
- SmiContextFactory.cs
- Stackframe.cs
- StreamingContext.cs
- ScrollBar.cs
- CroppedBitmap.cs
- LogEntryHeaderSerializer.cs
- DesignerCommandAdapter.cs
- OrthographicCamera.cs
- PageSettings.cs
- WeakReferenceList.cs
- PropertyInformation.cs
- IntSecurity.cs
- CommandDevice.cs
- _ConnectStream.cs
- JsonQNameDataContract.cs
- EventLogInformation.cs
- WeakReferenceEnumerator.cs
- InvalidateEvent.cs
- IHttpResponseInternal.cs
- WebServiceClientProxyGenerator.cs
- ScalarConstant.cs
- EventToken.cs
- WizardForm.cs
- DataGridViewColumnCollectionEditor.cs
- RTLAwareMessageBox.cs