RegexFCD.cs source code in C# .NET

Source code for the .NET framework in C#

                        

Code:

/ Dotnetfx_Win7_3.5.1 / Dotnetfx_Win7_3.5.1 / 3.5.1 / DEVDIV / depot / DevDiv / releases / whidbey / NetFXspW7 / ndp / fx / src / Regex / System / Text / RegularExpressions / RegexFCD.cs / 1 / RegexFCD.cs

                            //------------------------------------------------------------------------------ 
// 
//     Copyright (c) Microsoft Corporation.  All rights reserved.
// 
//----------------------------------------------------------------------------- 

// This RegexFCD class is internal to the Regex package. 
// It builds a bunch of FC information (RegexFC) about 
// the regex for optimization purposes.
 
// Implementation notes:
//
// This step is as simple as walking the tree and emitting
// sequences of codes. 

namespace System.Text.RegularExpressions { 
 
    using System.Collections;
    using System.Globalization; 

    internal sealed class RegexFCD {
        private int[]      _intStack;
        private int        _intDepth; 
        private RegexFC[]  _fcStack;
        private int        _fcDepth; 
        private bool    _skipAllChildren;      // don't process any more children at the current level 
        private bool    _skipchild;            // don't process the current child.
        private bool    _failed = false; 

        private const int BeforeChild = 64;
        private const int AfterChild = 128;
 
        // where the regex can be pegged
 
        internal  const int Beginning  = 0x0001; 
        internal  const int Bol        = 0x0002;
        internal  const int Start      = 0x0004; 
        internal  const int Eol        = 0x0008;
        internal  const int EndZ       = 0x0010;
        internal  const int End        = 0x0020;
        internal  const int Boundary   = 0x0040; 
        internal  const int ECMABoundary = 0x0080;
 
        /* 
         * This is the one of the only two functions that should be called from outside.
         * It takes a RegexTree and computes the set of chars that can start it. 
         */
        internal static RegexPrefix FirstChars(RegexTree t) {
            RegexFCD s = new RegexFCD();
            RegexFC fc = s.RegexFCFromRegexTree(t); 

            if (fc == null || fc._nullable) 
                return null; 

            CultureInfo culture = ((t._options & RegexOptions.CultureInvariant) != 0) ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture; 
            return new RegexPrefix(fc.GetFirstChars(culture), fc.IsCaseInsensitive());
        }

        /* 
         * This is a related computation: it takes a RegexTree and computes the
         * leading substring if it see one. It's quite trivial and gives up easily. 
         */ 
        internal static RegexPrefix Prefix(RegexTree tree) {
            RegexNode curNode; 
            RegexNode concatNode = null;
            int nextChild = 0;

            curNode = tree._root; 

            for (;;) { 
                switch (curNode._type) { 
                    case RegexNode.Concatenate:
                        if (curNode.ChildCount() > 0) { 
                            concatNode = curNode;
                            nextChild = 0;
                        }
                        break; 

                    case RegexNode.Greedy: 
                    case RegexNode.Capture: 
                        curNode = curNode.Child(0);
                        concatNode = null; 
                        continue;

                    case RegexNode.Oneloop:
                    case RegexNode.Onelazy: 
                        if (curNode._m > 0) {
                            string pref = String.Empty.PadRight(curNode._m, curNode._ch); 
                            return new RegexPrefix(pref, 0 != (curNode._options & RegexOptions.IgnoreCase)); 
                        }
                        else 
                            return RegexPrefix.Empty;

                    case RegexNode.One:
                        return new RegexPrefix(curNode._ch.ToString(CultureInfo.InvariantCulture), 0 != (curNode._options & RegexOptions.IgnoreCase)); 

                    case RegexNode.Multi: 
                        return new RegexPrefix(curNode._str, 0 != (curNode._options & RegexOptions.IgnoreCase)); 

                    case RegexNode.Bol: 
                    case RegexNode.Eol:
                    case RegexNode.Boundary:
                    case RegexNode.ECMABoundary:
                    case RegexNode.Beginning: 
                    case RegexNode.Start:
                    case RegexNode.EndZ: 
                    case RegexNode.End: 
                    case RegexNode.Empty:
                    case RegexNode.Require: 
                    case RegexNode.Prevent:
                        break;

                    default: 
                        return RegexPrefix.Empty;
                } 
 
                if (concatNode == null || nextChild >= concatNode.ChildCount())
                    return RegexPrefix.Empty; 

                curNode = concatNode.Child(nextChild++);
            }
        } 

        /* 
         * Yet another related computation: it takes a RegexTree and computes the 
         * leading anchors that it encounters.
         */ 
        internal static int Anchors(RegexTree tree) {
            RegexNode curNode;
            RegexNode concatNode = null;
            int nextChild = 0; 
            int result = 0;
 
            curNode = tree._root; 

            for (;;) { 
                switch (curNode._type) {
                    case RegexNode.Concatenate:
                        if (curNode.ChildCount() > 0) {
                            concatNode = curNode; 
                            nextChild = 0;
                        } 
                        break; 

                    case RegexNode.Greedy: 
                    case RegexNode.Capture:
                        curNode = curNode.Child(0);
                        concatNode = null;
                        continue; 

                    case RegexNode.Bol: 
                    case RegexNode.Eol: 
                    case RegexNode.Boundary:
                    case RegexNode.ECMABoundary: 
                    case RegexNode.Beginning:
                    case RegexNode.Start:
                    case RegexNode.EndZ:
                    case RegexNode.End: 
                        return result | AnchorFromType(curNode._type);
 
                    case RegexNode.Empty: 
                    case RegexNode.Require:
                    case RegexNode.Prevent: 
                        break;

                    default:
                        return result; 
                }
 
                if (concatNode == null || nextChild >= concatNode.ChildCount()) 
                    return result;
 
                curNode = concatNode.Child(nextChild++);
            }
        }
 
        /*
         * Convert anchor type to anchor bit. 
         */ 
        private static int AnchorFromType(int type) {
            switch (type) { 
                case RegexNode.Bol:             return Bol;
                case RegexNode.Eol:             return Eol;
                case RegexNode.Boundary:        return Boundary;
                case RegexNode.ECMABoundary:    return ECMABoundary; 
                case RegexNode.Beginning:       return Beginning;
                case RegexNode.Start:           return Start; 
                case RegexNode.EndZ:            return EndZ; 
                case RegexNode.End:             return End;
                default:                        return 0; 
            }
        }

#if DBG 
        internal static String AnchorDescription(int anchors) {
            StringBuilder sb = new StringBuilder(); 
 
            if (0 != (anchors & Beginning))     sb.Append(", Beginning");
            if (0 != (anchors & Start))         sb.Append(", Start"); 
            if (0 != (anchors & Bol))           sb.Append(", Bol");
            if (0 != (anchors & Boundary))      sb.Append(", Boundary");
            if (0 != (anchors & ECMABoundary))  sb.Append(", ECMABoundary");
            if (0 != (anchors & Eol))           sb.Append(", Eol"); 
            if (0 != (anchors & End))           sb.Append(", End");
            if (0 != (anchors & EndZ))          sb.Append(", EndZ"); 
 
            if (sb.Length >= 2)
                return(sb.ToString(2, sb.Length - 2)); 

            return "None";
        }
#endif 

        /* 
         * private constructor; can't be created outside 
         */
        private RegexFCD() { 
            _fcStack = new RegexFC[32];
            _intStack = new int[32];
        }
 
        /*
         * To avoid recursion, we use a simple integer stack. 
         * This is the push. 
         */
        private void PushInt(int I) { 
            if (_intDepth >= _intStack.Length) {
                int [] expanded = new int[_intDepth * 2];

                System.Array.Copy(_intStack, 0, expanded, 0, _intDepth); 

                _intStack = expanded; 
            } 

            _intStack[_intDepth++] = I; 
        }

        /*
         * True if the stack is empty. 
         */
        private bool IntIsEmpty() { 
            return _intDepth == 0; 
        }
 
        /*
         * This is the pop.
         */
        private int PopInt() { 
            return _intStack[--_intDepth];
        } 
 
        /*
          * We also use a stack of RegexFC objects. 
          * This is the push.
          */
        private void PushFC(RegexFC fc) {
            if (_fcDepth >= _fcStack.Length) { 
                RegexFC[] expanded = new RegexFC[_fcDepth * 2];
 
                System.Array.Copy(_fcStack, 0, expanded, 0, _fcDepth); 
                _fcStack = expanded;
            } 

            _fcStack[_fcDepth++] = fc;
        }
 
        /*
         * True if the stack is empty. 
         */ 
        private bool FCIsEmpty() {
            return _fcDepth == 0; 
        }

        /*
         * This is the pop. 
         */
        private RegexFC PopFC() { 
            return _fcStack[--_fcDepth]; 
        }
 
        /*
         * This is the top.
         */
        private RegexFC TopFC() { 
            return _fcStack[_fcDepth - 1];
        } 
 
        /*
         * The main FC computation. It does a shortcutted depth-first walk 
         * through the tree and calls CalculateFC to emits code before
         * and after each child of an interior node, and at each leaf.
         */
        private RegexFC RegexFCFromRegexTree(RegexTree tree) { 
            RegexNode curNode;
            int curChild; 
 
            curNode = tree._root;
            curChild = 0; 

            for (;;) {
                if (curNode._children == null) {
                    // This is a leaf node 
                    CalculateFC(curNode._type, curNode, 0);
                } 
                else if (curChild < curNode._children.Count && !_skipAllChildren) { 
                    // This is an interior node, and we have more children to analyze
                    CalculateFC(curNode._type | BeforeChild, curNode, curChild); 

                    if (!_skipchild) {
                        curNode = (RegexNode)curNode._children[curChild];
                        // this stack is how we get a depth first walk of the tree. 
                        PushInt(curChild);
                        curChild = 0; 
                    } 
                    else {
                        curChild++; 
                        _skipchild = false;
                    }
                    continue;
                } 

                // This is an interior node where we've finished analyzing all the children, or 
                // the end of a leaf node. 
                _skipAllChildren = false;
 
                if (IntIsEmpty())
                    break;

                curChild = PopInt(); 
                curNode = curNode._next;
 
                CalculateFC(curNode._type | AfterChild, curNode, curChild); 
                if (_failed)
                    return null; 

                curChild++;
            }
 
            if (FCIsEmpty())
                return null; 
 
            return PopFC();
        } 

        /*
         * Called in Beforechild to prevent further processing of the current child
         */ 
        private void SkipChild() {
            _skipchild = true; 
        } 

        /* 
         * FC computation and shortcut cases for each node type
         */
        private void CalculateFC(int NodeType, RegexNode node, int CurIndex) {
            bool ci = false; 
            bool rtl = false;
 
            if (NodeType <= RegexNode.Ref) { 
                if ((node._options & RegexOptions.IgnoreCase) != 0)
                    ci = true; 
                if ((node._options & RegexOptions.RightToLeft) != 0)
                    rtl = true;
            }
 
            switch (NodeType) {
                case RegexNode.Concatenate | BeforeChild: 
                case RegexNode.Alternate | BeforeChild: 
                case RegexNode.Testref | BeforeChild:
                case RegexNode.Loop | BeforeChild: 
                case RegexNode.Lazyloop | BeforeChild:
                    break;

                case RegexNode.Testgroup | BeforeChild: 
                    if (CurIndex == 0)
                        SkipChild(); 
                    break; 

                case RegexNode.Empty: 
                    PushFC(new RegexFC(true));
                    break;

                case RegexNode.Concatenate | AfterChild: 
                    if (CurIndex != 0) {
                        RegexFC child = PopFC(); 
                        RegexFC cumul = TopFC(); 

                        _failed = !cumul.AddFC(child, true); 
                    }

                    if (!TopFC()._nullable)
                        _skipAllChildren = true; 
                    break;
 
                case RegexNode.Testgroup | AfterChild: 
                    if (CurIndex > 1) {
                        RegexFC child = PopFC(); 
                        RegexFC cumul = TopFC();

                        _failed = !cumul.AddFC(child, false);
                    } 
                    break;
 
                case RegexNode.Alternate | AfterChild: 
                case RegexNode.Testref | AfterChild:
                    if (CurIndex != 0) { 
                        RegexFC child = PopFC();
                        RegexFC cumul = TopFC();

                        _failed = !cumul.AddFC(child, false); 
                    }
                    break; 
 
                case RegexNode.Loop | AfterChild:
                case RegexNode.Lazyloop | AfterChild: 
                    if (node._m == 0)
                        TopFC()._nullable = true;
                    break;
 
                case RegexNode.Group | BeforeChild:
                case RegexNode.Group | AfterChild: 
                case RegexNode.Capture | BeforeChild: 
                case RegexNode.Capture | AfterChild:
                case RegexNode.Greedy | BeforeChild: 
                case RegexNode.Greedy | AfterChild:
                    break;

                case RegexNode.Require | BeforeChild: 
                case RegexNode.Prevent | BeforeChild:
                    SkipChild(); 
                    PushFC(new RegexFC(true)); 
                    break;
 
                case RegexNode.Require | AfterChild:
                case RegexNode.Prevent | AfterChild:
                    break;
 
                case RegexNode.One:
                case RegexNode.Notone: 
                    PushFC(new RegexFC(node._ch, NodeType == RegexNode.Notone, false, ci)); 
                    break;
 
                case RegexNode.Oneloop:
                case RegexNode.Onelazy:
                    PushFC(new RegexFC(node._ch, false, node._m == 0, ci));
                    break; 

                case RegexNode.Notoneloop: 
                case RegexNode.Notonelazy: 
                    PushFC(new RegexFC(node._ch, true, node._m == 0, ci));
                    break; 

                case RegexNode.Multi:
                    if (node._str.Length == 0)
                        PushFC(new RegexFC(true)); 
                    else if (!rtl)
                        PushFC(new RegexFC(node._str[0], false, false, ci)); 
                    else 
                        PushFC(new RegexFC(node._str[node._str.Length - 1], false, false, ci));
                    break; 

                case RegexNode.Set:
                    PushFC(new RegexFC(node._str, false, ci));
                    break; 

                case RegexNode.Setloop: 
                case RegexNode.Setlazy: 
                    PushFC(new RegexFC(node._str, node._m == 0, ci));
                    break; 

                case RegexNode.Ref:
                    PushFC(new RegexFC(RegexCharClass.AnyClass, true, false));
                    break; 

                case RegexNode.Nothing: 
                case RegexNode.Bol: 
                case RegexNode.Eol:
                case RegexNode.Boundary: 
                case RegexNode.Nonboundary:
                case RegexNode.ECMABoundary:
                case RegexNode.NonECMABoundary:
                case RegexNode.Beginning: 
                case RegexNode.Start:
                case RegexNode.EndZ: 
                case RegexNode.End: 
                    PushFC(new RegexFC(true));
                    break; 

                default:
                    throw new ArgumentException(SR.GetString(SR.UnexpectedOpcode, NodeType.ToString(CultureInfo.CurrentCulture)));
            } 
        }
    } 
 
    internal sealed class RegexFC {
        internal RegexCharClass _cc; 
        internal bool _nullable;
        internal bool _caseInsensitive;

        internal RegexFC(bool nullable) { 
            _cc = new RegexCharClass();
            _nullable = nullable; 
        } 

        internal RegexFC(char ch, bool not, bool nullable, bool caseInsensitive) { 
            _cc = new RegexCharClass();

            if (not) {
                if (ch > 0) 
                    _cc.AddRange('\0', (char)(ch - 1));
                if (ch < 0xFFFF) 
                    _cc.AddRange((char)(ch + 1), '\uFFFF'); 
            }
            else { 
                _cc.AddRange(ch, ch);
            }

            _caseInsensitive = caseInsensitive; 
            _nullable = nullable;
        } 
 
        internal RegexFC(String charClass, bool nullable, bool caseInsensitive) {
            _cc = RegexCharClass.Parse(charClass); 

            _nullable = nullable;
            _caseInsensitive = caseInsensitive;
        } 

        internal bool AddFC(RegexFC fc, bool concatenate) { 
            if (!_cc.CanMerge || !fc._cc.CanMerge) { 
                return false;
            } 

            if (concatenate) {
                if (!_nullable)
                    return true; 

                if (!fc._nullable) 
                    _nullable = false; 
            }
            else { 
                if (fc._nullable)
                    _nullable = true;
            }
 
            _caseInsensitive |= fc._caseInsensitive;
            _cc.AddCharClass(fc._cc); 
            return true; 
        }
 
        internal String GetFirstChars(CultureInfo culture) {
            if (_caseInsensitive)
                _cc.AddLowercase(culture);
 
            return _cc.ToStringClass();
        } 
 
        internal bool IsCaseInsensitive() {
            return _caseInsensitive; 
        }
    }

    internal sealed class RegexPrefix { 
        internal String _prefix;
        internal bool _caseInsensitive; 
 
        internal static RegexPrefix _empty = new RegexPrefix(String.Empty, false);
 
        internal RegexPrefix(String prefix, bool ci) {
            _prefix = prefix;
            _caseInsensitive = ci;
        } 

        internal String Prefix { 
            get { 
                return _prefix;
            } 
        }

        internal bool CaseInsensitive {
            get { 
                return _caseInsensitive;
            } 
        } 
        internal static RegexPrefix Empty {
            get { 
                return _empty;
            }
        }
    } 
}

// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
//------------------------------------------------------------------------------ 
// 
//     Copyright (c) Microsoft Corporation.  All rights reserved.
// 
//----------------------------------------------------------------------------- 

// This RegexFCD class is internal to the Regex package. 
// It builds a bunch of FC information (RegexFC) about 
// the regex for optimization purposes.
 
// Implementation notes:
//
// This step is as simple as walking the tree and emitting
// sequences of codes. 

namespace System.Text.RegularExpressions { 
 
    using System.Collections;
    using System.Globalization; 

    internal sealed class RegexFCD {
        private int[]      _intStack;
        private int        _intDepth; 
        private RegexFC[]  _fcStack;
        private int        _fcDepth; 
        private bool    _skipAllChildren;      // don't process any more children at the current level 
        private bool    _skipchild;            // don't process the current child.
        private bool    _failed = false; 

        private const int BeforeChild = 64;
        private const int AfterChild = 128;
 
        // where the regex can be pegged
 
        internal  const int Beginning  = 0x0001; 
        internal  const int Bol        = 0x0002;
        internal  const int Start      = 0x0004; 
        internal  const int Eol        = 0x0008;
        internal  const int EndZ       = 0x0010;
        internal  const int End        = 0x0020;
        internal  const int Boundary   = 0x0040; 
        internal  const int ECMABoundary = 0x0080;
 
        /* 
         * This is the one of the only two functions that should be called from outside.
         * It takes a RegexTree and computes the set of chars that can start it. 
         */
        internal static RegexPrefix FirstChars(RegexTree t) {
            RegexFCD s = new RegexFCD();
            RegexFC fc = s.RegexFCFromRegexTree(t); 

            if (fc == null || fc._nullable) 
                return null; 

            CultureInfo culture = ((t._options & RegexOptions.CultureInvariant) != 0) ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture; 
            return new RegexPrefix(fc.GetFirstChars(culture), fc.IsCaseInsensitive());
        }

        /* 
         * This is a related computation: it takes a RegexTree and computes the
         * leading substring if it see one. It's quite trivial and gives up easily. 
         */ 
        internal static RegexPrefix Prefix(RegexTree tree) {
            RegexNode curNode; 
            RegexNode concatNode = null;
            int nextChild = 0;

            curNode = tree._root; 

            for (;;) { 
                switch (curNode._type) { 
                    case RegexNode.Concatenate:
                        if (curNode.ChildCount() > 0) { 
                            concatNode = curNode;
                            nextChild = 0;
                        }
                        break; 

                    case RegexNode.Greedy: 
                    case RegexNode.Capture: 
                        curNode = curNode.Child(0);
                        concatNode = null; 
                        continue;

                    case RegexNode.Oneloop:
                    case RegexNode.Onelazy: 
                        if (curNode._m > 0) {
                            string pref = String.Empty.PadRight(curNode._m, curNode._ch); 
                            return new RegexPrefix(pref, 0 != (curNode._options & RegexOptions.IgnoreCase)); 
                        }
                        else 
                            return RegexPrefix.Empty;

                    case RegexNode.One:
                        return new RegexPrefix(curNode._ch.ToString(CultureInfo.InvariantCulture), 0 != (curNode._options & RegexOptions.IgnoreCase)); 

                    case RegexNode.Multi: 
                        return new RegexPrefix(curNode._str, 0 != (curNode._options & RegexOptions.IgnoreCase)); 

                    case RegexNode.Bol: 
                    case RegexNode.Eol:
                    case RegexNode.Boundary:
                    case RegexNode.ECMABoundary:
                    case RegexNode.Beginning: 
                    case RegexNode.Start:
                    case RegexNode.EndZ: 
                    case RegexNode.End: 
                    case RegexNode.Empty:
                    case RegexNode.Require: 
                    case RegexNode.Prevent:
                        break;

                    default: 
                        return RegexPrefix.Empty;
                } 
 
                if (concatNode == null || nextChild >= concatNode.ChildCount())
                    return RegexPrefix.Empty; 

                curNode = concatNode.Child(nextChild++);
            }
        } 

        /* 
         * Yet another related computation: it takes a RegexTree and computes the 
         * leading anchors that it encounters.
         */ 
        internal static int Anchors(RegexTree tree) {
            RegexNode curNode;
            RegexNode concatNode = null;
            int nextChild = 0; 
            int result = 0;
 
            curNode = tree._root; 

            for (;;) { 
                switch (curNode._type) {
                    case RegexNode.Concatenate:
                        if (curNode.ChildCount() > 0) {
                            concatNode = curNode; 
                            nextChild = 0;
                        } 
                        break; 

                    case RegexNode.Greedy: 
                    case RegexNode.Capture:
                        curNode = curNode.Child(0);
                        concatNode = null;
                        continue; 

                    case RegexNode.Bol: 
                    case RegexNode.Eol: 
                    case RegexNode.Boundary:
                    case RegexNode.ECMABoundary: 
                    case RegexNode.Beginning:
                    case RegexNode.Start:
                    case RegexNode.EndZ:
                    case RegexNode.End: 
                        return result | AnchorFromType(curNode._type);
 
                    case RegexNode.Empty: 
                    case RegexNode.Require:
                    case RegexNode.Prevent: 
                        break;

                    default:
                        return result; 
                }
 
                if (concatNode == null || nextChild >= concatNode.ChildCount()) 
                    return result;
 
                curNode = concatNode.Child(nextChild++);
            }
        }
 
        /*
         * Convert anchor type to anchor bit. 
         */ 
        private static int AnchorFromType(int type) {
            switch (type) { 
                case RegexNode.Bol:             return Bol;
                case RegexNode.Eol:             return Eol;
                case RegexNode.Boundary:        return Boundary;
                case RegexNode.ECMABoundary:    return ECMABoundary; 
                case RegexNode.Beginning:       return Beginning;
                case RegexNode.Start:           return Start; 
                case RegexNode.EndZ:            return EndZ; 
                case RegexNode.End:             return End;
                default:                        return 0; 
            }
        }

#if DBG 
        internal static String AnchorDescription(int anchors) {
            StringBuilder sb = new StringBuilder(); 
 
            if (0 != (anchors & Beginning))     sb.Append(", Beginning");
            if (0 != (anchors & Start))         sb.Append(", Start"); 
            if (0 != (anchors & Bol))           sb.Append(", Bol");
            if (0 != (anchors & Boundary))      sb.Append(", Boundary");
            if (0 != (anchors & ECMABoundary))  sb.Append(", ECMABoundary");
            if (0 != (anchors & Eol))           sb.Append(", Eol"); 
            if (0 != (anchors & End))           sb.Append(", End");
            if (0 != (anchors & EndZ))          sb.Append(", EndZ"); 
 
            if (sb.Length >= 2)
                return(sb.ToString(2, sb.Length - 2)); 

            return "None";
        }
#endif 

        /* 
         * private constructor; can't be created outside 
         */
        private RegexFCD() { 
            _fcStack = new RegexFC[32];
            _intStack = new int[32];
        }
 
        /*
         * To avoid recursion, we use a simple integer stack. 
         * This is the push. 
         */
        private void PushInt(int I) { 
            if (_intDepth >= _intStack.Length) {
                int [] expanded = new int[_intDepth * 2];

                System.Array.Copy(_intStack, 0, expanded, 0, _intDepth); 

                _intStack = expanded; 
            } 

            _intStack[_intDepth++] = I; 
        }

        /*
         * True if the stack is empty. 
         */
        private bool IntIsEmpty() { 
            return _intDepth == 0; 
        }
 
        /*
         * This is the pop.
         */
        private int PopInt() { 
            return _intStack[--_intDepth];
        } 
 
        /*
          * We also use a stack of RegexFC objects. 
          * This is the push.
          */
        private void PushFC(RegexFC fc) {
            if (_fcDepth >= _fcStack.Length) { 
                RegexFC[] expanded = new RegexFC[_fcDepth * 2];
 
                System.Array.Copy(_fcStack, 0, expanded, 0, _fcDepth); 
                _fcStack = expanded;
            } 

            _fcStack[_fcDepth++] = fc;
        }
 
        /*
         * True if the stack is empty. 
         */ 
        private bool FCIsEmpty() {
            return _fcDepth == 0; 
        }

        /*
         * This is the pop. 
         */
        private RegexFC PopFC() { 
            return _fcStack[--_fcDepth]; 
        }
 
        /*
         * This is the top.
         */
        private RegexFC TopFC() { 
            return _fcStack[_fcDepth - 1];
        } 
 
        /*
         * The main FC computation. It does a shortcutted depth-first walk 
         * through the tree and calls CalculateFC to emits code before
         * and after each child of an interior node, and at each leaf.
         */
        private RegexFC RegexFCFromRegexTree(RegexTree tree) { 
            RegexNode curNode;
            int curChild; 
 
            curNode = tree._root;
            curChild = 0; 

            for (;;) {
                if (curNode._children == null) {
                    // This is a leaf node 
                    CalculateFC(curNode._type, curNode, 0);
                } 
                else if (curChild < curNode._children.Count && !_skipAllChildren) { 
                    // This is an interior node, and we have more children to analyze
                    CalculateFC(curNode._type | BeforeChild, curNode, curChild); 

                    if (!_skipchild) {
                        curNode = (RegexNode)curNode._children[curChild];
                        // this stack is how we get a depth first walk of the tree. 
                        PushInt(curChild);
                        curChild = 0; 
                    } 
                    else {
                        curChild++; 
                        _skipchild = false;
                    }
                    continue;
                } 

                // This is an interior node where we've finished analyzing all the children, or 
                // the end of a leaf node. 
                _skipAllChildren = false;
 
                if (IntIsEmpty())
                    break;

                curChild = PopInt(); 
                curNode = curNode._next;
 
                CalculateFC(curNode._type | AfterChild, curNode, curChild); 
                if (_failed)
                    return null; 

                curChild++;
            }
 
            if (FCIsEmpty())
                return null; 
 
            return PopFC();
        } 

        /*
         * Called in Beforechild to prevent further processing of the current child
         */ 
        private void SkipChild() {
            _skipchild = true; 
        } 

        /* 
         * FC computation and shortcut cases for each node type
         */
        private void CalculateFC(int NodeType, RegexNode node, int CurIndex) {
            bool ci = false; 
            bool rtl = false;
 
            if (NodeType <= RegexNode.Ref) { 
                if ((node._options & RegexOptions.IgnoreCase) != 0)
                    ci = true; 
                if ((node._options & RegexOptions.RightToLeft) != 0)
                    rtl = true;
            }
 
            switch (NodeType) {
                case RegexNode.Concatenate | BeforeChild: 
                case RegexNode.Alternate | BeforeChild: 
                case RegexNode.Testref | BeforeChild:
                case RegexNode.Loop | BeforeChild: 
                case RegexNode.Lazyloop | BeforeChild:
                    break;

                case RegexNode.Testgroup | BeforeChild: 
                    if (CurIndex == 0)
                        SkipChild(); 
                    break; 

                case RegexNode.Empty: 
                    PushFC(new RegexFC(true));
                    break;

                case RegexNode.Concatenate | AfterChild: 
                    if (CurIndex != 0) {
                        RegexFC child = PopFC(); 
                        RegexFC cumul = TopFC(); 

                        _failed = !cumul.AddFC(child, true); 
                    }

                    if (!TopFC()._nullable)
                        _skipAllChildren = true; 
                    break;
 
                case RegexNode.Testgroup | AfterChild: 
                    if (CurIndex > 1) {
                        RegexFC child = PopFC(); 
                        RegexFC cumul = TopFC();

                        _failed = !cumul.AddFC(child, false);
                    } 
                    break;
 
                case RegexNode.Alternate | AfterChild: 
                case RegexNode.Testref | AfterChild:
                    if (CurIndex != 0) { 
                        RegexFC child = PopFC();
                        RegexFC cumul = TopFC();

                        _failed = !cumul.AddFC(child, false); 
                    }
                    break; 
 
                case RegexNode.Loop | AfterChild:
                case RegexNode.Lazyloop | AfterChild: 
                    if (node._m == 0)
                        TopFC()._nullable = true;
                    break;
 
                case RegexNode.Group | BeforeChild:
                case RegexNode.Group | AfterChild: 
                case RegexNode.Capture | BeforeChild: 
                case RegexNode.Capture | AfterChild:
                case RegexNode.Greedy | BeforeChild: 
                case RegexNode.Greedy | AfterChild:
                    break;

                case RegexNode.Require | BeforeChild: 
                case RegexNode.Prevent | BeforeChild:
                    SkipChild(); 
                    PushFC(new RegexFC(true)); 
                    break;
 
                case RegexNode.Require | AfterChild:
                case RegexNode.Prevent | AfterChild:
                    break;
 
                case RegexNode.One:
                case RegexNode.Notone: 
                    PushFC(new RegexFC(node._ch, NodeType == RegexNode.Notone, false, ci)); 
                    break;
 
                case RegexNode.Oneloop:
                case RegexNode.Onelazy:
                    PushFC(new RegexFC(node._ch, false, node._m == 0, ci));
                    break; 

                case RegexNode.Notoneloop: 
                case RegexNode.Notonelazy: 
                    PushFC(new RegexFC(node._ch, true, node._m == 0, ci));
                    break; 

                case RegexNode.Multi:
                    if (node._str.Length == 0)
                        PushFC(new RegexFC(true)); 
                    else if (!rtl)
                        PushFC(new RegexFC(node._str[0], false, false, ci)); 
                    else 
                        PushFC(new RegexFC(node._str[node._str.Length - 1], false, false, ci));
                    break; 

                case RegexNode.Set:
                    PushFC(new RegexFC(node._str, false, ci));
                    break; 

                case RegexNode.Setloop: 
                case RegexNode.Setlazy: 
                    PushFC(new RegexFC(node._str, node._m == 0, ci));
                    break; 

                case RegexNode.Ref:
                    PushFC(new RegexFC(RegexCharClass.AnyClass, true, false));
                    break; 

                case RegexNode.Nothing: 
                case RegexNode.Bol: 
                case RegexNode.Eol:
                case RegexNode.Boundary: 
                case RegexNode.Nonboundary:
                case RegexNode.ECMABoundary:
                case RegexNode.NonECMABoundary:
                case RegexNode.Beginning: 
                case RegexNode.Start:
                case RegexNode.EndZ: 
                case RegexNode.End: 
                    PushFC(new RegexFC(true));
                    break; 

                default:
                    throw new ArgumentException(SR.GetString(SR.UnexpectedOpcode, NodeType.ToString(CultureInfo.CurrentCulture)));
            } 
        }
    } 
 
    internal sealed class RegexFC {
        internal RegexCharClass _cc; 
        internal bool _nullable;
        internal bool _caseInsensitive;

        internal RegexFC(bool nullable) { 
            _cc = new RegexCharClass();
            _nullable = nullable; 
        } 

        internal RegexFC(char ch, bool not, bool nullable, bool caseInsensitive) { 
            _cc = new RegexCharClass();

            if (not) {
                if (ch > 0) 
                    _cc.AddRange('\0', (char)(ch - 1));
                if (ch < 0xFFFF) 
                    _cc.AddRange((char)(ch + 1), '\uFFFF'); 
            }
            else { 
                _cc.AddRange(ch, ch);
            }

            _caseInsensitive = caseInsensitive; 
            _nullable = nullable;
        } 
 
        internal RegexFC(String charClass, bool nullable, bool caseInsensitive) {
            _cc = RegexCharClass.Parse(charClass); 

            _nullable = nullable;
            _caseInsensitive = caseInsensitive;
        } 

        internal bool AddFC(RegexFC fc, bool concatenate) { 
            if (!_cc.CanMerge || !fc._cc.CanMerge) { 
                return false;
            } 

            if (concatenate) {
                if (!_nullable)
                    return true; 

                if (!fc._nullable) 
                    _nullable = false; 
            }
            else { 
                if (fc._nullable)
                    _nullable = true;
            }
 
            _caseInsensitive |= fc._caseInsensitive;
            _cc.AddCharClass(fc._cc); 
            return true; 
        }
 
        internal String GetFirstChars(CultureInfo culture) {
            if (_caseInsensitive)
                _cc.AddLowercase(culture);
 
            return _cc.ToStringClass();
        } 
 
        internal bool IsCaseInsensitive() {
            return _caseInsensitive; 
        }
    }

    internal sealed class RegexPrefix { 
        internal String _prefix;
        internal bool _caseInsensitive; 
 
        internal static RegexPrefix _empty = new RegexPrefix(String.Empty, false);
 
        internal RegexPrefix(String prefix, bool ci) {
            _prefix = prefix;
            _caseInsensitive = ci;
        } 

        internal String Prefix { 
            get { 
                return _prefix;
            } 
        }

        internal bool CaseInsensitive {
            get { 
                return _caseInsensitive;
            } 
        } 
        internal static RegexPrefix Empty {
            get { 
                return _empty;
            }
        }
    } 
}

// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
                        

Link Menu

Network programming in C#, Network Programming in VB.NET, Network Programming in .NET
This book is available now!
Buy at Amazon US or
Buy at Amazon UK