();
+ _stack.Push(NewState(start));
+
+ while (_stack.Count > 0)
+ {
+ ParseState state = _stack.Peek();
+ ProductionPattern pattern = state.pattern;
+ var defaultAlt = pattern.DefaultAlternative;
+ ProductionPattern nextpattern = null;
+ while (state.altindex < pattern.Count)
+ {
+ var alt = pattern[state.altindex];
+ if (state.validnext || (defaultAlt != alt && IsNext(alt)))
+ {
+ state.validnext = true;
+ nextpattern = ParseAlternative(state, alt);
+ break;
+ }
+ else
+ {
+ state.altindex++;
+ state.validnext = false;
+ }
+ }
+
+ // check if completed pass through alt patterns. try default
+ if (state.altindex >= pattern.Count)
+ {
+ if (!state.validnext && (defaultAlt == null || !IsNext(defaultAlt)))
+ {
+ ThrowParseException(FindUnion(pattern));
+ }
+ else
+ {
+ state.validnext = true;
+ nextpattern = ParseAlternative(state, defaultAlt);
+ }
+ }
+
+ if (nextpattern != null)
+ {
+ _stack.Push(NewState(nextpattern));
+ }
+
+ // we finished current pattern, so back up to previous state.
+ else
+ {
+ // if we have a node set, add it to the parent
+ var child = state.node;
+ _stack.Pop();
+ if (_stack.Count == 0)
+ {
+ // back to top, can return our result, which is top node
+ return child;
+ }
+ state = _stack.Peek();
+ AddNode((Production)state.node, child);
+ }
+ }
+
+ // should never get here, but must show we return something.
+ return null;
+ }
+
+ /**
+  * Advances the parse of one production alternative. On first entry
+  * (no node in the state yet) a production node is created and
+  * entered. Elements are then processed from state.elementindex
+  * onwards. When an element requires a nested production, that
+  * pattern is returned so the caller can process it next. When all
+  * elements are done the node is exited and null is returned.
+  * Parse errors are recorded and one token is skipped before the
+  * same element is retried.
+  */
+ private ProductionPattern ParseAlternative(ParseState state, ProductionPatternAlternative alt)
+ {
+     bool firstVisit = state.node == null;
+     if (firstVisit)
+     {
+         state.node = NewProduction(alt.Pattern);
+         state.elementindex = 0;
+         EnterNode(state.node);
+     }
+
+     while (state.elementindex < alt.Count)
+     {
+         try
+         {
+             var nested = ParseElement(state, alt[state.elementindex]);
+             if (nested != null)
+             {
+                 // A sub-production must be parsed before we can continue.
+                 return nested;
+             }
+             state.elementindex++;
+         }
+         catch (ParseException error)
+         {
+             // Log the error, drop the offending token and retry the element.
+             AddError(error, true);
+             NextToken();
+         }
+     }
+
+     // Every element has been handled; close the production node.
+     state.node = ExitNode(state.node);
+     return null;
+ }
+
+ /**
+  * Parses the repetitions of a single production element, resuming
+  * at repetition number state.tokenindex. Token elements are read
+  * and added to the current production node directly. For a
+  * production element the resume position is stored in the state and
+  * the referenced pattern is returned for the caller to process.
+  * Returns null (and resets state.tokenindex) once the element is
+  * complete.
+  */
+ private ProductionPattern ParseElement(ParseState state,
+                                        ProductionPatternElement elem)
+ {
+     int repetition = state.tokenindex;
+
+     // Mandatory repetitions are always attempted; optional ones only
+     // when the look-ahead says the element comes next.
+     while (repetition < elem.MaxCount
+            && (repetition < elem.MinCount || IsNext(elem)))
+     {
+         if (!elem.IsToken())
+         {
+             // Remember where to continue once the nested pattern is done.
+             state.tokenindex = repetition + 1;
+             return GetPattern(elem.Id);
+         }
+
+         Node tokenNode = NextToken(elem.Id);
+         EnterNode(tokenNode);
+         AddNode((Production)state.node, ExitNode(tokenNode));
+         repetition++;
+     }
+
+     // Element fully processed.
+     state.tokenindex = 0;
+     return null;
+ }
+
+ /**
+  * Checks the look-ahead set of a production pattern against this
+  * parser. Returns false when the pattern has no look-ahead set.
+  */
+ private bool IsNext(ProductionPattern pattern)
+ {
+     LookAheadSet lookAhead = pattern.LookAhead;
+     return lookAhead != null && lookAhead.IsNext(this);
+ }
+
+ /**
+  * Checks the look-ahead set of a production alternative against
+  * this parser. Returns false when the alternative has no
+  * look-ahead set.
+  */
+ private bool IsNext(ProductionPatternAlternative alt)
+ {
+     LookAheadSet lookAhead = alt.LookAhead;
+     if (lookAhead == null)
+     {
+         return false;
+     }
+     return lookAhead.IsNext(this);
+ }
+
+ /**
+  * Checks whether a production element comes next. An explicit
+  * look-ahead set on the element takes precedence. Otherwise a token
+  * element is matched against the next token and a production
+  * element defers to the look-ahead of the referenced pattern.
+  */
+ private bool IsNext(ProductionPatternElement elem)
+ {
+     LookAheadSet lookAhead = elem.LookAhead;
+     if (lookAhead != null)
+     {
+         return lookAhead.IsNext(this);
+     }
+
+     return elem.IsToken()
+         ? elem.IsMatch(PeekToken(0))
+         : IsNext(GetPattern(elem.Id));
+ }
+
+ /**
+  * Calculates the look-ahead sets for all alternatives of a
+  * production pattern. A one-token look-ahead is computed first; as
+  * long as alternatives conflict, the look-ahead length is increased
+  * and the conflicting alternatives are recomputed. An alternative
+  * that still conflicts becomes the pattern's default alternative,
+  * and a second such alternative raises an ambiguity exception.
+  * Finally the look-aheads inside each alternative are calculated.
+  */
+ private void CalculateLookAhead(ProductionPattern pattern)
+ {
+     var callStack = new CallStack();
+     var union = new LookAheadSet(1);
+     var perAlternative = new LookAheadSet[pattern.Count];
+
+     // Calculate simple (single token) look-ahead
+     callStack.Push(pattern.Name, 1);
+     for (int idx = 0; idx < pattern.Count; idx++)
+     {
+         ProductionPatternAlternative current = pattern[idx];
+         perAlternative[idx] = FindLookAhead(current, 1, 0, callStack, null);
+         current.LookAhead = perAlternative[idx];
+         union.AddAll(perAlternative[idx]);
+     }
+     if (pattern.LookAhead == null)
+     {
+         pattern.LookAhead = union;
+     }
+
+     // Resolve conflicts by widening the look-ahead one token at a time
+     LookAheadSet earlier = new LookAheadSet(0);
+     LookAheadSet conflicts = FindConflicts(pattern, 1);
+     for (int length = 2; conflicts.Size() > 0; length++)
+     {
+         callStack.Clear();
+         callStack.Push(pattern.Name, length);
+         conflicts.AddAll(earlier);
+
+         for (int idx = 0; idx < pattern.Count; idx++)
+         {
+             ProductionPatternAlternative current = pattern[idx];
+             if (perAlternative[idx].Intersects(conflicts))
+             {
+                 perAlternative[idx] = FindLookAhead(current,
+                                                     length,
+                                                     0,
+                                                     callStack,
+                                                     conflicts);
+                 current.LookAhead = perAlternative[idx];
+             }
+
+             if (!perAlternative[idx].Intersects(conflicts))
+             {
+                 continue;
+             }
+
+             // Still conflicting: only one alternative may be the default.
+             if (pattern.DefaultAlternative == null)
+             {
+                 pattern.DefaultAlternative = current;
+             }
+             else if (pattern.DefaultAlternative != current)
+             {
+                 ThrowAmbiguityException(pattern.Name,
+                                         null,
+                                         perAlternative[idx].CreateIntersection(conflicts));
+             }
+         }
+
+         earlier = conflicts;
+         conflicts = FindConflicts(pattern, length);
+     }
+
+     // Resolve conflicts inside rules
+     for (int idx = 0; idx < pattern.Count; idx++)
+     {
+         CalculateLookAhead(pattern[idx], 0);
+     }
+ }
+
+ /**
+  * Calculates the look-aheads needed inside a production
+  * alternative, starting at element position pos. Elements whose
+  * minimum and maximum counts are equal are skipped. For every other
+  * element, the element's look-ahead is compared with the look-ahead
+  * of what follows it, and the length is increased until the two no
+  * longer conflict. A conflict that remains in the combined set
+  * raises an ambiguity exception.
+  */
+ private void CalculateLookAhead(ProductionPatternAlternative alt,
+                                 int pos)
+ {
+     for (int index = pos; index < alt.Count; index++)
+     {
+         var owner = alt.Pattern;
+         var elem = alt[index];
+
+         // Fixed repetition count: no choice to make at this position.
+         if (elem.MinCount == elem.MaxCount)
+         {
+             continue;
+         }
+
+         // Calculate simple look-aheads
+         var first = FindLookAhead(elem, 1, new CallStack(), null);
+         var follow = FindLookAhead(alt, 1, index + 1, new CallStack(), null);
+
+         // Resolve conflicts
+         var location = "at position " + (index + 1);
+         var earlier = new LookAheadSet(0);
+         var conflicts = FindConflicts(owner.Name, location, first, follow);
+         for (int length = 2; conflicts.Size() > 0; length++)
+         {
+             conflicts.AddAll(earlier);
+             first = FindLookAhead(elem, length, new CallStack(), conflicts);
+             follow = FindLookAhead(alt,
+                                    length,
+                                    index + 1,
+                                    new CallStack(),
+                                    conflicts);
+             first = first.CreateCombination(follow);
+             elem.LookAhead = first;
+             if (first.Intersects(conflicts))
+             {
+                 first = first.CreateIntersection(conflicts);
+                 ThrowAmbiguityException(owner.Name, location, first);
+             }
+             earlier = conflicts;
+             conflicts = FindConflicts(owner.Name, location, first, follow);
+         }
+     }
+ }
+
+ /**
+  * Finds the look-ahead set of a production pattern as the union of
+  * the look-ahead sets of all its alternatives. The pattern name and
+  * length are kept on the call stack while the alternatives are
+  * examined. Finding the same pair already on the stack raises an
+  * INFINITE_LOOP parser creation exception.
+  */
+ private LookAheadSet FindLookAhead(ProductionPattern pattern,
+                                    int length,
+                                    CallStack stack,
+                                    LookAheadSet filter)
+ {
+     // Check for infinite loop
+     bool alreadyActive = stack.Contains(pattern.Name, length);
+     if (alreadyActive)
+     {
+         throw new ParserCreationException(
+             ParserCreationException.ErrorType.INFINITE_LOOP,
+             pattern.Name,
+             (string)null);
+     }
+
+     // Find pattern look-ahead
+     var union = new LookAheadSet(length);
+     stack.Push(pattern.Name, length);
+     int index = 0;
+     while (index < pattern.Count)
+     {
+         union.AddAll(FindLookAhead(pattern[index], length, 0, stack, filter));
+         index++;
+     }
+     stack.Pop();
+
+     return union;
+ }
+
+ /**
+  * Finds the look-ahead set of a production alternative, starting at
+  * element position pos. The look-ahead of the element at pos is
+  * combined with the look-ahead of the remaining elements. When a
+  * filter is given, only the sequences overlapping the filter are
+  * extended with the following elements.
+  */
+ private LookAheadSet FindLookAhead(ProductionPatternAlternative alt,
+                                    int length,
+                                    int pos,
+                                    CallStack stack,
+                                    LookAheadSet filter)
+ {
+     // Check trivial cases
+     if (length <= 0 || pos >= alt.Count)
+     {
+         return new LookAheadSet(0);
+     }
+
+     // Find look-ahead for this element
+     var element = alt[pos];
+     var head = FindLookAhead(element, length, stack, filter);
+     if (element.MinCount == 0)
+     {
+         head.AddEmpty();
+     }
+
+     // Unfiltered: extend with the rest while tokens remain
+     if (filter == null)
+     {
+         int remaining = length - head.GetMinLength();
+         if (remaining <= 0)
+         {
+             return head;
+         }
+         var rest = FindLookAhead(alt, remaining, pos + 1, stack, null);
+         return head.CreateCombination(rest);
+     }
+
+     // Filtered: nothing to extend unless the filter overlaps
+     if (!filter.IsOverlap(head))
+     {
+         return head;
+     }
+
+     var overlaps = head.CreateOverlaps(filter);
+     int shortened = length - overlaps.GetMinLength();
+     var narrowed = filter.CreateFilter(overlaps);
+     var tail = FindLookAhead(alt, shortened, pos + 1, stack, narrowed);
+     head.RemoveAll(overlaps);
+     head.AddAll(overlaps.CreateCombination(tail));
+     return head;
+ }
+
+ /**
+  * Finds the look-ahead set of a production element, including its
+  * repetitions. Without a filter, or when the filter does not
+  * overlap the initial look-ahead, only the look-ahead of a single
+  * occurrence is returned. Otherwise the overlapping sequences are
+  * extended with further occurrences, bounded by the element's
+  * maximum count and the requested length.
+  */
+ private LookAheadSet FindLookAhead(ProductionPatternElement elem,
+                                    int length,
+                                    CallStack stack,
+                                    LookAheadSet filter)
+ {
+     // Find initial element look-ahead
+     var current = FindLookAhead(elem, length, 0, stack, filter);
+     var collected = new LookAheadSet(length);
+     collected.AddAll(current);
+     if (filter == null || !filter.IsOverlap(collected))
+     {
+         return collected;
+     }
+
+     // Handle element repetitions
+     if (elem.MaxCount == int.MaxValue)
+     {
+         current = current.CreateRepetitive();
+     }
+     int limit = length < elem.MaxCount ? length : elem.MaxCount;
+     int round = 1;
+     while (round < limit)
+     {
+         current = current.CreateOverlaps(filter);
+         if (current.Size() <= 0 || current.GetMinLength() >= length)
+         {
+             break;
+         }
+         var next = FindLookAhead(elem,
+                                  length,
+                                  0,
+                                  stack,
+                                  filter.CreateFilter(current));
+         current = current.CreateCombination(next);
+         collected.AddAll(current);
+         round++;
+     }
+
+     return collected;
+ }
+
+ /**
+  * Finds the look-ahead set of a single occurrence of a production
+  * element. A token element yields a set holding just its token id.
+  * A production element yields the look-ahead of the referenced
+  * pattern, marked repetitive when that pattern's name is on the
+  * call stack. The dummy parameter is not used in the body.
+  */
+ private LookAheadSet FindLookAhead(ProductionPatternElement elem,
+                                    int length,
+                                    int dummy,
+                                    CallStack stack,
+                                    LookAheadSet filter)
+ {
+     if (elem.IsToken())
+     {
+         var single = new LookAheadSet(length);
+         single.Add(elem.Id);
+         return single;
+     }
+
+     var referenced = GetPattern(elem.Id);
+     var nested = FindLookAhead(referenced, length, stack, filter);
+     return stack.Contains(referenced.Name)
+         ? nested.CreateRepetitive()
+         : nested;
+ }
+
+ /**
+  * Collects the look-ahead conflicts between the alternatives of a
+  * production pattern, i.e. the intersections of every pair of
+  * alternative look-ahead sets. A repetitive result raises an
+  * ambiguity exception.
+  */
+ private LookAheadSet FindConflicts(ProductionPattern pattern,
+                                    int maxLength)
+ {
+     var conflicts = new LookAheadSet(maxLength);
+     for (int later = 0; later < pattern.Count; later++)
+     {
+         var laterSet = pattern[later].LookAhead;
+         for (int sooner = 0; sooner < later; sooner++)
+         {
+             var soonerSet = pattern[sooner].LookAhead;
+             conflicts.AddAll(laterSet.CreateIntersection(soonerSet));
+         }
+     }
+
+     if (conflicts.IsRepetitive())
+     {
+         ThrowAmbiguityException(pattern.Name, null, conflicts);
+     }
+     return conflicts;
+ }
+
+ /**
+  * Returns the intersection of two look-ahead sets. A repetitive
+  * intersection raises an ambiguity exception for the given pattern
+  * name and location.
+  */
+ private LookAheadSet FindConflicts(string pattern,
+                                    string location,
+                                    LookAheadSet set1,
+                                    LookAheadSet set2)
+ {
+     LookAheadSet shared = set1.CreateIntersection(set2);
+     bool unresolvable = shared.IsRepetitive();
+     if (unresolvable)
+     {
+         ThrowAmbiguityException(pattern, location, shared);
+     }
+     return shared;
+ }
+
+ /**
+  * Builds the union of the look-ahead sets of all alternatives in a
+  * production pattern. The resulting set is created with the largest
+  * maximum length found among the alternatives.
+  */
+ private LookAheadSet FindUnion(ProductionPattern pattern)
+ {
+     // First pass: find the longest look-ahead in use
+     int longest = 0;
+     for (int idx = 0; idx < pattern.Count; idx++)
+     {
+         int candidate = pattern[idx].LookAhead.GetMaxLength();
+         if (candidate > longest)
+         {
+             longest = candidate;
+         }
+     }
+
+     // Second pass: merge all alternative look-aheads
+     var union = new LookAheadSet(longest);
+     for (int idx = 0; idx < pattern.Count; idx++)
+     {
+         union.AddAll(pattern[idx].LookAhead);
+     }
+     return union;
+ }
+
+
+ /**
+  * Throws an UNEXPECTED_TOKEN parse exception. Tokens are consumed
+  * for as long as they match the look-ahead set. The descriptions of
+  * the tokens expected at the mismatch are then collected, and the
+  * next token read is reported as the unexpected one.
+  */
+ private void ThrowParseException(LookAheadSet set)
+ {
+     var expected = new ArrayList();
+
+     // Read tokens until mismatch
+     LookAheadSet remaining = set;
+     while (remaining.IsNext(this, 1))
+     {
+         remaining = remaining.CreateNextSet(NextToken().Id);
+     }
+
+     // Find next token descriptions
+     foreach (var tokenId in remaining.GetInitialTokens())
+     {
+         expected.Add(GetTokenDescription(tokenId));
+     }
+
+     // Create exception
+     var offending = NextToken();
+     throw new ParseException(ParseException.ErrorType.UNEXPECTED_TOKEN,
+                              offending.ToShortString(),
+                              expected,
+                              offending.StartLine,
+                              offending.StartColumn);
+ }
+
+ /**
+  * Throws an INHERENT_AMBIGUITY parser creation exception for the
+  * given pattern name and location, listing the descriptions of the
+  * initial tokens of the conflicting look-ahead set.
+  */
+ private void ThrowAmbiguityException(string pattern,
+                                      string location,
+                                      LookAheadSet set)
+ {
+     // Find next token descriptions
+     var descriptions = new ArrayList();
+     foreach (var tokenId in set.GetInitialTokens())
+     {
+         descriptions.Add(GetTokenDescription(tokenId));
+     }
+
+     // Create exception
+     throw new ParserCreationException(
+         ParserCreationException.ErrorType.INHERENT_AMBIGUITY,
+         pattern,
+         location,
+         descriptions);
+ }
+
+
+ /**
+  * A stack of (name, value) pairs stored in two parallel lists. It
+  * supports membership tests by name alone or by name and value.
+  */
+ private class CallStack
+ {
+     private readonly ArrayList _nameStack = new ArrayList();
+     private readonly ArrayList _valueStack = new ArrayList();
+
+     /** Checks whether any entry on the stack has the given name. */
+     public bool Contains(string name)
+     {
+         return _nameStack.Contains(name);
+     }
+
+     /** Checks whether an entry with both the name and the value exists. */
+     public bool Contains(string name, int value)
+     {
+         int index = 0;
+         while (index < _nameStack.Count)
+         {
+             bool sameName = _nameStack[index].Equals(name);
+             if (sameName && _valueStack[index].Equals(value))
+             {
+                 return true;
+             }
+             index++;
+         }
+         return false;
+     }
+
+     /** Removes all entries. */
+     public void Clear()
+     {
+         _nameStack.Clear();
+         _valueStack.Clear();
+     }
+
+     /** Adds a new entry on top of the stack. */
+     public void Push(string name, int value)
+     {
+         _nameStack.Add(name);
+         _valueStack.Add(value);
+     }
+
+     /** Removes the top entry; does nothing on an empty stack. */
+     public void Pop()
+     {
+         int top = _nameStack.Count - 1;
+         if (top < 0)
+         {
+             return;
+         }
+         _nameStack.RemoveAt(top);
+         _valueStack.RemoveAt(top);
+     }
+ }
+ }
+}
diff --git a/Parsing/StringElement.cs b/Parsing/StringElement.cs
new file mode 100644
index 0000000..f1c525a
--- /dev/null
+++ b/Parsing/StringElement.cs
@@ -0,0 +1,64 @@
+namespace Flee.Parsing
+{
+ /**
+  * A regular expression string element. This element only matches
+  * an exact string. The stored string is never modified after
+  * construction, which is why Clone() can return the same instance.
+  */
+ internal class StringElement : Element
+ {
+     private readonly string _value;
+
+     /** Creates a string element matching a single character. */
+     public StringElement(char c)
+         : this(c.ToString())
+     {
+     }
+
+     /** Creates a string element matching the given string. */
+     public StringElement(string str)
+     {
+         _value = str;
+     }
+
+     /** Returns the string this element matches. */
+     public string GetString()
+     {
+         return _value;
+     }
+
+     /** Returns this same instance. */
+     public override object Clone()
+     {
+         return this;
+     }
+
+     /**
+      * Matches the string against the buffer contents at the start
+      * position. Returns the string length on a match and -1
+      * otherwise. Any non-zero skip value gives -1, since an exact
+      * string has only a single possible match. If the buffer ends
+      * before the string does, the matcher is told that the end of
+      * the input was read. In case-insensitive mode the input
+      * character is lower-cased before the comparison; the stored
+      * string is compared as is.
+      */
+     public override int Match(Matcher m,
+                               ReaderBuffer buffer,
+                               int start,
+                               int skip)
+     {
+         if (skip != 0)
+         {
+             return -1;
+         }
+
+         int offset = 0;
+         while (offset < _value.Length)
+         {
+             int actual = buffer.Peek(start + offset);
+             if (actual < 0)
+             {
+                 m.SetReadEndOfString();
+                 return -1;
+             }
+             if (m.IsCaseInsensitive())
+             {
+                 actual = Char.ToLower((char)actual);
+             }
+             if (actual != _value[offset])
+             {
+                 return -1;
+             }
+             offset++;
+         }
+         return _value.Length;
+     }
+
+     /** Prints the element as the quoted string on a single line. */
+     public override void PrintTo(TextWriter output, string indent)
+     {
+         output.WriteLine($"{indent}'{_value}'");
+     }
+ }
+}
diff --git a/Parsing/Token.cs b/Parsing/Token.cs
new file mode 100644
index 0000000..3386c9f
--- /dev/null
+++ b/Parsing/Token.cs
@@ -0,0 +1,168 @@
+using System.Text;
+
+namespace Flee.Parsing
+{
+ /**
+  * A token node. This class represents a token (i.e. a set of adjacent
+  * characters) in a parse tree. The tokens are created by a tokenizer,
+  * that groups characters together into tokens according to a set of
+  * token patterns. Tokens can be linked to their neighbours through
+  * the Previous and Next properties.
+  */
+ internal class Token : Node
+ {
+     private readonly TokenPattern _pattern;
+     private readonly string _image;
+     private readonly int _startLine;
+     private readonly int _startColumn;
+     private readonly int _endLine;
+     private readonly int _endColumn;
+     private Token _previous;
+     private Token _next;
+
+     /**
+      * Creates a new token. The end position is derived from the
+      * image: each newline character advances the end line, and the
+      * end column is then the number of characters after the last
+      * newline.
+      */
+     public Token(TokenPattern pattern, string image, int line, int col)
+     {
+         _pattern = pattern;
+         _image = image;
+         _startLine = line;
+         _startColumn = col;
+
+         int lastLine = line;
+         int lastColumn = col + image.Length - 1;
+         int newline = image.IndexOf('\n');
+         while (newline >= 0)
+         {
+             int afterNewline = newline + 1;
+             lastLine++;
+             lastColumn = image.Length - afterNewline;
+             newline = image.IndexOf('\n', afterNewline);
+         }
+         _endLine = lastLine;
+         _endColumn = lastColumn;
+     }
+
+     public override int Id => _pattern.Id;
+
+     public override string Name => _pattern.Name;
+
+     public override int StartLine => _startLine;
+
+     public override int StartColumn => _startColumn;
+
+     public override int EndLine => _endLine;
+
+     public override int EndColumn => _endColumn;
+
+     /** The token image, i.e. the characters that were matched. */
+     public string Image => _image;
+
+     public string GetImage()
+     {
+         return Image;
+     }
+
+     internal TokenPattern Pattern => _pattern;
+
+     /**
+      * The previous token. Setting it unlinks the old previous token
+      * from this one and makes this token the Next of the new value.
+      */
+     public Token Previous
+     {
+         get
+         {
+             return _previous;
+         }
+         set
+         {
+             if (_previous != null)
+             {
+                 _previous._next = null;
+             }
+             _previous = value;
+             if (value != null)
+             {
+                 value._next = this;
+             }
+         }
+     }
+
+     public Token GetPreviousToken()
+     {
+         return Previous;
+     }
+
+     /**
+      * The next token. Setting it unlinks the old next token from
+      * this one and makes this token the Previous of the new value.
+      */
+     public Token Next
+     {
+         get
+         {
+             return _next;
+         }
+         set
+         {
+             if (_next != null)
+             {
+                 _next._previous = null;
+             }
+             _next = value;
+             if (value != null)
+             {
+                 value._previous = this;
+             }
+         }
+     }
+
+     public Token GetNextToken()
+     {
+         return Next;
+     }
+
+     /**
+      * Returns a detailed description: pattern name and id, the first
+      * line of the image, and the start line and column.
+      */
+     public override string ToString()
+     {
+         var text = new StringBuilder();
+
+         text.Append(_pattern.Name)
+             .Append("(")
+             .Append(_pattern.Id)
+             .Append("): \"");
+         AppendFirstImageLine(text);
+         text.Append("\", line: ")
+             .Append(_startLine)
+             .Append(", col: ")
+             .Append(_startColumn);
+
+         return text.ToString();
+     }
+
+     /**
+      * Returns a short description: the quoted first line of the
+      * image, followed by the pattern name in angle brackets for
+      * regular expression patterns.
+      */
+     public string ToShortString()
+     {
+         var text = new StringBuilder();
+
+         text.Append('"');
+         AppendFirstImageLine(text);
+         text.Append('"');
+         if (_pattern.Type == TokenPattern.PatternType.REGEXP)
+         {
+             text.Append(" <").Append(_pattern.Name).Append(">");
+         }
+
+         return text.ToString();
+     }
+
+     /**
+      * Appends the image to the buffer. A multi-line image is cut
+      * before its first line break (a '\r' directly before the '\n'
+      * is dropped too) and "(...)" is appended in its place.
+      */
+     private void AppendFirstImageLine(StringBuilder target)
+     {
+         int cut = _image.IndexOf('\n');
+         if (cut < 0)
+         {
+             target.Append(_image);
+             return;
+         }
+         if (cut > 0 && _image[cut - 1] == '\r')
+         {
+             cut--;
+         }
+         target.Append(_image.Substring(0, cut)).Append("(...)");
+     }
+ }
+}
diff --git a/Parsing/TokenMatch.cs b/Parsing/TokenMatch.cs
new file mode 100644
index 0000000..fadc90d
--- /dev/null
+++ b/Parsing/TokenMatch.cs
@@ -0,0 +1,31 @@
+namespace Flee.Parsing
+{
+ /**
+  * The token match status. This class contains logic to ensure that
+  * only the longest match is considered: an update is applied only
+  * when it is strictly longer than the match recorded so far.
+  */
+ internal class TokenMatch
+ {
+     private int _length;
+     private TokenPattern _pattern;
+
+     /** Resets the status to "no match" (length zero, no pattern). */
+     public void Clear()
+     {
+         _pattern = null;
+         _length = 0;
+     }
+
+     /** The length of the longest match recorded. */
+     public int Length => _length;
+
+     /** The pattern of the longest match recorded, or null. */
+     public TokenPattern Pattern => _pattern;
+
+     /**
+      * Records a match if it is longer than the current one. Matches
+      * of equal or shorter length are ignored.
+      */
+     public void Update(int length, TokenPattern pattern)
+     {
+         if (length <= _length)
+         {
+             return;
+         }
+         _length = length;
+         _pattern = pattern;
+     }
+ }
+}
diff --git a/Parsing/TokenNFA.cs b/Parsing/TokenNFA.cs
new file mode 100644
index 0000000..7d7c470
--- /dev/null
+++ b/Parsing/TokenNFA.cs
@@ -0,0 +1,825 @@
+namespace Flee.Parsing
+{
+ /**
+  * A non-deterministic finite state automaton (NFA) for matching
+  * tokens. It supports both fixed strings and simple regular
+  * expressions. Start states for ASCII characters are kept in a
+  * lookup table indexed by character code; everything else starts
+  * from a shared initial state. Matching iterates over a state queue
+  * in a loop rather than recursing over the input.
+  */
+ internal class TokenNFA
+ {
+     // Start states indexed by the first (ASCII) input character.
+     private readonly NFAState[] _initialChar = new NFAState[128];
+     // Start state for everything not covered by the lookup table.
+     private readonly NFAState _initial = new NFAState();
+     // Work queue reused between Match() calls.
+     private readonly NFAStateQueue _queue = new NFAStateQueue();
+
+     /**
+      * Adds a fixed string match to this automaton. A case-sensitive
+      * string starting with an ASCII character is rooted in the
+      * lookup table; any other string starts from the shared initial
+      * state. The final state is tagged with the token pattern.
+      * NOTE(review): str is indexed at position 0 without a check, so
+      * an empty string throws.
+      */
+     public void AddTextMatch(string str, bool ignoreCase, TokenPattern value)
+     {
+         NFAState state;
+         char ch = str[0];
+
+         if (ch < 128 && !ignoreCase)
+         {
+             state = _initialChar[ch];
+             if (state == null)
+             {
+                 state = _initialChar[ch] = new NFAState();
+             }
+         }
+         else
+         {
+             state = _initial.AddOut(ch, ignoreCase, null);
+         }
+         for (int i = 1; i < str.Length; i++)
+         {
+             state = state.AddOut(str[i], ignoreCase, null);
+         }
+         state.Value = value;
+     }
+
+     /**
+      * Adds a regular expression match to this automaton. The parsed
+      * expression is attached in one of three ways, recorded in the
+      * pattern's debug info: through an epsilon transition from the
+      * initial state (when the parsed start state has incoming
+      * transitions), through the ASCII lookup table (case-sensitive
+      * expressions whose start transitions are ASCII-only, match each
+      * character at most once and do not collide with existing table
+      * entries), or by merging the start state into the initial
+      * state.
+      */
+     public void AddRegExpMatch(string pattern,
+                                bool ignoreCase,
+                                TokenPattern value)
+     {
+         TokenRegExpParser parser = new TokenRegExpParser(pattern, ignoreCase);
+         string debug = "DFA regexp; " + parser.GetDebugInfo();
+
+         // Check that every ASCII character is matched by at most one
+         // start transition and that its table slot is still free.
+         var isAscii = parser.Start.IsAsciiOutgoing();
+         for (int i = 0; isAscii && i < 128; i++)
+         {
+             bool match = false;
+             for (int j = 0; j < parser.Start.Outgoing.Length; j++)
+             {
+                 if (parser.Start.Outgoing[j].Match((char)i))
+                 {
+                     if (match)
+                     {
+                         isAscii = false;
+                         break;
+                     }
+                     match = true;
+                 }
+             }
+             if (match && _initialChar[i] != null)
+             {
+                 isAscii = false;
+             }
+         }
+         if (parser.Start.Incoming.Length > 0)
+         {
+             _initial.AddOut(new NFAEpsilonTransition(parser.Start));
+             debug += ", uses initial epsilon";
+         }
+         else if (isAscii && !ignoreCase)
+         {
+             for (int i = 0; isAscii && i < 128; i++)
+             {
+                 for (int j = 0; j < parser.Start.Outgoing.Length; j++)
+                 {
+                     if (parser.Start.Outgoing[j].Match((char)i))
+                     {
+                         _initialChar[i] = parser.Start.Outgoing[j].State;
+                     }
+                 }
+             }
+             debug += ", uses ASCII lookup";
+         }
+         else
+         {
+             parser.Start.MergeInto(_initial);
+             debug += ", uses initial state";
+         }
+         parser.End.Value = value;
+         value.DebugInfo = debug;
+     }
+
+     /**
+      * Checks if this automaton matches the input at the start of the
+      * buffer. Every accepting state reached is reported through
+      * match.Update(), so the match object ends up holding the
+      * longest match seen.
+      *
+      * Returns the length of the longest match found by this
+      * automaton, or zero if there was none. (Previously the result
+      * variable was never assigned and the method always returned
+      * zero.)
+      */
+     public int Match(ReaderBuffer buffer, TokenMatch match)
+     {
+         int length = 0;
+         int pos = 1;
+         NFAState state;
+
+         // The first step of the match loop has been unrolled and
+         // optimized for performance below.
+         this._queue.Clear();
+         var peekChar = buffer.Peek(0);
+         if (0 <= peekChar && peekChar < 128)
+         {
+             state = this._initialChar[peekChar];
+             if (state != null)
+             {
+                 this._queue.AddLast(state);
+             }
+         }
+         if (peekChar >= 0)
+         {
+             this._initial.MatchTransitions((char)peekChar, this._queue, true);
+         }
+         this._queue.MarkEnd();
+         peekChar = buffer.Peek(1);
+
+         // The remaining match loop processes all subsequent states
+         while (!this._queue.Empty)
+         {
+             if (this._queue.Marked)
+             {
+                 // All states for the current position are done;
+                 // move on to the next input character.
+                 pos++;
+                 peekChar = buffer.Peek(pos);
+                 this._queue.MarkEnd();
+             }
+             state = this._queue.RemoveFirst();
+             if (state.Value != null)
+             {
+                 match.Update(pos, state.Value);
+                 // pos never decreases, so the latest accepting
+                 // position is also the longest match.
+                 length = pos;
+             }
+             if (peekChar >= 0)
+             {
+                 state.MatchTransitions((char)peekChar, this._queue, false);
+             }
+         }
+         return length;
+     }
+ }
+
+
+ /**
+  * An NFA state. The NFA consists of a series of states, each
+  * having zero or more transitions to other states. A state with a
+  * non-null Value is an accepting state for that token pattern.
+  */
+ internal class NFAState
+ {
+     // Token pattern accepted in this state, or null.
+     internal TokenPattern Value = null;
+     // Transitions leading into this state.
+     internal NFATransition[] Incoming = new NFATransition[0];
+     // Transitions leading out of this state.
+     internal NFATransition[] Outgoing = new NFATransition[0];
+     // Set once an epsilon transition has been added to Outgoing.
+     internal bool EpsilonOut = false;
+
+     /** Checks whether this state has any incoming or outgoing transitions. */
+     public bool HasTransitions()
+     {
+         return Incoming.Length > 0 || Outgoing.Length > 0;
+     }
+
+     /** Checks whether all outgoing transitions report themselves as ASCII-only. */
+     public bool IsAsciiOutgoing()
+     {
+         for (int i = 0; i < Outgoing.Length; i++)
+         {
+             if (!Outgoing[i].IsAscii())
+             {
+                 return false;
+             }
+         }
+         return true;
+     }
+
+     /** Appends a transition to the incoming transition array. */
+     public void AddIn(NFATransition trans)
+     {
+         Array.Resize(ref Incoming, Incoming.Length + 1);
+         Incoming[Incoming.Length - 1] = trans;
+     }
+
+     /**
+      * Adds an outgoing character transition and returns its target
+      * state. A null target means a state is chosen here: in
+      * case-sensitive mode an existing unique transition on the same
+      * character is reused, otherwise a new state is created.
+      *
+      * In case-insensitive mode transitions for the lower-case and
+      * the upper-case form are added. When both forms are the same
+      * character (digits, punctuation, ...) only one transition is
+      * added. Two identical transitions would enqueue the target
+      * state twice for every such character, doubling the work at
+      * each step of a match.
+      */
+     public NFAState AddOut(char ch, bool ignoreCase, NFAState state)
+     {
+         if (ignoreCase)
+         {
+             if (state == null)
+             {
+                 state = new NFAState();
+             }
+             char lower = Char.ToLower(ch);
+             char upper = Char.ToUpper(ch);
+             AddOut(new NFACharTransition(lower, state));
+             if (upper != lower)
+             {
+                 AddOut(new NFACharTransition(upper, state));
+             }
+             return state;
+         }
+         else
+         {
+             if (state == null)
+             {
+                 state = FindUniqueCharTransition(ch);
+                 if (state != null)
+                 {
+                     return state;
+                 }
+                 state = new NFAState();
+             }
+             return AddOut(new NFACharTransition(ch, state));
+         }
+     }
+
+     /**
+      * Appends a transition to the outgoing transition array and
+      * returns the transition's target state.
+      */
+     public NFAState AddOut(NFATransition trans)
+     {
+         Array.Resize(ref Outgoing, Outgoing.Length + 1);
+         Outgoing[Outgoing.Length - 1] = trans;
+         if (trans is NFAEpsilonTransition)
+         {
+             EpsilonOut = true;
+         }
+         return trans.State;
+     }
+
+     /**
+      * Moves all transitions of this state over to another state.
+      * Incoming transitions are redirected to the target state and
+      * outgoing transitions are added to it. Both arrays of this
+      * state are set to null afterwards, so this state must not be
+      * used again.
+      */
+     public void MergeInto(NFAState state)
+     {
+         for (int i = 0; i < Incoming.Length; i++)
+         {
+             state.AddIn(Incoming[i]);
+             Incoming[i].State = state;
+         }
+         Incoming = null;
+         for (int i = 0; i < Outgoing.Length; i++)
+         {
+             state.AddOut(Outgoing[i]);
+         }
+         Outgoing = null;
+     }
+
+     /**
+      * Finds the target of the single outgoing NFACharTransition
+      * matching the character. Returns null if there is none, if
+      * several match, or if another outgoing transition leads to the
+      * same target state (the state is then not exclusive to this
+      * character).
+      */
+     private NFAState FindUniqueCharTransition(char ch)
+     {
+         NFATransition res = null;
+         NFATransition trans;
+
+         for (int i = 0; i < Outgoing.Length; i++)
+         {
+             trans = Outgoing[i];
+             if (trans.Match(ch) && trans is NFACharTransition)
+             {
+                 if (res != null)
+                 {
+                     return null;
+                 }
+                 res = trans;
+             }
+         }
+         for (int i = 0; res != null && i < Outgoing.Length; i++)
+         {
+             trans = Outgoing[i];
+             if (trans != res && trans.State == res.State)
+             {
+                 return null;
+             }
+         }
+         return res?.State;
+     }
+
+     /**
+      * Adds the targets of all outgoing transitions matching the
+      * character to the queue, together with the states reachable
+      * from those targets through epsilon transitions. With the
+      * initial flag set, epsilon transitions out of this state are
+      * followed first and their targets matched instead.
+      */
+     public void MatchTransitions(char ch, NFAStateQueue queue, bool initial)
+     {
+         for (int i = 0; i < Outgoing.Length; i++)
+         {
+             var trans = Outgoing[i];
+             var target = trans.State;
+             if (initial && trans is NFAEpsilonTransition)
+             {
+                 target.MatchTransitions(ch, queue, true);
+             }
+             else if (trans.Match(ch))
+             {
+                 queue.AddLast(target);
+                 if (target.EpsilonOut)
+                 {
+                     target.MatchEmpty(queue);
+                 }
+             }
+         }
+     }
+
+     /**
+      * Adds all states reachable from this state through epsilon
+      * transitions to the queue, recursively.
+      */
+     public void MatchEmpty(NFAStateQueue queue)
+     {
+         for (int i = 0; i < Outgoing.Length; i++)
+         {
+             var trans = Outgoing[i];
+             if (trans is NFAEpsilonTransition)
+             {
+                 var target = trans.State;
+                 queue.AddLast(target);
+                 if (target.EpsilonOut)
+                 {
+                     target.MatchEmpty(queue);
+                 }
+             }
+         }
+     }
+ }
+
+
+ /**
+  * An NFA state transition. A transition checks a single
+  * character of input and determines if it is a match. If a match
+  * is encountered, the NFA should move forward to the transition
+  * state.
+  */
+ internal abstract class NFATransition
+ {
+     // The target state of this transition.
+     internal NFAState State;
+
+     /**
+      * Creates a transition to the given state and registers it as
+      * an incoming transition of that state.
+      */
+     protected NFATransition(NFAState state)
+     {
+         State = state;
+         state.AddIn(this);
+     }
+
+     /** Checks whether this transition only matches ASCII characters. */
+     public abstract bool IsAscii();
+
+     /** Checks whether the character matches this transition. */
+     public abstract bool Match(char ch);
+
+     /** Creates a transition of the same kind leading to another state. */
+     public abstract NFATransition Copy(NFAState state);
+ }
+
+
+ /**
+  * The special epsilon transition. This transition matches the
+  * empty input, i.e. it is an automatic transition that doesn't
+  * read any input. Both IsAscii() and Match() therefore always
+  * return false, and callers detect the type explicitly.
+  */
+ internal class NFAEpsilonTransition : NFATransition
+ {
+     public NFAEpsilonTransition(NFAState state) : base(state)
+     {
+     }
+
+     public override bool IsAscii() => false;
+
+     public override bool Match(char ch) => false;
+
+     public override NFATransition Copy(NFAState state) => new NFAEpsilonTransition(state);
+ }
+
+
+ /**
+  * A single character match transition. It matches exactly one
+  * character value.
+  */
+ internal class NFACharTransition : NFATransition
+ {
+     private readonly char _match;
+
+     public NFACharTransition(char match, NFAState state) : base(state)
+     {
+         _match = match;
+     }
+
+     // A char is never negative, so only the upper bound needs checking.
+     public override bool IsAscii() => _match < 128;
+
+     public override bool Match(char ch) => ch == _match;
+
+     public override NFATransition Copy(NFAState state) => new NFACharTransition(_match, state);
+ }
+
+
+ /**
+  * A character range match transition. Used for user-defined
+  * character sets in regular expressions. The set contents are
+  * single characters and character ranges. An inverse set matches
+  * every character not in the contents.
+  */
+ internal class NFACharRangeTransition : NFATransition
+ {
+     protected bool Inverse;
+     protected bool IgnoreCase;
+
+     // Boxed char values and Range instances, in insertion order.
+     private object[] _contents = new object[0];
+
+     public NFACharRangeTransition(bool inverse,
+                                   bool ignoreCase,
+                                   NFAState state) : base(state)
+     {
+         Inverse = inverse;
+         IgnoreCase = ignoreCase;
+     }
+
+     /**
+      * Checks whether the set only matches ASCII characters. An
+      * inverse set never does; otherwise every character and range
+      * in the contents must lie below 128.
+      */
+     public override bool IsAscii()
+     {
+         if (Inverse)
+         {
+             return false;
+         }
+         foreach (object item in _contents)
+         {
+             if (item is char)
+             {
+                 if ((char)item >= 128)
+                 {
+                     return false;
+                 }
+                 continue;
+             }
+             var range = item as Range;
+             if (range != null && !range.IsAscii())
+             {
+                 return false;
+             }
+         }
+         return true;
+     }
+
+     /** Adds a single character, lower-cased when ignoring case. */
+     public void AddCharacter(char c)
+     {
+         char stored = IgnoreCase ? Char.ToLower(c) : c;
+         AddContent(stored);
+     }
+
+     /** Adds a character range, with both ends lower-cased when ignoring case. */
+     public void AddRange(char min, char max)
+     {
+         if (!IgnoreCase)
+         {
+             AddContent(new Range(min, max));
+             return;
+         }
+         AddContent(new Range(Char.ToLower(min), Char.ToLower(max)));
+     }
+
+     // Grows the contents array by one and stores the item at the end.
+     private void AddContent(Object obj)
+     {
+         int slot = _contents.Length;
+         Array.Resize(ref _contents, slot + 1);
+         _contents[slot] = obj;
+     }
+
+     /**
+      * Checks whether the character is matched by the set. When
+      * ignoring case the character is lower-cased first. A hit in
+      * the contents gives !Inverse; no hit gives Inverse.
+      */
+     public override bool Match(char ch)
+     {
+         char probe = IgnoreCase ? Char.ToLower(ch) : ch;
+
+         foreach (object item in _contents)
+         {
+             bool hit;
+             if (item is char)
+             {
+                 hit = (char)item == probe;
+             }
+             else
+             {
+                 var range = item as Range;
+                 hit = range != null && range.Inside(probe);
+             }
+             if (hit)
+             {
+                 return !Inverse;
+             }
+         }
+         return Inverse;
+     }
+
+     /**
+      * Creates a copy leading to another state. The copy starts out
+      * referencing the same contents array.
+      */
+     public override NFATransition Copy(NFAState state)
+     {
+         return new NFACharRangeTransition(Inverse, IgnoreCase, state)
+         {
+             _contents = _contents
+         };
+     }
+
+     /** An inclusive character range. */
+     private class Range
+     {
+         private readonly char _min;
+         private readonly char _max;
+
+         public Range(char min, char max)
+         {
+             _min = min;
+             _max = max;
+         }
+
+         // A char is never negative, so only the upper bounds matter.
+         public bool IsAscii()
+         {
+             return _min < 128 && _max < 128;
+         }
+
+         public bool Inside(char c)
+         {
+             return c >= _min && c <= _max;
+         }
+     }
+ }
+
+
+ /**
+  * The dot ('.') character set transition. This transition
+  * matches a single character that is not one of the line break
+  * characters '\n', '\r', U+0085, U+2028 or U+2029.
+  */
+ internal class NFADotTransition : NFATransition
+ {
+     public NFADotTransition(NFAState state) : base(state)
+     {
+     }
+
+     public override bool IsAscii() => false;
+
+     public override bool Match(char ch)
+     {
+         bool lineBreak = ch == '\n'
+                       || ch == '\r'
+                       || ch == '\u0085'
+                       || ch == '\u2028'
+                       || ch == '\u2029';
+         return !lineBreak;
+     }
+
+     public override NFATransition Copy(NFAState state) => new NFADotTransition(state);
+ }
+
+
+ /**
+  * The digit character set transition. This transition matches a
+  * single character in the range '0' to '9'.
+  */
+ internal class NFADigitTransition : NFATransition
+ {
+     public NFADigitTransition(NFAState state) : base(state)
+     {
+     }
+
+     public override bool IsAscii() => true;
+
+     public override bool Match(char ch) => ch >= '0' && ch <= '9';
+
+     public override NFATransition Copy(NFAState state) => new NFADigitTransition(state);
+ }
+
+
+ /**
+  * The non-digit character set transition. This transition
+  * matches a single character outside the range '0' to '9'.
+  */
+ internal class NFANonDigitTransition : NFATransition
+ {
+     public NFANonDigitTransition(NFAState state) : base(state)
+     {
+     }
+
+     public override bool IsAscii() => false;
+
+     public override bool Match(char ch)
+     {
+         bool digit = ch >= '0' && ch <= '9';
+         return !digit;
+     }
+
+     public override NFATransition Copy(NFAState state) => new NFANonDigitTransition(state);
+ }
+
+ /**
+  * The whitespace character set transition. This transition
+  * matches a single whitespace character: space, tab, newline,
+  * form feed, carriage return or vertical tab (character 11).
+  */
+ internal class NFAWhitespaceTransition : NFATransition
+ {
+     public NFAWhitespaceTransition(NFAState state) : base(state)
+     {
+     }
+
+     public override bool IsAscii() => true;
+
+     public override bool Match(char ch)
+     {
+         return ch == ' '
+             || ch == '\t'
+             || ch == '\n'
+             || ch == '\f'
+             || ch == '\r'
+             || ch == (char)11;
+     }
+
+     public override NFATransition Copy(NFAState state) => new NFAWhitespaceTransition(state);
+ }
+
+
+ /**
+  * The non-whitespace character set transition. This transition
+  * matches a single character that is not a space, tab, newline,
+  * form feed, carriage return or vertical tab (character 11).
+  */
+ internal class NFANonWhitespaceTransition : NFATransition
+ {
+     public NFANonWhitespaceTransition(NFAState state) : base(state)
+     {
+     }
+
+     public override bool IsAscii() => false;
+
+     public override bool Match(char ch)
+     {
+         bool whitespace = ch == ' '
+                        || ch == '\t'
+                        || ch == '\n'
+                        || ch == '\f'
+                        || ch == '\r'
+                        || ch == (char)11;
+         return !whitespace;
+     }
+
+     public override NFATransition Copy(NFAState state) => new NFANonWhitespaceTransition(state);
+ }
+
+
+ /**
+  * The word character set transition. This transition matches a
+  * single word character: an ASCII letter, an ASCII digit or an
+  * underscore.
+  */
+ internal class NFAWordTransition : NFATransition
+ {
+     public NFAWordTransition(NFAState state) : base(state)
+     {
+     }
+
+     public override bool IsAscii() => true;
+
+     public override bool Match(char ch)
+     {
+         if (ch == '_')
+         {
+             return true;
+         }
+         bool lowerLetter = ch >= 'a' && ch <= 'z';
+         bool upperLetter = ch >= 'A' && ch <= 'Z';
+         bool digit = ch >= '0' && ch <= '9';
+         return lowerLetter || upperLetter || digit;
+     }
+
+     public override NFATransition Copy(NFAState state) => new NFAWordTransition(state);
+ }
+
+
+ /**
+  * The non-word character set transition. This transition matches
+  * a single character that is not an ASCII letter, an ASCII digit
+  * or an underscore.
+  */
+ internal class NFANonWordTransition : NFATransition
+ {
+     public NFANonWordTransition(NFAState state) : base(state)
+     {
+     }
+
+     public override bool IsAscii() => false;
+
+     public override bool Match(char ch)
+     {
+         if (ch == '_')
+         {
+             return false;
+         }
+         bool lowerLetter = ch >= 'a' && ch <= 'z';
+         bool upperLetter = ch >= 'A' && ch <= 'Z';
+         bool digit = ch >= '0' && ch <= '9';
+         return !(lowerLetter || upperLetter || digit);
+     }
+
+     public override NFATransition Copy(NFAState state) => new NFANonWordTransition(state);
+ }
+
+
+ /**
+  * An NFA state queue. This queue is used during processing to
+  * keep track of the current and subsequent NFA states. The
+  * current state is read from the beginning of the queue, and new
+  * states are added at the end. A marker index is used to
+  * separate the current from the subsequent states.
+  *
+  * The queue lives in a single array. Removal only advances the
+  * start index. When the end of the array is reached, the live
+  * entries are moved to the front if there is free space there;
+  * otherwise the array is doubled in size.
+  */
+ internal class NFAStateQueue
+ {
+     private NFAState[] _queue = new NFAState[2048];
+
+     // Index of the first live entry.
+     private int _first;
+
+     // Index one past the last live entry.
+     private int _last;
+
+     // Index recorded by the latest MarkEnd() call.
+     private int _mark;
+
+     /** True when the queue holds no entries. */
+     public bool Empty => _first >= _last;
+
+     /** True when the read position has reached the marker. */
+     public bool Marked => _mark == _first;
+
+     /** Empties the queue and resets the marker. */
+     public void Clear()
+     {
+         _first = _last = _mark = 0;
+     }
+
+     /** Places the marker at the current end of the queue. */
+     public void MarkEnd()
+     {
+         _mark = _last;
+     }
+
+     /** Removes and returns the first entry, or null when empty. */
+     public NFAState RemoveFirst()
+     {
+         if (_first >= _last)
+         {
+             return null;
+         }
+         return _queue[_first++];
+     }
+
+     /** Appends a state at the end of the queue, making room if needed. */
+     public void AddLast(NFAState state)
+     {
+         if (_last >= _queue.Length)
+         {
+             MakeRoom();
+         }
+         _queue[_last] = state;
+         _last++;
+     }
+
+     // Frees up space at the end of the array: shifts the live entries
+     // (and the marker) to the front when entries have been removed,
+     // and doubles the array otherwise.
+     private void MakeRoom()
+     {
+         if (_first > 0)
+         {
+             int live = _last - _first;
+             Array.Copy(_queue, _first, _queue, 0, live);
+             _mark -= _first;
+             _last = live;
+             _first = 0;
+         }
+         else
+         {
+             Array.Resize(ref _queue, _queue.Length * 2);
+         }
+     }
+ }
+}
diff --git a/Parsing/TokenPattern.cs b/Parsing/TokenPattern.cs
new file mode 100644
index 0000000..7213c97
--- /dev/null
+++ b/Parsing/TokenPattern.cs
@@ -0,0 +1,303 @@
+using System.Text;
+
+namespace Flee.Parsing
+{
+ /**
+ * A token pattern. This class contains the definition of a token
+ * (i.e. its pattern), and allows testing a string against this
+ * pattern. A token pattern is uniquely identified by an integer id
+ * that must be provided upon creation.
+ *
+
+ */
+ internal class TokenPattern
+ {
+ public enum PatternType
+ {
+
+ /**
+ * The string pattern type is used for tokens that only
+ * match an exact string.
+ */
+ STRING,
+
+ /**
+ * The regular expression pattern type is used for tokens
+ * that match a regular expression.
+ */
+ REGEXP
+ }
+
+ private int _id;
+ private string _name;
+ private PatternType _type;
+ private string _pattern;
+ private bool _error;
+ private string _errorMessage;
+ private bool _ignore;
+ private string _ignoreMessage;
+ private string _debugInfo;
+
+ public TokenPattern(int id,
+ string name,
+ PatternType type,
+ string pattern)
+ {
+
+ this._id = id;
+ this._name = name;
+ this._type = type;
+ this._pattern = pattern;
+ }
+
+ public int Id
+ {
+ get
+ {
+ return _id;
+ }
+ set { _id = value; }
+ }
+
+ public int GetId()
+ {
+ return _id;
+ }
+
+ public string Name
+ {
+ get
+ {
+ return _name;
+ }
+ set { _name = value; }
+ }
+
+ public string GetName()
+ {
+ return _name;
+ }
+
+ public PatternType Type
+ {
+ get
+ {
+ return _type;
+ }
+ set { _type = value; }
+ }
+
+ public PatternType GetPatternType()
+ {
+ return _type;
+ }
+
+ public string Pattern
+ {
+ get
+ {
+ return _pattern;
+ }
+ set { _pattern = value; }
+ }
+
+ public string GetPattern()
+ {
+ return _pattern;
+ }
+
+ public bool Error
+ {
+ get
+ {
+ return _error;
+ }
+ set
+ {
+ _error = value;
+ if (_error && _errorMessage == null)
+ {
+ _errorMessage = "unrecognized token found";
+ }
+ }
+ }
+
+ public string ErrorMessage
+ {
+ get
+ {
+ return _errorMessage;
+ }
+ set
+ {
+ _error = true;
+ _errorMessage = value;
+ }
+ }
+
+ public bool IsError()
+ {
+ return Error;
+ }
+
+ public string GetErrorMessage()
+ {
+ return ErrorMessage;
+ }
+
+ public void SetError()
+ {
+ Error = true;
+ }
+
+ public void SetError(string message)
+ {
+ ErrorMessage = message;
+ }
+
+ public bool Ignore
+ {
+ get
+ {
+ return _ignore;
+ }
+ set
+ {
+ _ignore = value;
+ }
+ }
+
+ public string IgnoreMessage
+ {
+ get
+ {
+ return _ignoreMessage;
+ }
+ set
+ {
+ _ignore = true;
+ _ignoreMessage = value;
+ }
+ }
+
+ public bool IsIgnore()
+ {
+ return Ignore;
+ }
+
+ public string GetIgnoreMessage()
+ {
+ return IgnoreMessage;
+ }
+
+
+ public void SetIgnore()
+ {
+ Ignore = true;
+ }
+
+
+ public void SetIgnore(string message)
+ {
+ IgnoreMessage = message;
+ }
+
+ public string DebugInfo
+ {
+ get
+ {
+ return _debugInfo;
+ }
+ set
+ {
+ _debugInfo = value;
+ }
+ }
+
+ /**
+  * Builds the long description of this pattern: name and id first,
+  * then the pattern text wrapped in quotes (string tokens) or in
+  * double angle brackets (regexp tokens), followed by the error
+  * message, the ignore flag with its optional message, and the
+  * debug info on a new line, each only when set.
+  */
+ public override string ToString()
+ {
+     StringBuilder text = new StringBuilder();
+
+     // Header: name, id in parentheses, colon.
+     text.Append(_name).Append(" (").Append(_id).Append("): ");
+
+     // Pattern text, delimited according to the pattern type.
+     if (_type == PatternType.STRING)
+     {
+         text.Append("\"").Append(_pattern).Append("\"");
+     }
+     else if (_type == PatternType.REGEXP)
+     {
+         text.Append("<<").Append(_pattern).Append(">>");
+     }
+
+     // Optional suffixes.
+     if (_error)
+     {
+         text.Append(" ERROR: \"").Append(_errorMessage).Append("\"");
+     }
+     if (_ignore)
+     {
+         text.Append(" IGNORE");
+         if (_ignoreMessage != null)
+         {
+             text.Append(": \"").Append(_ignoreMessage).Append("\"");
+         }
+     }
+     if (_debugInfo != null)
+     {
+         text.Append("\n ").Append(_debugInfo);
+     }
+
+     return text.ToString();
+ }
+
+ /**
+  * Returns a short description of this pattern. String tokens yield
+  * the quoted pattern text, cut at the first line break and followed
+  * by "(...)" when there is one; all other tokens yield the name in
+  * angle brackets. A null pattern text is treated as empty instead of
+  * raising a NullReferenceException, and the pattern text is no
+  * longer inspected at all for non-string tokens.
+  */
+ public string ToShortString()
+ {
+     if (_type != PatternType.STRING)
+     {
+         // Only the name is shown for non-string tokens.
+         return "<" + _name + ">";
+     }
+
+     string text = _pattern ?? "";
+     int newline = text.IndexOf('\n');
+     if (newline < 0)
+     {
+         // Single-line text is shown in full.
+         return "\"" + text + "\"";
+     }
+
+     // Only the first line of a multi-line text is shown.
+     if (newline > 0 && text[newline - 1] == '\r')
+     {
+         // Also drop the carriage return of a CRLF line break.
+         newline--;
+     }
+     return "\"" + text.Substring(0, newline) + "(...)\"";
+ }
+
+ public void SetData(int id, string name, PatternType type, string pattern)
+ {
+ Id = id;
+ Name = name;
+ Type = type;
+ Pattern = pattern;
+ }
+ }
+}
diff --git a/Parsing/TokenRegExpParser.cs b/Parsing/TokenRegExpParser.cs
new file mode 100644
index 0000000..061003f
--- /dev/null
+++ b/Parsing/TokenRegExpParser.cs
@@ -0,0 +1,545 @@
+using System.Collections;
+using System.Globalization;
+using System.Text;
+
+namespace Flee.Parsing
+{
+ /**
+ * A regular expression parser. The parser creates an NFA for the
+ * regular expression having a single start and acceptance states.
+ */
+ internal class TokenRegExpParser
+ {
+ private readonly string _pattern;
+ private readonly bool _ignoreCase;
+ private int _pos;
+ internal NFAState Start = new NFAState();
+ internal NFAState End;
+ private int _stateCount;
+ private int _transitionCount;
+ private int _epsilonCount;
+
+ public TokenRegExpParser(string pattern) : this(pattern, false)
+ {
+ }
+
+ public TokenRegExpParser(string pattern, bool ignoreCase)
+ {
+ this._pattern = pattern;
+ this._ignoreCase = ignoreCase;
+ this._pos = 0;
+ this.End = ParseExpr(Start);
+ if (_pos < pattern.Length)
+ {
+ throw new RegExpException(
+ RegExpException.ErrorType.UNEXPECTED_CHARACTER,
+ _pos,
+ pattern);
+ }
+ }
+
+ /**
+  * Returns the state, transition and epsilon counts as text; the
+  * counts are collected on a call that finds _stateCount still 0.
+  */
+ public string GetDebugInfo()
+ {
+     bool notCountedYet = (_stateCount == 0);
+     if (notCountedYet) { UpdateStats(Start, new Hashtable()); }
+     return $"{_stateCount} states, {_transitionCount} transitions, {_epsilonCount} epsilons";
+ }
+
+ /**
+  * Depth-first walk from state that counts each state once, every
+  * outgoing transition, and how many of those are epsilon transitions.
+  */
+ private void UpdateStats(NFAState state, Hashtable visited)
+ {
+     if (visited.ContainsKey(state)) { return; }
+
+     visited.Add(state, state);
+     _stateCount++;
+     foreach (var transition in state.Outgoing)
+     {
+         _transitionCount++;
+         if (transition is NFAEpsilonTransition) { _epsilonCount++; }
+         UpdateStats(transition.State, visited);
+     }
+ }
+
+ private NFAState ParseExpr(NFAState start)
+ {
+ NFAState end = new NFAState();
+ do
+ {
+ if (PeekChar(0) == '|')
+ {
+ ReadChar('|');
+ }
+ var subStart = new NFAState();
+ var subEnd = ParseTerm(subStart);
+ if (subStart.Incoming.Length == 0)
+ {
+ subStart.MergeInto(start);
+ }
+ else
+ {
+ start.AddOut(new NFAEpsilonTransition(subStart));
+ }
+ if (subEnd.Outgoing.Length == 0 ||
+ (!end.HasTransitions() && PeekChar(0) != '|'))
+ {
+ subEnd.MergeInto(end);
+ }
+ else
+ {
+ subEnd.AddOut(new NFAEpsilonTransition(end));
+ }
+ } while (PeekChar(0) == '|');
+ return end;
+ }
+
+ private NFAState ParseTerm(NFAState start) // Parses consecutive factors, each one starting at the state the previous one returned.
+ {
+ var end = ParseFact(start); // a term always contains at least one factor
+ while (true)
+ {
+ switch (PeekChar(0))
+ {
+ case -1: // PeekChar() yields -1 past the end of the pattern
+ case ')':
+ case ']':
+ case '{':
+ case '}':
+ case '?':
+ case '+':
+ case '|':
+ return end; // none of these characters starts another factor here
+ default:
+ end = ParseFact(end); // chain the next factor onto the current end state
+ break;
+ }
+ }
+ }
+
+ private NFAState ParseFact(NFAState start)
+ {
+ NFAState placeholder = new NFAState();
+
+ var end = ParseAtom(placeholder);
+ switch (PeekChar(0))
+ {
+ case '?':
+ case '*':
+ case '+':
+ case '{':
+ end = ParseAtomModifier(placeholder, end);
+ break;
+ }
+ if (placeholder.Incoming.Length > 0 && start.Outgoing.Length > 0)
+ {
+ start.AddOut(new NFAEpsilonTransition(placeholder));
+ return end;
+ }
+ else
+ {
+ placeholder.MergeInto(start);
+ return (end == placeholder) ? start : end;
+ }
+ }
+
+ private NFAState ParseAtom(NFAState start)
+ {
+ NFAState end;
+
+ switch (PeekChar(0))
+ {
+ case '.':
+ ReadChar('.');
+ return start.AddOut(new NFADotTransition(new NFAState()));
+ case '(':
+ ReadChar('(');
+ end = ParseExpr(start);
+ ReadChar(')');
+ return end;
+ case '[':
+ ReadChar('[');
+ end = ParseCharSet(start);
+ ReadChar(']');
+ return end;
+ case -1:
+ case ')':
+ case ']':
+ case '{':
+ case '}':
+ case '?':
+ case '*':
+ case '+':
+ case '|':
+ throw new RegExpException(
+ RegExpException.ErrorType.UNEXPECTED_CHARACTER,
+ _pos,
+ _pattern);
+ default:
+ return ParseChar(start);
+ }
+ }
+
+ private NFAState ParseAtomModifier(NFAState start, NFAState end)
+ {
+ int min = 0;
+ int max = -1;
+ int firstPos = _pos;
+
+ // Read min and max
+ switch (ReadChar())
+ {
+ case '?':
+ min = 0;
+ max = 1;
+ break;
+ case '*':
+ min = 0;
+ max = -1;
+ break;
+ case '+':
+ min = 1;
+ max = -1;
+ break;
+ case '{':
+ min = ReadNumber();
+ max = min;
+ if (PeekChar(0) == ',')
+ {
+ ReadChar(',');
+ max = -1;
+ if (PeekChar(0) != '}')
+ {
+ max = ReadNumber();
+ }
+ }
+ ReadChar('}');
+ if (max == 0 || (max > 0 && min > max))
+ {
+ throw new RegExpException(
+ RegExpException.ErrorType.INVALID_REPEAT_COUNT,
+ firstPos,
+ _pattern);
+ }
+ break;
+ default:
+ throw new RegExpException(
+ RegExpException.ErrorType.UNEXPECTED_CHARACTER,
+ _pos - 1,
+ _pattern);
+ }
+
+ // Read possessive or reluctant modifiers
+ if (PeekChar(0) == '?')
+ {
+ throw new RegExpException(
+ RegExpException.ErrorType.UNSUPPORTED_SPECIAL_CHARACTER,
+ _pos,
+ _pattern);
+ }
+ else if (PeekChar(0) == '+')
+ {
+ throw new RegExpException(
+ RegExpException.ErrorType.UNSUPPORTED_SPECIAL_CHARACTER,
+ _pos,
+ _pattern);
+ }
+
+ // Handle supported repeaters
+ if (min == 0 && max == 1)
+ {
+ return start.AddOut(new NFAEpsilonTransition(end));
+ }
+ else if (min == 0 && max == -1)
+ {
+ if (end.Outgoing.Length == 0)
+ {
+ end.MergeInto(start);
+ }
+ else
+ {
+ end.AddOut(new NFAEpsilonTransition(start));
+ }
+ return start;
+ }
+ else if (min == 1 && max == -1)
+ {
+ if (start.Outgoing.Length == 1 &&
+ end.Outgoing.Length == 0 &&
+ end.Incoming.Length == 1 &&
+ start.Outgoing[0] == end.Incoming[0])
+ {
+
+ end.AddOut(start.Outgoing[0].Copy(end));
+ }
+ else
+ {
+ end.AddOut(new NFAEpsilonTransition(start));
+ }
+ return end;
+ }
+ else
+ {
+ throw new RegExpException(
+ RegExpException.ErrorType.INVALID_REPEAT_COUNT,
+ firstPos,
+ _pattern);
+ }
+ }
+
+ private NFAState ParseCharSet(NFAState start)
+ {
+ NFAState end = new NFAState();
+ NFACharRangeTransition range;
+
+ if (PeekChar(0) == '^')
+ {
+ ReadChar('^');
+ range = new NFACharRangeTransition(true, _ignoreCase, end);
+ }
+ else
+ {
+ range = new NFACharRangeTransition(false, _ignoreCase, end);
+ }
+ start.AddOut(range);
+ while (PeekChar(0) > 0)
+ {
+ var min = (char)PeekChar(0);
+ switch (min)
+ {
+ case ']':
+ return end;
+ case '\\':
+ range.AddCharacter(ReadEscapeChar());
+ break;
+ default:
+ ReadChar(min);
+ if (PeekChar(0) == '-' &&
+ PeekChar(1) > 0 &&
+ PeekChar(1) != ']')
+ {
+
+ ReadChar('-');
+ var max = ReadChar();
+ range.AddRange(min, max);
+ }
+ else
+ {
+ range.AddCharacter(min);
+ }
+ break;
+ }
+ }
+ return end;
+ }
+
+ private NFAState ParseChar(NFAState start)
+ {
+ switch (PeekChar(0))
+ {
+ case '\\':
+ return ParseEscapeChar(start);
+ case '^':
+ case '$':
+ throw new RegExpException(
+ RegExpException.ErrorType.UNSUPPORTED_SPECIAL_CHARACTER,
+ _pos,
+ _pattern);
+ default:
+ return start.AddOut(ReadChar(), _ignoreCase, new NFAState());
+ }
+ }
+
+ /**
+  * Parses an escape sequence at the current position. The class
+  * escapes \d \D \s \S \w \W are mapped to their dedicated
+  * transition types; every other sequence is decoded by
+  * ReadEscapeChar() and added as a single-character transition.
+  * Returns whatever start.AddOut() returns for the new transition.
+  */
+ private NFAState ParseEscapeChar(NFAState start)
+ {
+     NFAState end = new NFAState();
+     NFATransition classTransition = null;
+
+     if (PeekChar(0) == '\\' && PeekChar(1) > 0)
+     {
+         switch ((char)PeekChar(1))
+         {
+             case 'd': classTransition = new NFADigitTransition(end); break;
+             case 'D': classTransition = new NFANonDigitTransition(end); break;
+             case 's': classTransition = new NFAWhitespaceTransition(end); break;
+             case 'S': classTransition = new NFANonWhitespaceTransition(end); break;
+             case 'w': classTransition = new NFAWordTransition(end); break;
+             case 'W': classTransition = new NFANonWordTransition(end); break;
+         }
+     }
+
+     // Not a class escape: decode it as one literal character.
+     if (classTransition == null)
+     {
+         return start.AddOut(ReadEscapeChar(), _ignoreCase, end);
+     }
+
+     // Consume the backslash and the class letter.
+     ReadChar();
+     ReadChar();
+     return start.AddOut(classTransition);
+ }
+
+ private char ReadEscapeChar()
+ {
+ string str;
+ int value;
+
+ ReadChar('\\');
+ var c = ReadChar();
+ switch (c)
+ {
+ case '0':
+ c = ReadChar();
+ if (c < '0' || c > '3')
+ {
+ throw new RegExpException(
+ RegExpException.ErrorType.UNSUPPORTED_ESCAPE_CHARACTER,
+ _pos - 3,
+ _pattern);
+ }
+ value = c - '0';
+ c = (char)PeekChar(0);
+ if ('0' <= c && c <= '7')
+ {
+ value *= 8;
+ value += ReadChar() - '0';
+ c = (char)PeekChar(0);
+ if ('0' <= c && c <= '7')
+ {
+ value *= 8;
+ value += ReadChar() - '0';
+ }
+ }
+ return (char)value;
+ case 'x':
+ str = ReadChar().ToString() + ReadChar().ToString();
+ try
+ {
+ value = Int32.Parse(str, NumberStyles.AllowHexSpecifier);
+ return (char)value;
+ }
+ catch (FormatException)
+ {
+ throw new RegExpException(
+ RegExpException.ErrorType.UNSUPPORTED_ESCAPE_CHARACTER,
+ _pos - str.Length - 2,
+ _pattern);
+ }
+ case 'u':
+ str = ReadChar().ToString() +
+ ReadChar().ToString() +
+ ReadChar().ToString() +
+ ReadChar().ToString();
+ try
+ {
+ value = Int32.Parse(str, NumberStyles.AllowHexSpecifier);
+ return (char)value;
+ }
+ catch (FormatException)
+ {
+ throw new RegExpException(
+ RegExpException.ErrorType.UNSUPPORTED_ESCAPE_CHARACTER,
+ _pos - str.Length - 2,
+ _pattern);
+ }
+ case 't':
+ return '\t';
+ case 'n':
+ return '\n';
+ case 'r':
+ return '\r';
+ case 'f':
+ return '\f';
+ case 'a':
+ return '\u0007';
+ case 'e':
+ return '\u001B';
+ default:
+ if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'))
+ {
+ throw new RegExpException(
+ RegExpException.ErrorType.UNSUPPORTED_ESCAPE_CHARACTER,
+ _pos - 2,
+ _pattern);
+ }
+ return c;
+ }
+ }
+
+ /**
+  * Reads the decimal digits at the current position as a number. Digits that
+  * overflow Int32 raise INVALID_REPEAT_COUNT instead of an OverflowException.
+  */
+ private int ReadNumber()
+ {
+     int firstPos = _pos;
+     int value;
+     StringBuilder digits = new StringBuilder();
+     for (int c = PeekChar(0); '0' <= c && c <= '9'; c = PeekChar(0)) { digits.Append(ReadChar()); }
+     if (digits.Length <= 0)
+     {
+         throw new RegExpException(RegExpException.ErrorType.UNEXPECTED_CHARACTER, _pos, _pattern);
+     }
+     if (Int32.TryParse(digits.ToString(), NumberStyles.None, CultureInfo.InvariantCulture, out value))
+     {
+         return value;
+     }
+     throw new RegExpException(RegExpException.ErrorType.INVALID_REPEAT_COUNT, firstPos, _pattern);
+ }
+
+ /**
+  * Consumes and returns the character at the current position.
+  * Throws UNTERMINATED_PATTERN when the pattern has no character left.
+  */
+ private char ReadChar()
+ {
+     int next = PeekChar(0);
+     if (next >= 0)
+     {
+         _pos++;
+         return (char)next;
+     }
+     throw new RegExpException(
+         RegExpException.ErrorType.UNTERMINATED_PATTERN,
+         _pos,
+         _pattern);
+ }
+
+ /**
+  * Consumes one character and checks that it equals c; a different
+  * character raises UNEXPECTED_CHARACTER at the position just read.
+  */
+ private char ReadChar(char c)
+ {
+     char actual = ReadChar();
+
+     if (actual == c) { return c; }
+     throw new RegExpException(RegExpException.ErrorType.UNEXPECTED_CHARACTER, _pos - 1, _pattern);
+ }
+
+ /**
+  * Returns the pattern character count positions ahead of the current
+  * position without consuming it, or -1 past the end of the pattern.
+  */
+ private int PeekChar(int count)
+ {
+     int index = _pos + count;
+     bool inside = index < _pattern.Length;
+
+     return inside ? _pattern[index] : -1;
+ }
+ }
+}
diff --git a/Parsing/TokenStringDFA.cs b/Parsing/TokenStringDFA.cs
new file mode 100644
index 0000000..710c7a2
--- /dev/null
+++ b/Parsing/TokenStringDFA.cs
@@ -0,0 +1,213 @@
+using System.Text;
+
+namespace Flee.Parsing
+{
+ /**
+ * A deterministic finite state automaton for matching exact strings.
+ * It uses a sorted binary tree representation of the state
+ * transitions in order to enable quick matches with a minimal memory
+ * footprint. It only supports a single character transition between
+ * states, but may be run in an all case-insensitive mode.
+ */
+ internal class TokenStringDFA
+ {
+
+ private readonly DFAState[] _ascii = new DFAState[128];
+ private readonly DFAState _nonAscii = new DFAState();
+
+ public TokenStringDFA()
+ {
+ }
+
+ /**
+  * Registers str as an exact match that yields value. A null or empty str
+  * is rejected with an ArgumentException rather than failing on str[0].
+  */
+ public void AddMatch(string str, bool caseInsensitive, TokenPattern value)
+ {
+     if (string.IsNullOrEmpty(str))
+     {
+         throw new ArgumentException("string pattern must not be null or empty", nameof(str));
+     }
+     char first = caseInsensitive ? Char.ToLower(str[0]) : str[0];
+     DFAState state;
+     int start = 0;
+     if (first < 128)
+     {
+         state = _ascii[first] ?? (_ascii[first] = new DFAState());
+         start = 1;
+     }
+     else
+     {
+         // Otherwise the whole string, first character included, goes into the shared tree.
+         state = _nonAscii;
+     }
+     for (int i = start; i < str.Length; i++)
+     {
+         DFAState next = state.Tree.Find(str[i], caseInsensitive);
+         if (next == null)
+         {
+             next = new DFAState();
+             state.Tree.Add(str[i], caseInsensitive, next);
+         }
+         state = next;
+     }
+     state.Value = value;
+ }
+
+ public TokenPattern Match(ReaderBuffer buffer, bool caseInsensitive)
+ {
+ TokenPattern result = null;
+ DFAState state;
+ int pos = 0;
+
+ var c = buffer.Peek(0);
+ if (c < 0)
+ {
+ return null;
+ }
+ if (caseInsensitive)
+ {
+ c = Char.ToLower((char)c);
+ }
+ if (c < 128)
+ {
+ state = _ascii[c];
+ if (state == null)
+ {
+ return null;
+ }
+ else if (state.Value != null)
+ {
+ result = state.Value;
+ }
+ pos++;
+ }
+ else
+ {
+ state = _nonAscii;
+ }
+ while ((c = buffer.Peek(pos)) >= 0)
+ {
+ state = state.Tree.Find((char)c, caseInsensitive);
+ if (state == null)
+ {
+ break;
+ }
+ else if (state.Value != null)
+ {
+ result = state.Value;
+ }
+ pos++;
+ }
+ return result;
+ }
+
+ public override string ToString()
+ {
+ StringBuilder buffer = new StringBuilder();
+
+ for (int i = 0; i < _ascii.Length; i++)
+ {
+ if (_ascii[i] != null)
+ {
+ buffer.Append((char)i);
+ if (_ascii[i].Value != null)
+ {
+ buffer.Append(": ");
+ buffer.Append(_ascii[i].Value);
+ buffer.Append("\n");
+ }
+ _ascii[i].Tree.PrintTo(buffer, " ");
+ }
+ }
+ _nonAscii.Tree.PrintTo(buffer, "");
+ return buffer.ToString();
+ }
+ }
+
+ internal class DFAState
+ {
+
+ internal TokenPattern Value;
+
+ internal TransitionTree Tree = new TransitionTree();
+ }
+
+
+ internal class TransitionTree
+ {
+     // '\0' marks a node that holds no transition yet.
+     private char _value = '\0';
+     private DFAState _state;
+     private TransitionTree _left;
+     private TransitionTree _right;
+
+     public TransitionTree()
+     {
+     }
+
+     /**
+      * Looks up the state stored for c, lower-casing c first when
+      * lowerCase is set. Yields null when no entry for c exists.
+      */
+     public DFAState Find(char c, bool lowerCase)
+     {
+         char key = lowerCase ? Char.ToLower(c) : c;
+         TransitionTree node = this;
+
+         // Smaller keys live to the left, all others to the right; an
+         // unused node ends the walk and contributes its null state.
+         while (node._value != '\0' && node._value != key)
+         {
+             node = (node._value > key) ? node._left : node._right;
+         }
+         return node._state;
+     }
+
+     /**
+      * Stores state under c (lower-cased first when lowerCase is set)
+      * in the first unused node found on the search path. The node
+      * then receives two fresh unused children.
+      */
+     public void Add(char c, bool lowerCase, DFAState state)
+     {
+         char key = lowerCase ? Char.ToLower(c) : c;
+         TransitionTree node = this;
+
+         while (node._value != '\0')
+         {
+             node = (node._value > key) ? node._left : node._right;
+         }
+         node._value = key;
+         node._state = state;
+         node._left = new TransitionTree();
+         node._right = new TransitionTree();
+     }
+
+     /**
+      * Appends the entries of this tree to buffer in ascending key
+      * order, each followed by the entries reachable from its state.
+      */
+     public void PrintTo(StringBuilder buffer, String indent)
+     {
+         _left?.PrintTo(buffer, indent);
+         if (_value != '\0')
+         {
+             bool atLineStart = buffer.Length > 0 && buffer[buffer.Length - 1] == '\n';
+             if (atLineStart)
+             {
+                 buffer.Append(indent);
+             }
+             buffer.Append(_value);
+             TokenPattern accepted = _state.Value;
+             if (accepted != null)
+             {
+                 buffer.Append(": ").Append(accepted).Append("\n");
+             }
+             _state.Tree.PrintTo(buffer, indent + " ");
+         }
+         _right?.PrintTo(buffer, indent);
+     }
+ }
+}
diff --git a/Parsing/Tokenizer.cs b/Parsing/Tokenizer.cs
new file mode 100644
index 0000000..82d6cb7
--- /dev/null
+++ b/Parsing/Tokenizer.cs
@@ -0,0 +1,444 @@
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Flee.Parsing
+{
+ /**
+ * A character stream tokenizer. This class groups the characters read
+ * from the stream together into tokens ("words"). The grouping is
+ * controlled by token patterns that contain either a fixed string to
+ * search for, or a regular expression. If the stream of characters
+ * don't match any of the token patterns, a parse exception is thrown.
+ */
+ internal class Tokenizer
+ {
+ private bool _useTokenList = false;
+ private readonly StringDFAMatcher _stringDfaMatcher;
+ private readonly NFAMatcher _nfaMatcher;
+ private readonly RegExpMatcher _regExpMatcher;
+ private ReaderBuffer _buffer = null;
+ private readonly TokenMatch _lastMatch = new TokenMatch();
+ private Token _previousToken = null;
+
+ public Tokenizer(TextReader input)
+ : this(input, false)
+ {
+ }
+
+ public Tokenizer(TextReader input, bool ignoreCase)
+ {
+ this._stringDfaMatcher = new StringDFAMatcher(ignoreCase);
+ this._nfaMatcher = new NFAMatcher(ignoreCase);
+ this._regExpMatcher = new RegExpMatcher(ignoreCase);
+ this._buffer = new ReaderBuffer(input);
+ }
+
+ public bool UseTokenList
+ {
+ get
+ {
+ return _useTokenList;
+ }
+ set
+ {
+ _useTokenList = value;
+ }
+ }
+
+ public bool GetUseTokenList()
+ {
+ return _useTokenList;
+ }
+
+ public void SetUseTokenList(bool useTokenList)
+ {
+ this._useTokenList = useTokenList;
+ }
+
+ /**
+  * Returns the short description of the pattern registered under id,
+  * or null when none of the three matchers knows that id. The string
+  * DFA matcher is asked first, then the NFA matcher, then the regexp
+  * matcher.
+  */
+ public string GetPatternDescription(int id)
+ {
+     TokenPattern found = _stringDfaMatcher.GetPattern(id)
+         ?? _nfaMatcher.GetPattern(id)
+         ?? _regExpMatcher.GetPattern(id);
+     return found?.ToShortString();
+ }
+
+ public int GetCurrentLine()
+ {
+ return _buffer.LineNumber;
+ }
+
+ public int GetCurrentColumn()
+ {
+ return _buffer.ColumnNumber;
+ }
+
+ /**
+ * nfa - true to first try the regexp pattern as an NFA; if the NFA matcher rejects it (it handles most things except complex repeats, e.g. {1,4}), the pattern is added to the regular expression matcher instead.
+ */
+ public void AddPattern(TokenPattern pattern, bool nfa=true)
+ {
+ switch (pattern.Type)
+ {
+ case TokenPattern.PatternType.STRING:
+ try
+ {
+ _stringDfaMatcher.AddPattern(pattern);
+ }
+ catch (Exception e)
+ {
+ throw new ParserCreationException(
+ ParserCreationException.ErrorType.INVALID_TOKEN,
+ pattern.Name,
+ "error adding string token: " +
+ e.Message);
+ }
+ break;
+ case TokenPattern.PatternType.REGEXP:
+ if (nfa)
+ {
+ try
+ {
+ _nfaMatcher.AddPattern(pattern);
+ }
+ catch (Exception)
+ {
+ nfa = false;
+ }
+ }
+ if (!nfa)
+ {
+ try
+ {
+ _regExpMatcher.AddPattern(pattern);
+ }
+ catch (Exception e)
+ {
+ throw new ParserCreationException(
+ ParserCreationException.ErrorType.INVALID_TOKEN,
+ pattern.Name,
+ "regular expression contains error(s): " +
+ e.Message);
+ }
+ }
+
+ break;
+ default:
+ throw new ParserCreationException(
+ ParserCreationException.ErrorType.INVALID_TOKEN,
+ pattern.Name,
+ "pattern type " + pattern.Type +
+ " is undefined");
+ }
+ }
+
+ public void Reset(TextReader input)
+ {
+ //this.buffer.Dispose();
+ this._buffer = new ReaderBuffer(input);
+ this._previousToken = null;
+ this._lastMatch.Clear();
+ }
+
+ /**
+  * Returns the next token whose pattern is not flagged Ignore, or null
+  * when NextToken() yields none (the previous-token link is cleared then).
+  * While the token list is enabled every token read, ignored ones included,
+  * is linked through Previous. An error-pattern token raises INVALID_TOKEN.
+  */
+ public Token Next()
+ {
+     while (true)
+     {
+         Token token = NextToken();
+         if (token == null)
+         {
+             _previousToken = null;
+             return null;
+         }
+         if (_useTokenList)
+         {
+             token.Previous = _previousToken;
+             _previousToken = token;
+         }
+         if (token.Pattern.Ignore) { continue; }
+         if (token.Pattern.Error)
+         {
+             throw new ParseException(
+                 ParseException.ErrorType.INVALID_TOKEN,
+                 token.Pattern.ErrorMessage,
+                 token.StartLine,
+                 token.StartColumn);
+         }
+         return token;
+     }
+ }
+
+ private Token NextToken()
+ {
+ try
+ {
+ _lastMatch.Clear();
+ _stringDfaMatcher.Match(_buffer, _lastMatch);
+ _nfaMatcher.Match(_buffer, _lastMatch);
+ _regExpMatcher.Match(_buffer, _lastMatch);
+ int line;
+ int column;
+ if (_lastMatch.Length > 0)
+ {
+ line = _buffer.LineNumber;
+ column = _buffer.ColumnNumber;
+ var str = _buffer.Read(_lastMatch.Length);
+ return NewToken(_lastMatch.Pattern, str, line, column);
+ }
+ else if (_buffer.Peek(0) < 0)
+ {
+ return null;
+ }
+ else
+ {
+ line = _buffer.LineNumber;
+ column = _buffer.ColumnNumber;
+ throw new ParseException(
+ ParseException.ErrorType.UNEXPECTED_CHAR,
+ _buffer.Read(1),
+ line,
+ column);
+ }
+ }
+ catch (IOException e)
+ {
+ throw new ParseException(ParseException.ErrorType.IO,
+ e.Message,
+ -1,
+ -1);
+ }
+ }
+
+ protected virtual Token NewToken(TokenPattern pattern,
+ string image,
+ int line,
+ int column)
+ {
+
+ return new Token(pattern, image, line, column);
+ }
+
+ public override string ToString()
+ {
+ StringBuilder buffer = new StringBuilder();
+ buffer.Append(_stringDfaMatcher);
+ buffer.Append(_nfaMatcher);
+ buffer.Append(_regExpMatcher);
+ return buffer.ToString();
+ }
+ }
+
+ internal abstract class TokenMatcher
+ {
+     protected TokenPattern[] Patterns = new TokenPattern[0];
+
+     protected bool IgnoreCase = false;
+
+     protected TokenMatcher(bool ignoreCase)
+     {
+         IgnoreCase = ignoreCase;
+     }
+
+     public abstract void Match(ReaderBuffer buffer, TokenMatch match);
+
+     // Returns the first added pattern whose Id equals id, or null.
+     public TokenPattern GetPattern(int id)
+     {
+         foreach (TokenPattern candidate in Patterns)
+         {
+             if (candidate.Id == id) { return candidate; }
+         }
+         return null;
+     }
+
+     // Grows Patterns by one slot and stores pattern in it.
+     public virtual void AddPattern(TokenPattern pattern)
+     {
+         int slot = Patterns.Length;
+         Array.Resize(ref Patterns, slot + 1);
+         Patterns[slot] = pattern;
+     }
+
+     // Concatenates each pattern's text, every entry followed by two newlines.
+     public override string ToString()
+     {
+         StringBuilder text = new StringBuilder();
+         foreach (TokenPattern pattern in Patterns)
+         {
+             text.Append(pattern).Append("\n\n");
+         }
+         return text.ToString();
+     }
+ }
+
+ internal class StringDFAMatcher : TokenMatcher
+ {
+     // Automaton holding every string pattern added so far.
+     private readonly TokenStringDFA _automaton = new TokenStringDFA();
+
+     public StringDFAMatcher(bool ignoreCase) : base(ignoreCase)
+     {
+     }
+
+     // Registers the pattern text in the automaton, then in the base list.
+     public override void AddPattern(TokenPattern pattern)
+     {
+         _automaton.AddMatch(pattern.Pattern, IgnoreCase, pattern);
+         base.AddPattern(pattern);
+     }
+
+     // Reports the automaton's match, using the pattern text length
+     // as the match length; does nothing when there is no match.
+     public override void Match(ReaderBuffer buffer, TokenMatch match)
+     {
+         TokenPattern hit = _automaton.Match(buffer, IgnoreCase);
+         if (hit == null) { return; }
+         match.Update(hit.Pattern.Length, hit);
+     }
+ }
+
+ internal class NFAMatcher : TokenMatcher
+ {
+
+ private readonly TokenNFA _automaton = new TokenNFA();
+
+ public NFAMatcher(bool ignoreCase) : base(ignoreCase)
+ {
+ }
+
+ public override void AddPattern(TokenPattern pattern)
+ {
+ if (pattern.Type == TokenPattern.PatternType.STRING)
+ {
+ _automaton.AddTextMatch(pattern.Pattern, IgnoreCase, pattern);
+ }
+ else
+ {
+ _automaton.AddRegExpMatch(pattern.Pattern, IgnoreCase, pattern);
+ }
+ base.AddPattern(pattern);
+ }
+
+ public override void Match(ReaderBuffer buffer, TokenMatch match)
+ {
+ _automaton.Match(buffer, match);
+ }
+ }
+
+ internal class RegExpMatcher : TokenMatcher
+ {
+     // _regExps[i] is the handler built for Patterns[i].
+     private REHandler[] _regExps = new REHandler[0];
+     public RegExpMatcher(bool ignoreCase) : base(ignoreCase)
+     {
+     }
+
+     // Builds a GrammaticaRE handler, or a SystemRE one when that throws.
+     public override void AddPattern(TokenPattern pattern)
+     {
+         REHandler handler;
+         try
+         {
+             handler = new GrammaticaRE(pattern.Pattern, IgnoreCase);
+             pattern.DebugInfo = "Grammatica regexp\n" + handler;
+         }
+         catch (Exception)
+         {
+             handler = new SystemRE(pattern.Pattern, IgnoreCase);
+             pattern.DebugInfo = "native .NET regexp";
+         }
+         int slot = _regExps.Length;
+         Array.Resize(ref _regExps, slot + 1);
+         _regExps[slot] = handler;
+         base.AddPattern(pattern);
+     }
+
+     // Runs every handler and reports each match longer than zero.
+     public override void Match(ReaderBuffer buffer, TokenMatch match)
+     {
+         for (int i = 0; i < _regExps.Length; i++)
+         {
+             int matched = _regExps[i].Match(buffer);
+             if (matched > 0) { match.Update(matched, Patterns[i]); }
+         }
+     }
+ }
+
+
+ internal abstract class REHandler
+ {
+ public abstract int Match(ReaderBuffer buffer);
+ }
+
+ internal class GrammaticaRE : REHandler
+ {
+ private readonly RegExp _regExp;
+ private Matcher _matcher = null;
+
+ public GrammaticaRE(string regex, bool ignoreCase)
+ {
+ _regExp = new RegExp(regex, ignoreCase);
+ }
+
+ public override int Match(ReaderBuffer buffer)
+ {
+ if (_matcher == null)
+ {
+ _matcher = _regExp.Matcher(buffer);
+ }
+ else
+ {
+ _matcher.Reset(buffer);
+ }
+ return _matcher.MatchFromBeginning() ? _matcher.Length() : 0;
+ }
+ }
+
+ internal class SystemRE : REHandler
+ {
+     private readonly Regex _reg;
+
+     // Compiles regex with the .NET engine, case-insensitively on request.
+     public SystemRE(string regex, bool ignoreCase)
+     {
+         RegexOptions options = ignoreCase
+             ? RegexOptions.IgnoreCase
+             : RegexOptions.None;
+
+         _reg = new Regex(regex, options);
+     }
+
+     // Returns the length of a match that begins exactly at the buffer
+     // position, or 0 when there is no such match.
+     public override int Match(ReaderBuffer buffer)
+     {
+         // Ugly hack since .NET doesn't have a flag for when the
+         // end of the input string was encountered...
+         buffer.Peek(1024 * 16);
+
+         // Also, there is no API to limit the search to the specified
+         // position, so we double-check the index afterwards instead.
+         string text = buffer.ToString();
+         int origin = buffer.Position;
+         Match found = _reg.Match(text, origin);
+
+         // A hit that starts later than the buffer position does not count.
+         if (!found.Success)
+         {
+             return 0;
+         }
+         return (found.Index == origin) ? found.Length : 0;
+     }
+ }
+}
diff --git a/PublicTypes/Exceptions.cs b/PublicTypes/Exceptions.cs
new file mode 100644
index 0000000..e484660
--- /dev/null
+++ b/PublicTypes/Exceptions.cs
@@ -0,0 +1,67 @@
+using Flee.InternalTypes;
+using Flee.Parsing;
+using Flee.Resources;
+
+namespace Flee.PublicTypes
+{
+ public enum CompileExceptionReason
+ {
+ SyntaxError,
+ ConstantOverflow,
+ TypeMismatch,
+ UndefinedName,
+ FunctionHasNoReturnValue,
+ InvalidExplicitCast,
+ AmbiguousMatch,
+ AccessDenied,
+ InvalidFormat
+ }
+
+ ///
+ ///
+ ///
+ [Serializable()]
+ public sealed class ExpressionCompileException : Exception
+ {
+     private readonly CompileExceptionReason _myReason;
+
+     internal ExpressionCompileException(string message, CompileExceptionReason reason) : base(message)
+     {
+         _myReason = reason;
+     }
+
+     internal ExpressionCompileException(ParserLogException parseException) : base(string.Empty, parseException)
+     {
+         _myReason = CompileExceptionReason.SyntaxError;
+     }
+
+     private ExpressionCompileException(System.Runtime.Serialization.SerializationInfo info, System.Runtime.Serialization.StreamingContext context) : base(info, context)
+     {
+         _myReason = (CompileExceptionReason)info.GetInt32("Reason");
+     }
+
+     public override void GetObjectData(System.Runtime.Serialization.SerializationInfo info, System.Runtime.Serialization.StreamingContext context)
+     {
+         base.GetObjectData(info, context);
+         info.AddValue("Reason", Convert.ToInt32(_myReason));
+     }
+
+     // For syntax errors the text is the SyntaxError compile-error message
+     // followed by the inner exception's message. Without an inner exception
+     // (message/reason constructor) the base message is used instead of
+     // dereferencing null.
+     public override string Message
+     {
+         get
+         {
+             Exception innerEx = this.InnerException;
+             if (_myReason != CompileExceptionReason.SyntaxError || innerEx == null)
+             {
+                 return base.Message;
+             }
+             return $"{Utility.GetCompileErrorMessage(CompileErrorResourceKeys.SyntaxError)}: {innerEx.Message}";
+         }
+     }
+     public CompileExceptionReason Reason => _myReason;
+ }
+}
diff --git a/PublicTypes/ExpressionContext.cs b/PublicTypes/ExpressionContext.cs
new file mode 100644
index 0000000..fcfe7bc
--- /dev/null
+++ b/PublicTypes/ExpressionContext.cs
@@ -0,0 +1,251 @@
+using Flee.CalcEngine.InternalTypes;
+using Flee.CalcEngine.PublicTypes;
+using Flee.ExpressionElements.Base;
+using Flee.InternalTypes;
+using Flee.Parsing;
+using Flee.Resources;
+
+namespace Flee.PublicTypes
+{
+ public sealed class ExpressionContext
+ {
+
+ #region "Fields"
+
+ // Named property bag; the constructor fills it with entries such as "ExpressionOwner",
+ // "Options", "ParserOptions", "Imports" and "ExpressionParser". Replaced with a copy on clone.
+ private PropertyDictionary _myProperties;
+
+ // Lock object taken by Parse and RecreateParser.
+ private readonly object _mySyncRoot = new object();
+
+ // Variables bound to this context; CloneInternal replaces it with a copy when asked to clone variables.
+ private VariableCollection _myVariables;
+ #endregion
+
+ #region "Constructor"
+
+ /// <summary>
+ /// Creates a context whose expression owner is <c>DefaultExpressionOwner.Instance</c>.
+ /// </summary>
+ public ExpressionContext() : this(DefaultExpressionOwner.Instance) { }
+
+ /// <summary>
+ /// Creates a context bound to the given expression owner and builds its parser.
+ /// </summary>
+ /// <param name="expressionOwner">Owner object; checked with <c>Utility.AssertNotNull</c>.</param>
+ public ExpressionContext(object expressionOwner)
+ {
+     Utility.AssertNotNull(expressionOwner, nameof(expressionOwner));
+
+     _myProperties = new PropertyDictionary();
+
+     // Entries that start out empty.
+     _myProperties.SetValue("CalculationEngine", null);
+     _myProperties.SetValue("CalcEngineExpressionName", null);
+     _myProperties.SetValue("IdentifierParser", null);
+
+     _myProperties.SetValue("ExpressionOwner", expressionOwner);
+
+     // The option objects receive this context, so they are created after the owner is stored.
+     _myProperties.SetValue("ParserOptions", new ExpressionParserOptions(this));
+     _myProperties.SetValue("Options", new ExpressionOptions(this));
+
+     _myProperties.SetValue("Imports", new ExpressionImports());
+     this.Imports.SetContext(this);
+
+     _myVariables = new VariableCollection(this);
+
+     _myProperties.SetToDefault("NoClone");
+
+     this.RecreateParser();
+ }
+
+ #endregion
+
+ #region "Methods - Private"
+
+ /// <summary>
+ /// Throws unless <paramref name="t"/> is public (or nested-public when it is a nested type)
+ /// or lives in the same module as the expression owner's type.
+ /// </summary>
+ /// <param name="t">Type to check.</param>
+ /// <exception cref="ArgumentException">The type is neither public nor in the owner's module.</exception>
+ private void AssertTypeIsAccessibleInternal(Type t)
+ {
+     // Nested types report their visibility through IsNestedPublic.
+     bool hasPublicVisibility = t.IsNested ? t.IsNestedPublic : t.IsPublic;
+     bool sharesOwnerModule = object.ReferenceEquals(t.Module, this.ExpressionOwner.GetType().Module);
+
+     if (hasPublicVisibility || sharesOwnerModule)
+     {
+         return;
+     }
+
+     throw new ArgumentException(
+         Utility.GetGeneralErrorMessage(GeneralErrorResourceKeys.TypeNotAccessibleToExpression, t.Name));
+ }
+
+ /// <summary>
+ /// Checks <paramref name="t"/> and then each enclosing type in turn, following
+ /// <c>DeclaringType</c> until it is null.
+ /// </summary>
+ private void AssertNestedTypeIsAccessible(Type t)
+ {
+     for (Type current = t; current != null; current = current.DeclaringType)
+     {
+         AssertTypeIsAccessibleInternal(current);
+     }
+ }
+ #endregion
+
+ #region "Methods - Internal"
+ /// <summary>
+ /// Produces a copy of this context with its own property bag, options, parser options
+ /// and imports.
+ /// </summary>
+ /// <param name="cloneVariables">
+ /// When true the copy gets a new variable collection filled from this one; when false
+ /// the copy keeps the reference obtained from the memberwise clone.
+ /// </param>
+ /// <returns>The new context.</returns>
+ internal ExpressionContext CloneInternal(bool cloneVariables)
+ {
+     var copy = (ExpressionContext)this.MemberwiseClone();
+
+     // Give the copy its own property bag, then replace the entries that must not be shared.
+     copy._myProperties = _myProperties.Clone();
+     copy._myProperties.SetValue("Options", copy.Options.Clone());
+     copy._myProperties.SetValue("ParserOptions", copy.ParserOptions.Clone());
+     copy._myProperties.SetValue("Imports", copy.Imports.Clone());
+     copy.Imports.SetContext(copy);
+
+     if (cloneVariables)
+     {
+         copy._myVariables = new VariableCollection(copy);
+         this.Variables.Copy(copy._myVariables);
+     }
+
+     return copy;
+ }
+
+ /// <summary>
+ /// Verifies that <paramref name="t"/> may be used from an expression; a nested type is
+ /// checked together with its enclosing types.
+ /// </summary>
+ internal void AssertTypeIsAccessible(Type t)
+ {
+     if (!t.IsNested)
+     {
+         AssertTypeIsAccessibleInternal(t);
+         return;
+     }
+
+     AssertNestedTypeIsAccessible(t);
+ }
+
+ /// <summary>
+ /// Parses <paramref name="expression"/> with this context's parser and returns the first
+ /// value of the resulting root node as the top expression element.
+ /// </summary>
+ /// <param name="expression">Expression text to parse.</param>
+ /// <param name="services">Services handed to the analyzer for the duration of the parse.</param>
+ /// <returns>The top-level expression element.</returns>
+ /// <exception cref="ExpressionCompileException">Thrown by <c>DoParse</c> when the parser reports a syntax error.</exception>
+ internal ExpressionElement Parse(string expression, IServiceProvider services)
+ {
+     lock (_mySyncRoot)
+     {
+         System.IO.StringReader sr = new System.IO.StringReader(expression);
+         ExpressionParser parser = this.Parser;
+         parser.Reset(sr);
+         parser.Tokenizer.Reset(sr);
+         FleeExpressionAnalyzer analyzer = (FleeExpressionAnalyzer)parser.Analyzer;
+
+         analyzer.SetServices(services);
+
+         Node rootNode;
+         try
+         {
+             rootNode = DoParse();
+         }
+         finally
+         {
+             // Reset even when parsing throws, so the analyzer does not keep the caller's
+             // services after a failed compile.
+             analyzer.Reset();
+         }
+
+         return (ExpressionElement)rootNode.Values[0];
+     }
+ }
+
+ /// <summary>
+ /// Builds a new expression parser with a new analyzer and stores it under the
+ /// "ExpressionParser" property, while holding the sync-root lock.
+ /// </summary>
+ internal void RecreateParser()
+ {
+     lock (_mySyncRoot)
+     {
+         var freshAnalyzer = new FleeExpressionAnalyzer();
+         _myProperties.SetValue(
+             "ExpressionParser",
+             new ExpressionParser(TextReader.Null, freshAnalyzer, this));
+     }
+ }
+
+ /// <summary>
+ /// Runs the parser and returns its root node.
+ /// </summary>
+ /// <exception cref="ExpressionCompileException">Wraps any <c>ParserLogException</c> raised by the parser.</exception>
+ internal Node DoParse()
+ {
+     try
+     {
+         Node root = this.Parser.Parse();
+         return root;
+     }
+     catch (ParserLogException syntaxErrors)
+     {
+         // Surface parser failures as the library's own compile exception.
+         throw new ExpressionCompileException(syntaxErrors);
+     }
+ }
+
+ /// <summary>
+ /// Stores the calculation engine and the expression's name within it in this context's properties.
+ /// </summary>
+ /// <param name="engine">Value stored under "CalculationEngine".</param>
+ /// <param name="calcEngineExpressionName">Value stored under "CalcEngineExpressionName".</param>
+ internal void SetCalcEngine(CalculationEngine engine, string calcEngineExpressionName)
+ {
+     var properties = _myProperties;
+     properties.SetValue("CalculationEngine", engine);
+     properties.SetValue("CalcEngineExpressionName", calcEngineExpressionName);
+ }
+
+ /// <summary>
+ /// Parses <paramref name="expression"/> with the identifier parser and returns that
+ /// parser's analyzer, which is reset before the parse starts.
+ /// </summary>
+ /// <param name="expression">Expression text to scan.</param>
+ /// <returns>The identifier parser's analyzer after the parse.</returns>
+ internal IdentifierAnalyzer ParseIdentifiers(string expression)
+ {
+     ExpressionParser identifierParser = this.IdentifierParser;
+     var source = new StringReader(expression);
+     identifierParser.Reset(source);
+     identifierParser.Tokenizer.Reset(source);
+
+     // Clear whatever the analyzer holds from an earlier run before parsing again.
+     ((IdentifierAnalyzer)identifierParser.Analyzer).Reset();
+
+     identifierParser.Parse();
+
+     return (IdentifierAnalyzer)identifierParser.Analyzer;
+ }
+ #endregion
+
+ #region "Methods - Public"
+
+ /// <summary>
+ /// Returns a copy of this context, including a copy of its variables.
+ /// </summary>
+ public ExpressionContext Clone() => this.CloneInternal(true);
+
+ public IDynamicExpression CompileDynamic(string expression)
+ {
+ return new Flee.InternalTypes.Expression