Flee
This commit is contained in:
60
Parsing/AlternativeElement.cs
Normal file
60
Parsing/AlternativeElement.cs
Normal file
@@ -0,0 +1,60 @@
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/// <summary>
/// A regular expression alternative element. Both alternatives are tried
/// from the same start position and the longer match is reported.
/// </summary>
internal class AlternativeElement : Element
{
    private readonly Element _elem1;
    private readonly Element _elem2;

    /// <summary>
    /// Creates an alternative over the two given elements. The elements are
    /// stored as-is (no copies are made).
    /// </summary>
    public AlternativeElement(Element first, Element second)
    {
        _elem1 = first;
        _elem2 = second;
    }

    /// <summary>
    /// Returns a new alternative element that refers to the same two
    /// child elements as this one.
    /// </summary>
    public override object Clone() => new AlternativeElement(_elem1, _elem2);

    /// <summary>
    /// Matches both alternatives at <paramref name="start"/> and returns the
    /// length selected by the loop below.
    /// </summary>
    /// <returns>The selected match length, or a negative value once the
    /// better of the two alternatives reports a negative length.</returns>
    public override int Match(Matcher m, ReaderBuffer buffer, int start, int skip)
    {
        int best = 0;
        int firstSkips = 0;
        int secondSkips = 0;

        // Each round re-matches both children with their own skip counters and
        // advances the counter of whichever child produced the longer (or an
        // equal) match, until the counters together exceed 'skip'.
        while (best >= 0 && firstSkips + secondSkips <= skip)
        {
            int firstLength = _elem1.Match(m, buffer, start, firstSkips);
            int secondLength = _elem2.Match(m, buffer, start, secondSkips);

            if (firstLength < secondLength)
            {
                best = secondLength;
                secondSkips++;
            }
            else
            {
                best = firstLength;
                firstSkips++;
            }
        }

        return best;
    }

    /// <summary>
    /// Prints both alternatives, each under its own heading and with the
    /// indentation extended by one space.
    /// </summary>
    public override void PrintTo(TextWriter output, string indent)
    {
        string childIndent = indent + " ";

        output.WriteLine(indent + "Alternative 1");
        _elem1.PrintTo(output, childIndent);
        output.WriteLine(indent + "Alternative 2");
        _elem2.PrintTo(output, childIndent);
    }
}
|
||||
}
|
240
Parsing/Analyzer.cs
Normal file
240
Parsing/Analyzer.cs
Normal file
@@ -0,0 +1,240 @@
|
||||
using System.Collections;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/// <summary>
/// A parse tree analyzer. It walks a parse tree and invokes the
/// <see cref="Enter"/>, <see cref="Child"/> and <see cref="Exit"/> callbacks
/// for every node; productions are rebuilt through
/// <see cref="NewProduction"/> while they are analyzed. Subclasses override
/// the callbacks to attach behavior.
/// </summary>
/// <remarks>
/// This class previously carried an <c>[Obsolete]</c> attribute whose message
/// was the constructor's documentation sentence. That flagged every use of
/// the analyzer as deprecated, so it has been replaced by this comment.
/// </remarks>
internal class Analyzer
{
    /// <summary>
    /// Creates a new parse tree analyzer.
    /// </summary>
    public Analyzer()
    {
    }

    /// <summary>
    /// Resets this analyzer when the parser is reset for another input
    /// stream. The default implementation of this method does nothing.
    /// </summary>
    public virtual void Reset()
    {
        // Default implementation does nothing
    }

    /// <summary>
    /// Analyzes a parse tree node and all of its children.
    /// </summary>
    /// <param name="node">The node to analyze.</param>
    /// <returns>The node returned by the internal analysis pass.</returns>
    /// <exception cref="ParserLogException">Thrown when at least one error
    /// was logged during the analysis.</exception>
    public Node Analyze(Node node)
    {
        ParserLogException log = new ParserLogException();

        node = Analyze(node, log);
        if (log.Count > 0)
        {
            throw log;
        }
        return node;
    }

    /// <summary>
    /// Analyzes a node, collecting every <see cref="ParseException"/> raised
    /// by the callbacks in <paramref name="log"/> instead of propagating it.
    /// </summary>
    /// <returns>The result of <see cref="Exit"/>, or <c>null</c> when
    /// <see cref="Exit"/> threw a <see cref="ParseException"/>.</returns>
    private Node Analyze(Node node, ParserLogException log)
    {
        // Remembered so that an Exit failure is only logged when nothing else
        // was logged while this node (and its children) were processed.
        var errorCount = log.Count;

        if (node is Production source)
        {
            // Productions are rebuilt: children are re-attached through Child().
            var prod = NewProduction(source.Pattern);
            try
            {
                Enter(prod);
            }
            catch (ParseException e)
            {
                log.AddError(e);
            }
            for (int i = 0; i < node.Count; i++)
            {
                try
                {
                    Child(prod, Analyze(node[i], log));
                }
                catch (ParseException e)
                {
                    log.AddError(e);
                }
            }
            try
            {
                return Exit(prod);
            }
            catch (ParseException e)
            {
                if (errorCount == log.Count)
                {
                    log.AddError(e);
                }
            }
        }
        else
        {
            // Non-production nodes are reused; stale values are discarded first.
            node.Values.Clear();
            try
            {
                Enter(node);
            }
            catch (ParseException e)
            {
                log.AddError(e);
            }
            try
            {
                return Exit(node);
            }
            catch (ParseException e)
            {
                if (errorCount == log.Count)
                {
                    log.AddError(e);
                }
            }
        }
        return null;
    }

    /// <summary>
    /// Factory for the production nodes created during analysis. The default
    /// implementation returns a plain <see cref="Production"/>.
    /// </summary>
    public virtual Production NewProduction(ProductionPattern pattern)
    {
        return new Production(pattern);
    }

    /// <summary>
    /// Called when a node is entered. The default implementation does nothing.
    /// </summary>
    public virtual void Enter(Node node)
    {
    }

    /// <summary>
    /// Called when a node is exited. The default implementation returns the
    /// node unchanged.
    /// </summary>
    public virtual Node Exit(Node node)
    {
        return node;
    }

    /// <summary>
    /// Called for each analyzed child of a production. The default
    /// implementation adds the child to the production.
    /// </summary>
    public virtual void Child(Production node, Node child)
    {
        node.AddChild(child);
    }

    /// <summary>
    /// Returns the child at the given position.
    /// </summary>
    /// <exception cref="ParseException">Thrown when <paramref name="node"/>
    /// is <c>null</c> or the indexer yields <c>null</c> for
    /// <paramref name="pos"/>.</exception>
    protected Node GetChildAt(Node node, int pos)
    {
        if (node == null)
        {
            throw NullNodeException();
        }
        var child = node[pos];
        if (child == null)
        {
            throw new ParseException(
                ParseException.ErrorType.INTERNAL,
                "node '" + node.Name + "' has no child at " +
                "position " + pos,
                node.StartLine,
                node.StartColumn);
        }
        return child;
    }

    /// <summary>
    /// Returns the first child whose <c>Id</c> equals <paramref name="id"/>.
    /// </summary>
    /// <exception cref="ParseException">Thrown when <paramref name="node"/>
    /// is <c>null</c> or no child has the requested id.</exception>
    protected Node GetChildWithId(Node node, int id)
    {
        if (node == null)
        {
            throw NullNodeException();
        }
        for (int i = 0; i < node.Count; i++)
        {
            var child = node[i];
            if (child != null && child.Id == id)
            {
                return child;
            }
        }
        throw new ParseException(
            ParseException.ErrorType.INTERNAL,
            "node '" + node.Name + "' has no child with id " + id,
            node.StartLine,
            node.StartColumn);
    }

    /// <summary>
    /// Returns the node value stored at the given position.
    /// </summary>
    /// <exception cref="ParseException">Thrown when <paramref name="node"/>
    /// is <c>null</c> or the stored value is <c>null</c>.</exception>
    protected object GetValue(Node node, int pos)
    {
        if (node == null)
        {
            throw NullNodeException();
        }
        var value = node.Values[pos];
        if (value == null)
        {
            throw new ParseException(
                ParseException.ErrorType.INTERNAL,
                "node '" + node.Name + "' has no value at " +
                "position " + pos,
                node.StartLine,
                node.StartColumn);
        }
        return value;
    }

    /// <summary>
    /// Returns the node value at the given position as an <c>int</c>.
    /// </summary>
    /// <exception cref="ParseException">Thrown when the value is missing or
    /// is not an <c>int</c>.</exception>
    protected int GetIntValue(Node node, int pos)
    {
        if (GetValue(node, pos) is int number)
        {
            return number;
        }
        throw new ParseException(
            ParseException.ErrorType.INTERNAL,
            "node '" + node.Name + "' has no integer value " +
            "at position " + pos,
            node.StartLine,
            node.StartColumn);
    }

    /// <summary>
    /// Returns the node value at the given position as a <c>string</c>.
    /// </summary>
    /// <exception cref="ParseException">Thrown when the value is missing or
    /// is not a <c>string</c>.</exception>
    protected string GetStringValue(Node node, int pos)
    {
        if (GetValue(node, pos) is string text)
        {
            return text;
        }
        throw new ParseException(
            ParseException.ErrorType.INTERNAL,
            "node '" + node.Name + "' has no string value " +
            "at position " + pos,
            node.StartLine,
            node.StartColumn);
    }

    /// <summary>
    /// Collects the values of all children of <paramref name="node"/>, in
    /// child order, into a single list. Children whose value list is
    /// <c>null</c> contribute nothing.
    /// </summary>
    protected ArrayList GetChildValues(Node node)
    {
        ArrayList result = new ArrayList();

        for (int i = 0; i < node.Count; i++)
        {
            var values = node[i].Values;
            if (values != null)
            {
                result.AddRange(values);
            }
        }
        return result;
    }

    // Builds the internal error raised by the accessors above when they are
    // handed a null node (no position information is available, hence -1/-1).
    private static ParseException NullNodeException()
    {
        return new ParseException(
            ParseException.ErrorType.INTERNAL,
            "attempt to read 'null' parse tree node",
            -1,
            -1);
    }
}
|
||||
}
|
111
Parsing/Automaton.cs
Normal file
111
Parsing/Automaton.cs
Normal file
@@ -0,0 +1,111 @@
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/// <summary>
/// One state of a character-by-character matching automaton. A state holds
/// an optional value (set when a string ends in this state) and a tree of
/// transitions to follow-up states, one per character.
/// </summary>
internal class Automaton
{
    private object _value;
    private readonly AutomatonTree _tree = new AutomatonTree();

    public Automaton()
    {
    }

    /// <summary>
    /// Registers <paramref name="value"/> for the string
    /// <paramref name="str"/>, creating intermediate states as needed.
    /// </summary>
    public void AddMatch(string str, bool caseInsensitive, object value)
    {
        if (str.Length == 0)
        {
            // The whole string has been consumed: this state is the match.
            _value = value;
            return;
        }

        char head = str[0];
        string rest = str.Substring(1);
        Automaton next = _tree.Find(head, caseInsensitive);

        if (next != null)
        {
            next.AddMatch(rest, caseInsensitive, value);
            return;
        }

        // No transition for this character yet: build the new branch first,
        // then link it into the transition tree.
        next = new Automaton();
        next.AddMatch(rest, caseInsensitive, value);
        _tree.Add(head, caseInsensitive, next);
    }

    /// <summary>
    /// Follows the transitions for the characters peeked from
    /// <paramref name="input"/> starting at <paramref name="pos"/>.
    /// </summary>
    /// <returns>The value of the deepest reachable state that has one,
    /// falling back to this state's own value (possibly <c>null</c>).</returns>
    public object MatchFrom(LookAheadReader input, int pos, bool caseInsensitive)
    {
        int c = input.Peek(pos);
        if (c < 0)
        {
            return _value;
        }

        Automaton next = _tree.Find(Convert.ToChar(c), caseInsensitive);
        if (next == null)
        {
            return _value;
        }

        return next.MatchFrom(input, pos + 1, caseInsensitive) ?? _value;
    }
}
|
||||
|
||||
/// <summary>
/// An automaton state transition tree. This class contains a binary search
/// tree for the automaton transitions from one state to another. All
/// transitions are linked to a single character.
/// </summary>
/// <remarks>
/// Case folding uses <see cref="char.ToLowerInvariant(char)"/> so that stored
/// keys and looked-up characters fold identically regardless of the current
/// thread culture (the culture-sensitive <c>Char.ToLower</c> maps 'I' to a
/// dotless 'ı' under Turkish cultures, which made "IF" unreachable for the
/// input "if").
/// </remarks>
internal class AutomatonTree
{
    // (char)0 marks a node that does not hold a transition yet.
    private char _value;
    private Automaton _state;
    private AutomatonTree _left;
    private AutomatonTree _right;

    public AutomatonTree()
    {
    }

    /// <summary>
    /// Looks up the state reached through character <paramref name="c"/>.
    /// </summary>
    /// <param name="c">The transition character.</param>
    /// <param name="lowerCase">When <c>true</c>, <paramref name="c"/> is
    /// folded to lower case before the lookup.</param>
    /// <returns>The linked state, or <c>null</c> when an empty node is
    /// reached.</returns>
    public Automaton Find(char c, bool lowerCase)
    {
        if (lowerCase)
        {
            c = char.ToLowerInvariant(c);
        }

        if (_value == (char)0 || _value == c)
        {
            // Either an empty node (_state is null) or the matching node.
            return _state;
        }

        // The character is already folded, so children never fold again.
        AutomatonTree branch = _value > c ? _left : _right;
        return branch.Find(c, false);
    }

    /// <summary>
    /// Adds a transition for character <paramref name="c"/> to
    /// <paramref name="state"/>.
    /// </summary>
    /// <param name="c">The transition character.</param>
    /// <param name="lowerCase">When <c>true</c>, <paramref name="c"/> is
    /// folded to lower case before it is stored.</param>
    /// <param name="state">The state to link.</param>
    public void Add(char c, bool lowerCase, Automaton state)
    {
        if (lowerCase)
        {
            c = char.ToLowerInvariant(c);
        }

        if (_value == (char)0)
        {
            // Claim this empty node and give it two empty children.
            _value = c;
            _state = state;
            _left = new AutomatonTree();
            _right = new AutomatonTree();
        }
        else if (_value > c)
        {
            _left.Add(c, false, state);
        }
        else
        {
            _right.Add(c, false, state);
        }
    }
}
|
||||
}
|
267
Parsing/CharacterSetElement.cs
Normal file
267
Parsing/CharacterSetElement.cs
Normal file
@@ -0,0 +1,267 @@
|
||||
using System.Collections;
|
||||
using System.Text;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/// <summary>
/// A regular expression character set element. This element matches a
/// single character inside (or outside) a character set. The character set
/// is user defined and may contain ranges of characters. The set may also
/// be inverted, meaning that only characters not inside the set will be
/// considered to match.
/// </summary>
internal class CharacterSetElement : Element
{
    // Predefined sets. InSet and ToString recognise these instances by
    // reference, so their (empty) contents list is never consulted.
    public static CharacterSetElement Dot = new CharacterSetElement(false);
    public static CharacterSetElement Digit = new CharacterSetElement(false);
    public static CharacterSetElement NonDigit = new CharacterSetElement(true);
    public static CharacterSetElement Whitespace = new CharacterSetElement(false);
    public static CharacterSetElement NonWhitespace = new CharacterSetElement(true);
    public static CharacterSetElement Word = new CharacterSetElement(false);
    public static CharacterSetElement NonWord = new CharacterSetElement(true);

    private readonly bool _inverted;

    // Holds boxed chars, Range instances and nested CharacterSetElements.
    private readonly ArrayList _contents = new ArrayList();

    /// <summary>
    /// Creates an empty character set.
    /// </summary>
    /// <param name="inverted">Whether membership tests are negated.</param>
    public CharacterSetElement(bool inverted)
    {
        _inverted = inverted;
    }

    /// <summary>Adds a single character to the set.</summary>
    public void AddCharacter(char c)
    {
        _contents.Add(c);
    }

    /// <summary>Adds every character of <paramref name="str"/> to the set.</summary>
    public void AddCharacters(string str)
    {
        foreach (char c in str)
        {
            AddCharacter(c);
        }
    }

    /// <summary>Adds every character of the string element to the set.</summary>
    public void AddCharacters(StringElement elem)
    {
        AddCharacters(elem.GetString());
    }

    /// <summary>Adds the inclusive character range min..max to the set.</summary>
    public void AddRange(char min, char max)
    {
        _contents.Add(new Range(min, max));
    }

    /// <summary>Adds a nested character set to the set.</summary>
    public void AddCharacterSet(CharacterSetElement elem)
    {
        _contents.Add(elem);
    }

    /// <summary>
    /// Returns this very instance; character sets are not copied.
    /// </summary>
    public override object Clone()
    {
        return this;
    }

    /// <summary>
    /// Tests the character at <paramref name="start"/> against the set.
    /// </summary>
    /// <returns>1 when the character is accepted; -1 when it is rejected,
    /// when <paramref name="skip"/> is non-zero, or when the buffer has no
    /// character at <paramref name="start"/> (in which case the matcher is
    /// told that the end of the string was read).</returns>
    public override int Match(Matcher m, ReaderBuffer buffer, int start, int skip)
    {
        // A single-character element has exactly one possible match.
        if (skip != 0)
        {
            return -1;
        }

        int c = buffer.Peek(start);
        if (c < 0)
        {
            m.SetReadEndOfString();
            return -1;
        }

        if (m.IsCaseInsensitive())
        {
            c = (int)Char.ToLower((char)c);
        }

        return InSet((char)c) ? 1 : -1;
    }

    // Dispatches to the predefined-set test that corresponds to this
    // instance, or to the user-defined contents otherwise.
    private bool InSet(char c)
    {
        if (this == Dot)
        {
            return InDotSet(c);
        }
        if (this == Digit || this == NonDigit)
        {
            return InDigitSet(c) != _inverted;
        }
        if (this == Whitespace || this == NonWhitespace)
        {
            return InWhitespaceSet(c) != _inverted;
        }
        if (this == Word || this == NonWord)
        {
            return InWordSet(c) != _inverted;
        }
        return InUserSet(c) != _inverted;
    }

    // '.' accepts everything except the listed line terminators.
    private bool InDotSet(char c)
    {
        switch (c)
        {
            case '\n':
            case '\r':
            case '\u0085':
            case '\u2028':
            case '\u2029':
                return false;
            default:
                return true;
        }
    }

    private bool InDigitSet(char c)
    {
        return '0' <= c && c <= '9';
    }

    private bool InWhitespaceSet(char c)
    {
        switch (c)
        {
            case ' ':
            case '\t':
            case '\n':
            case '\f':
            case '\r':
            case (char)11: // vertical tab
                return true;
            default:
                return false;
        }
    }

    private bool InWordSet(char c)
    {
        return ('a' <= c && c <= 'z')
            || ('A' <= c && c <= 'Z')
            || ('0' <= c && c <= '9')
            || c == '_';
    }

    // Returns true as soon as any stored character, range or nested set
    // accepts the value.
    private bool InUserSet(char value)
    {
        foreach (object entry in _contents)
        {
            switch (entry)
            {
                case char single when single == value:
                    return true;
                case Range range when range.Inside(value):
                    return true;
                case CharacterSetElement nested when nested.InSet(value):
                    return true;
            }
        }
        return false;
    }

    /// <summary>Writes the string form of this set on its own line.</summary>
    public override void PrintTo(TextWriter output, string indent)
    {
        output.WriteLine(indent + ToString());
    }

    /// <summary>
    /// Returns the regular expression notation of this set: the escape
    /// shorthand for predefined sets, otherwise a bracket expression.
    /// </summary>
    public override string ToString()
    {
        // Handle predefined character sets
        if (this == Dot)
        {
            return ".";
        }
        if (this == Digit)
        {
            return "\\d";
        }
        if (this == NonDigit)
        {
            return "\\D";
        }
        if (this == Whitespace)
        {
            return "\\s";
        }
        if (this == NonWhitespace)
        {
            return "\\S";
        }
        if (this == Word)
        {
            return "\\w";
        }
        if (this == NonWord)
        {
            return "\\W";
        }

        // Handle user-defined character sets. The negation marker belongs
        // inside the brackets ("[^...]"); "^[...]" would read as a start
        // anchor followed by a non-inverted set.
        var buffer = new StringBuilder();
        buffer.Append(_inverted ? "[^" : "[");
        foreach (object entry in _contents)
        {
            buffer.Append(entry);
        }
        buffer.Append("]");

        return buffer.ToString();
    }

    /// <summary>An inclusive range of characters.</summary>
    private class Range
    {
        private readonly char _min;
        private readonly char _max;

        public Range(char min, char max)
        {
            _min = min;
            _max = max;
        }

        /// <summary>Tests whether c lies within min..max (inclusive).</summary>
        public bool Inside(char c)
        {
            return _min <= c && c <= _max;
        }

        public override string ToString()
        {
            return _min + "-" + _max;
        }
    }
}
|
||||
}
|
58
Parsing/CombineElement.cs
Normal file
58
Parsing/CombineElement.cs
Normal file
@@ -0,0 +1,58 @@
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/// <summary>
/// A regular expression element that matches two elements in sequence: the
/// second element is matched where the first one's match ends.
/// </summary>
internal class CombineElement : Element
{
    private readonly Element _elem1;
    private readonly Element _elem2;

    /// <summary>
    /// Creates a sequence of the two given elements. The elements are stored
    /// as-is (no copies are made).
    /// </summary>
    public CombineElement(Element first, Element second)
    {
        _elem1 = first;
        _elem2 = second;
    }

    /// <summary>
    /// Returns a new combine element that refers to the same two child
    /// elements as this one.
    /// </summary>
    public override object Clone() => new CombineElement(_elem1, _elem2);

    /// <summary>
    /// Matches the first element at <paramref name="start"/> followed by the
    /// second element directly after it.
    /// </summary>
    /// <returns>The combined length of both matches, or -1 once the first
    /// element reports no (further) match.</returns>
    public override int Match(Matcher m, ReaderBuffer buffer, int start, int skip)
    {
        int headLength = -1;
        int tailLength = 0;
        int headSkips = 0;
        int tailSkips = 0;

        // Every successful head+tail combination consumes one unit of 'skip';
        // the loop ends after the combination that drives it below zero.
        for (int remaining = skip; remaining >= 0;)
        {
            headLength = _elem1.Match(m, buffer, start, headSkips);
            if (headLength < 0)
            {
                return -1;
            }

            tailLength = _elem2.Match(m, buffer, start + headLength, tailSkips);
            if (tailLength >= 0)
            {
                tailSkips++;
                remaining--;
            }
            else
            {
                // Tail exhausted for this head: move on to the next head
                // match and restart the tail from its first match.
                headSkips++;
                tailSkips = 0;
            }
        }

        return headLength + tailLength;
    }

    /// <summary>Prints both elements at the same indentation.</summary>
    public override void PrintTo(TextWriter output, string indent)
    {
        _elem1.PrintTo(output, indent);
        _elem2.PrintTo(output, indent);
    }
}
|
||||
}
|
596
Parsing/CustomExpressionAnalyzer.cs
Normal file
596
Parsing/CustomExpressionAnalyzer.cs
Normal file
@@ -0,0 +1,596 @@
|
||||
using System.Collections;
|
||||
using System.Diagnostics;
|
||||
using System.Globalization;
|
||||
using System.Text.RegularExpressions;
|
||||
using Flee.ExpressionElements;
|
||||
using Flee.ExpressionElements.Base;
|
||||
using Flee.ExpressionElements.Base.Literals;
|
||||
using Flee.ExpressionElements.Literals;
|
||||
using Flee.ExpressionElements.Literals.Integral;
|
||||
using Flee.ExpressionElements.LogicalBitwise;
|
||||
using Flee.ExpressionElements.MemberElements;
|
||||
using Flee.InternalTypes;
|
||||
using Flee.PublicTypes;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
internal class FleeExpressionAnalyzer : ExpressionAnalyzer
|
||||
{
|
||||
|
||||
private IServiceProvider _myServices;
|
||||
private readonly Regex _myUnicodeEscapeRegex;
|
||||
private readonly Regex _myRegularEscapeRegex;
|
||||
|
||||
private bool _myInUnaryNegate;
|
||||
/// <summary>
/// Creates the analyzer and builds the two case-insensitive patterns that
/// recognise "\uXXXX" escapes and the single-character escapes
/// (\\, \", \', \t, \r, \n).
/// </summary>
internal FleeExpressionAnalyzer()
{
    const RegexOptions options = RegexOptions.IgnoreCase;

    _myUnicodeEscapeRegex = new Regex("\\\\u[0-9a-f]{4}", options);
    _myRegularEscapeRegex = new Regex("\\\\[\\\\\"'trn]", options);
}
|
||||
|
||||
/// <summary>
/// Stores the service provider that is handed to the elements created by
/// this analyzer.
/// </summary>
public void SetServices(IServiceProvider services) => _myServices = services;
|
||||
|
||||
/// <summary>
/// Drops the reference to the current service provider.
/// </summary>
public override void Reset() => _myServices = null;
|
||||
|
||||
/// <summary>
/// Gives the expression node the first value of its first child.
/// </summary>
public override Node ExitExpression(Production node)
{
    AddFirstChildValue(node);
    return node;
}
|
||||
|
||||
/// <summary>
/// Copies the values of all children onto the group node.
/// </summary>
public override Node ExitExpressionGroup(Production node)
{
    var values = GetChildValues(node);
    node.AddValues(values);
    return node;
}
|
||||
|
||||
/// <summary>
/// Builds the node value as a binary operation of type XorElement.
/// </summary>
public override Node ExitXorExpression(Production node)
{
    AddBinaryOp(node, typeof(XorElement));
    return node;
}
|
||||
|
||||
/// <summary>
/// Builds the node value as a binary operation of type AndOrElement.
/// </summary>
public override Node ExitOrExpression(Production node)
{
    AddBinaryOp(node, typeof(AndOrElement));
    return node;
}
|
||||
|
||||
/// <summary>
/// Builds the node value as a binary operation of type AndOrElement.
/// </summary>
public override Node ExitAndExpression(Production node)
{
    AddBinaryOp(node, typeof(AndOrElement));
    return node;
}
|
||||
|
||||
/// <summary>
/// Builds the node value as a unary operation of type NotElement.
/// </summary>
public override Node ExitNotExpression(Production node)
{
    AddUnaryOp(node, typeof(NotElement));
    return node;
}
|
||||
|
||||
/// <summary>
/// Builds the node value as a binary operation of type CompareElement.
/// </summary>
public override Node ExitCompareExpression(Production node)
{
    AddBinaryOp(node, typeof(CompareElement));
    return node;
}
|
||||
|
||||
/// <summary>
/// Builds the node value as a binary operation of type ShiftElement.
/// </summary>
public override Node ExitShiftExpression(Production node)
{
    AddBinaryOp(node, typeof(ShiftElement));
    return node;
}
|
||||
|
||||
/// <summary>
/// Builds the node value as a binary operation of type ArithmeticElement.
/// </summary>
public override Node ExitAdditiveExpression(Production node)
{
    AddBinaryOp(node, typeof(ArithmeticElement));
    return node;
}
|
||||
|
||||
/// <summary>
/// Builds the node value as a binary operation of type ArithmeticElement.
/// </summary>
public override Node ExitMultiplicativeExpression(Production node)
{
    AddBinaryOp(node, typeof(ArithmeticElement));
    return node;
}
|
||||
|
||||
/// <summary>
/// Builds the node value as a binary operation of type ArithmeticElement.
/// </summary>
public override Node ExitPowerExpression(Production node)
{
    AddBinaryOp(node, typeof(ArithmeticElement));
    return node;
}
|
||||
|
||||
/// <summary>
/// Tries to fold a negated integer constant into the literal itself, which
/// is what allows int32.MinValue to be parsed. Anything else becomes a
/// regular NegateElement.
/// </summary>
public override Node ExitNegateExpression(Production node)
{
    IList childValues = GetChildValues(node);

    // The operand is always the last child value.
    var operand = (ExpressionElement)childValues[childValues.Count - 1];
    Type operandType = operand.GetType();
    bool hasTwoValues = childValues.Count == 2;

    if (hasTwoValues && operandType == typeof(Int32LiteralElement))
    {
        // Negate in place and use the literal directly instead of wrapping it.
        ((Int32LiteralElement)operand).Negate();
        node.AddValue(operand);
    }
    else if (hasTwoValues && operandType == typeof(Int64LiteralElement))
    {
        // Same folding for 64-bit literals.
        ((Int64LiteralElement)operand).Negate();
        node.AddValue(operand);
    }
    else
    {
        // Not a foldable constant: add a regular negate.
        AddUnaryOp(node, typeof(NegateElement));
    }

    return node;
}
|
||||
|
||||
/// <summary>
/// Passes a lone non-member value through unchanged; every other
/// combination of child values is wrapped in an InvocationListElement.
/// </summary>
public override Node ExitMemberExpression(Production node)
{
    IList childValues = GetChildValues(node);
    object first = childValues[0];

    bool isSingleNonMember = childValues.Count == 1 && !(first is MemberElement);
    if (isSingleNonMember)
    {
        node.AddValue(first);
        return node;
    }

    node.AddValue(new InvocationListElement(childValues, _myServices));
    return node;
}
|
||||
|
||||
/// <summary>
/// Wraps the child values in an ArgumentList and adds an IndexerElement
/// built from it as the node value.
/// </summary>
public override Node ExitIndexExpression(Production node)
{
    var arguments = new ArgumentList(GetChildValues(node));
    node.AddValue(new IndexerElement(arguments));
    return node;
}
|
||||
|
||||
/// <summary>
/// Gives the node the first value of its second child (index 1).
/// </summary>
public override Node ExitMemberAccessExpression(Production node)
{
    var member = node.GetChildAt(1);
    node.AddValue(member.GetValue(0));
    return node;
}
|
||||
|
||||
/// <summary>
/// Gives the node the first value of its first child.
/// </summary>
public override Node ExitSpecialFunctionExpression(Production node)
{
    AddFirstChildValue(node);
    return node;
}
|
||||
|
||||
/// <summary>
/// Builds a ConditionalElement from the first three child values, in
/// order, and adds it as the node value.
/// </summary>
public override Node ExitIfExpression(Production node)
{
    IList childValues = GetChildValues(node);

    var first = (ExpressionElement)childValues[0];
    var second = (ExpressionElement)childValues[1];
    var third = (ExpressionElement)childValues[2];

    node.AddValue(new ConditionalElement(first, second, third));
    return node;
}
|
||||
|
||||
/// <summary>
/// Passes a single child value through; otherwise builds an InElement from
/// the first value (the operand) and what follows it: either a list, or an
/// invocation list made of the remaining values.
/// </summary>
public override Node ExitInExpression(Production node)
{
    IList childValues = GetChildValues(node);

    if (childValues.Count == 1)
    {
        AddFirstChildValue(node);
        return node;
    }

    // Detach the operand; what remains describes the search target.
    var operand = (ExpressionElement)childValues[0];
    childValues.RemoveAt(0);

    object target = childValues[0];
    InElement op = target is IList list
        ? new InElement(operand, list)
        : new InElement(operand, new InvocationListElement(childValues, _myServices));

    node.AddValue(op);
    return node;
}
|
||||
|
||||
/// <summary>
/// Gives the node the first value of its first child.
/// </summary>
public override Node ExitInTargetExpression(Production node)
{
    AddFirstChildValue(node);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds the list of child values to the node as one single value (the
/// list itself, not its items).
/// </summary>
public override Node ExitInListTargetExpression(Production node)
{
    IList items = GetChildValues(node);
    node.AddValue(items);
    return node;
}
|
||||
|
||||
/// <summary>
/// Builds a CastElement from the child values: the expression to cast
/// (index 0), the destination type name parts (index 1) and the array
/// flag (index 2).
/// </summary>
public override Node ExitCastExpression(Production node)
{
    IList childValues = GetChildValues(node);

    var destTypeParts = (string[])childValues[1];
    var isArray = (bool)childValues[2];
    var castExpression = (ExpressionElement)childValues[0];

    node.AddValue(new CastElement(castExpression, destTypeParts, isArray, _myServices));
    return node;
}
|
||||
|
||||
/// <summary>
/// Collects the string child values into an array of type name parts. A
/// trailing "[]" part is removed and reported through a separate boolean
/// value instead. Adds two values to the node: the parts array, then the
/// array flag.
/// </summary>
public override Node ExitCastTypeExpression(Production node)
{
    var parts = new List<string>();
    foreach (string part in GetChildValues(node))
    {
        parts.Add(part);
    }

    int lastIndex = parts.Count - 1;
    bool isArray = parts[lastIndex] == "[]";
    if (isArray)
    {
        parts.RemoveAt(lastIndex);
    }

    node.AddValue(parts.ToArray());
    node.AddValue(isArray);
    return node;
}
|
||||
|
||||
/// <summary>
/// Gives the node the first value of its first child.
/// </summary>
public override Node ExitMemberFunctionExpression(Production node)
{
    AddFirstChildValue(node);
    return node;
}
|
||||
|
||||
/// <summary>
/// Creates an IdentifierElement named after the string form of the first
/// child's first value and adds it as the node value.
/// </summary>
public override Node ExitFieldPropertyExpression(Production node)
{
    object rawName = node.GetChildAt(0).GetValue(0);
    node.AddValue(new IdentifierElement(rawName.ToString()));
    return node;
}
|
||||
|
||||
/// <summary>
/// Builds a FunctionCallElement: the first child value is the function
/// name, the remaining child values become its argument list.
/// </summary>
public override Node ExitFunctionCallExpression(Production node)
{
    IList childValues = GetChildValues(node);

    // Split off the name so only the arguments remain in the list.
    var name = (string)childValues[0];
    childValues.RemoveAt(0);

    node.AddValue(new FunctionCallElement(name, new ArgumentList(childValues)));
    return node;
}
|
||||
|
||||
/// <summary>
/// Copies the values of all children onto the argument list node.
/// </summary>
public override Node ExitArgumentList(Production node)
{
    var arguments = (ArrayList)GetChildValues(node);
    node.AddValues(arguments);
    return node;
}
|
||||
|
||||
/// <summary>
/// Gives the node the first value of its first child.
/// </summary>
public override Node ExitBasicExpression(Production node)
{
    AddFirstChildValue(node);
    return node;
}
|
||||
|
||||
/// <summary>
/// Gives the node the first value of its first child.
/// </summary>
public override Node ExitLiteralExpression(Production node)
{
    AddFirstChildValue(node);
    return node;
}
|
||||
|
||||
// Adds the first value of the node's first child as a value of the node.
private void AddFirstChildValue(Production node)
{
    Node firstChild = GetChildAt(node, 0);
    node.AddValue(firstChild.Values[0]);
}
|
||||
|
||||
// With exactly two child values, creates an instance of elementType (via
// its parameterless constructor), gives it the second value as its child
// and adds it to the node. Otherwise the first child value is passed
// through unchanged.
private void AddUnaryOp(Production node, Type elementType)
{
    IList childValues = GetChildValues(node);

    if (childValues.Count != 2)
    {
        node.AddValue(childValues[0]);
        return;
    }

    var element = (UnaryElement)Activator.CreateInstance(elementType);
    element.SetChild((ExpressionElement)childValues[1]);
    node.AddValue(element);
}
|
||||
|
||||
// With more than one child value, lets BinaryExpressionElement build an
// element of elementType from them; a single child value is passed
// through unchanged. Having no child values trips a debug assertion.
private void AddBinaryOp(Production node, Type elementType)
{
    IList childValues = GetChildValues(node);

    switch (childValues.Count)
    {
        case 0:
            Debug.Assert(false, "wrong number of chilren");
            break;
        case 1:
            node.AddValue(childValues[0]);
            break;
        default:
            BinaryExpressionElement combined = BinaryExpressionElement.CreateElement(childValues, elementType);
            node.AddValue(combined);
            break;
    }
}
|
||||
|
||||
/// <summary>
/// Adds the literal element that RealLiteralElement creates from the
/// token image.
/// </summary>
public override Node ExitReal(Token node)
{
    LiteralElement literal = RealLiteralElement.Create(node.Image, _myServices);
    node.AddValue(literal);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds the literal element that IntegralLiteralElement creates from the
/// token image, with the hexadecimal flag (second argument) set to false.
/// </summary>
public override Node ExitInteger(Token node)
{
    const bool isHex = false;

    LiteralElement literal = IntegralLiteralElement.Create(node.Image, isHex, _myInUnaryNegate, _myServices);
    node.AddValue(literal);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds the literal element that IntegralLiteralElement creates from the
/// token image, with the hexadecimal flag (second argument) set to true.
/// </summary>
public override Node ExitHexliteral(Token node)
{
    const bool isHex = true;

    LiteralElement literal = IntegralLiteralElement.Create(node.Image, isHex, _myInUnaryNegate, _myServices);
    node.AddValue(literal);
    return node;
}
|
||||
|
||||
/// <summary>
/// Gives the node the first value of its first child.
/// </summary>
public override Node ExitBooleanLiteralExpression(Production node)
{
    AddFirstChildValue(node);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds a BooleanLiteralElement holding true as the token value.
/// </summary>
public override Node ExitTrue(Token node)
{
    var literal = new BooleanLiteralElement(true);
    node.AddValue(literal);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds a BooleanLiteralElement holding false as the token value.
/// </summary>
public override Node ExitFalse(Token node)
{
    var literal = new BooleanLiteralElement(false);
    node.AddValue(literal);
    return node;
}
|
||||
|
||||
/// <summary>
/// Unquotes and unescapes the token image and adds the result as a
/// StringLiteralElement.
/// </summary>
public override Node ExitStringLiteral(Token node)
{
    string text = DoEscapes(node.Image);
    node.AddValue(new StringLiteralElement(text));
    return node;
}
|
||||
|
||||
/// <summary>
/// Unquotes and unescapes the token image and adds its first character as
/// a CharLiteralElement.
/// </summary>
public override Node ExitCharLiteral(Token node)
{
    string text = DoEscapes(node.Image);
    char first = text[0];
    node.AddValue(new CharLiteralElement(first));
    return node;
}
|
||||
|
||||
/// <summary>
/// Strips the first and last character of the token image and adds a
/// DateTimeLiteralElement built from the remainder and the
/// ExpressionContext obtained from the service provider.
/// </summary>
public override Node ExitDatetime(Token node)
{
    var context = (ExpressionContext)_myServices.GetService(typeof(ExpressionContext));

    string raw = node.Image;
    string image = raw.Substring(1, raw.Length - 2);

    node.AddValue(new DateTimeLiteralElement(image, context));
    return node;
}
|
||||
|
||||
/// <summary>
/// Strips the first two characters and the last character of the token
/// image and adds a TimeSpanLiteralElement built from the remainder.
/// </summary>
public override Node ExitTimespan(Token node)
{
    string raw = node.Image;
    string image = raw.Substring(2, raw.Length - 3);

    node.AddValue(new TimeSpanLiteralElement(image));
    return node;
}
|
||||
|
||||
// Removes the surrounding quotes from a literal's image and decodes its
// escape sequences ("\uXXXX" and \\ \" \' \t \r \n).
//
// Decoding is done in ONE left-to-right pass over the alternation of the
// two escape patterns. Running the patterns one after the other (as this
// method used to) decodes some literals incorrectly:
//   - "\\u0041" (escaped backslash + "u0041") had its tail taken for a
//     Unicode escape and became "\A";
//   - "\u005Cn" (Unicode escape for a backslash + 'n') was unescaped a
//     second time and became a newline.
// In a single pass every character is consumed by at most one escape.
private string DoEscapes(string image)
{
    // Remove outer quotes
    image = image.Substring(1, image.Length - 2);

    // Regex.ToString() yields the pattern text the instance was built from.
    // Both patterns are created with RegexOptions.IgnoreCase by the
    // constructor, so the same option is applied to their combination.
    string pattern = _myUnicodeEscapeRegex.ToString() + "|" + _myRegularEscapeRegex.ToString();

    return Regex.Replace(
        image,
        pattern,
        // A Unicode escape is six characters long, a regular one only two.
        m => m.Value.Length > 2 ? UnicodeEscapeMatcher(m) : RegularEscapeMatcher(m),
        RegexOptions.IgnoreCase);
}
|
||||
|
||||
// Match evaluator for single-character escapes: returns the text that the
// matched backslash sequence stands for. Letters are accepted in either
// case. An unknown sequence trips a debug assertion and yields null.
private string RegularEscapeMatcher(Match m)
{
    // Drop the leading backslash; what is left selects the replacement.
    string escaped = m.Value.Substring(1);

    switch (escaped)
    {
        case "\\":
        case "\"":
        case "'":
            return escaped;
        case "t":
        case "T":
            return "\t";
        case "n":
        case "N":
            return "\n";
        case "r":
        case "R":
            return "\r";
        default:
            Debug.Assert(false, "Unrecognized escape sequence");
            return null;
    }
}
|
||||
|
||||
// Match evaluator for "\uXXXX" escapes: parses the hexadecimal digits that
// follow the two-character prefix and returns the character with that code
// as a one-character string.
private string UnicodeEscapeMatcher(Match m)
{
    string hexDigits = m.Value.Substring(2);
    int code = int.Parse(hexDigits, NumberStyles.AllowHexSpecifier);
    return Convert.ToChar(code).ToString();
}
|
||||
|
||||
/// <summary>
/// Adds the token's image (a string) as the token value.
/// </summary>
public override Node ExitIdentifier(Token node)
{
    string identifier = node.Image;
    node.AddValue(identifier);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds a new NullLiteralElement as the token value.
/// </summary>
public override Node ExitNullLiteral(Token node)
{
    var literal = new NullLiteralElement();
    node.AddValue(literal);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds the marker string "[]" as the token value; ExitCastTypeExpression
/// looks for this marker to detect array types.
/// </summary>
public override Node ExitArrayBraces(Token node)
{
    const string arrayMarker = "[]";
    node.AddValue(arrayMarker);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds BinaryArithmeticOperation.Add as the token value.
/// </summary>
public override Node ExitAdd(Token node)
{
    var operation = BinaryArithmeticOperation.Add;
    node.AddValue(operation);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds BinaryArithmeticOperation.Subtract as the token value.
/// </summary>
public override Node ExitSub(Token node)
{
    var operation = BinaryArithmeticOperation.Subtract;
    node.AddValue(operation);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds <c>BinaryArithmeticOperation.Multiply</c> as a value on the MUL token.
/// </summary>
/// <param name="node">The MUL token being exited.</param>
/// <returns>The same token.</returns>
public override Node ExitMul(Token node)
{
    var operation = BinaryArithmeticOperation.Multiply;
    node.AddValue(operation);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds <c>BinaryArithmeticOperation.Divide</c> as a value on the DIV token.
/// </summary>
/// <param name="node">The DIV token being exited.</param>
/// <returns>The same token.</returns>
public override Node ExitDiv(Token node)
{
    var operation = BinaryArithmeticOperation.Divide;
    node.AddValue(operation);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds <c>BinaryArithmeticOperation.Mod</c> as a value on the MOD token.
/// </summary>
/// <param name="node">The MOD token being exited.</param>
/// <returns>The same token.</returns>
public override Node ExitMod(Token node)
{
    var operation = BinaryArithmeticOperation.Mod;
    node.AddValue(operation);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds <c>BinaryArithmeticOperation.Power</c> as a value on the POWER token.
/// </summary>
/// <param name="node">The POWER token being exited.</param>
/// <returns>The same token.</returns>
public override Node ExitPower(Token node)
{
    var operation = BinaryArithmeticOperation.Power;
    node.AddValue(operation);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds <c>LogicalCompareOperation.Equal</c> as a value on the EQ token.
/// </summary>
/// <param name="node">The EQ token being exited.</param>
/// <returns>The same token.</returns>
public override Node ExitEq(Token node)
{
    var comparison = LogicalCompareOperation.Equal;
    node.AddValue(comparison);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds <c>LogicalCompareOperation.NotEqual</c> as a value on the NE token.
/// </summary>
/// <param name="node">The NE token being exited.</param>
/// <returns>The same token.</returns>
public override Node ExitNe(Token node)
{
    var comparison = LogicalCompareOperation.NotEqual;
    node.AddValue(comparison);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds <c>LogicalCompareOperation.LessThan</c> as a value on the LT token.
/// </summary>
/// <param name="node">The LT token being exited.</param>
/// <returns>The same token.</returns>
public override Node ExitLt(Token node)
{
    var comparison = LogicalCompareOperation.LessThan;
    node.AddValue(comparison);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds <c>LogicalCompareOperation.GreaterThan</c> as a value on the GT token.
/// </summary>
/// <param name="node">The GT token being exited.</param>
/// <returns>The same token.</returns>
public override Node ExitGt(Token node)
{
    var comparison = LogicalCompareOperation.GreaterThan;
    node.AddValue(comparison);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds <c>LogicalCompareOperation.LessThanOrEqual</c> as a value on the LTE token.
/// </summary>
/// <param name="node">The LTE token being exited.</param>
/// <returns>The same token.</returns>
public override Node ExitLte(Token node)
{
    var comparison = LogicalCompareOperation.LessThanOrEqual;
    node.AddValue(comparison);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds <c>LogicalCompareOperation.GreaterThanOrEqual</c> as a value on the GTE token.
/// </summary>
/// <param name="node">The GTE token being exited.</param>
/// <returns>The same token.</returns>
public override Node ExitGte(Token node)
{
    var comparison = LogicalCompareOperation.GreaterThanOrEqual;
    node.AddValue(comparison);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds <c>AndOrOperation.And</c> as a value on the AND token.
/// </summary>
/// <param name="node">The AND token being exited.</param>
/// <returns>The same token.</returns>
public override Node ExitAnd(Token node)
{
    var logicalOperation = AndOrOperation.And;
    node.AddValue(logicalOperation);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds <c>AndOrOperation.Or</c> as a value on the OR token.
/// </summary>
/// <param name="node">The OR token being exited.</param>
/// <returns>The same token.</returns>
public override Node ExitOr(Token node)
{
    var logicalOperation = AndOrOperation.Or;
    node.AddValue(logicalOperation);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds the string "Xor" as a value on the XOR token. Unlike the AND and OR
/// handlers, the value is a plain string rather than an enum member.
/// </summary>
/// <param name="node">The XOR token being exited.</param>
/// <returns>The same token.</returns>
public override Node ExitXor(Token node)
{
    const string xorMarker = "Xor";
    node.AddValue(xorMarker);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds an empty string as a value on the NOT token.
/// </summary>
/// <param name="node">The NOT token being exited.</param>
/// <returns>The same token.</returns>
public override Node ExitNot(Token node)
{
    var placeholder = string.Empty;
    node.AddValue(placeholder);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds <c>ShiftOperation.LeftShift</c> as a value on the LEFT_SHIFT token.
/// </summary>
/// <param name="node">The LEFT_SHIFT token being exited.</param>
/// <returns>The same token.</returns>
public override Node ExitLeftShift(Token node)
{
    var shift = ShiftOperation.LeftShift;
    node.AddValue(shift);
    return node;
}
|
||||
|
||||
/// <summary>
/// Adds <c>ShiftOperation.RightShift</c> as a value on the RIGHT_SHIFT token.
/// </summary>
/// <param name="node">The RIGHT_SHIFT token being exited.</param>
/// <returns>The same token.</returns>
public override Node ExitRightShift(Token node)
{
    var shift = ShiftOperation.RightShift;
    node.AddValue(shift);
    return node;
}
|
||||
|
||||
/// <summary>
/// Runs the base child handling, then records in <c>_myInUnaryNegate</c>
/// whether the child just added is the SUB token of a negate expression.
/// </summary>
/// <param name="node">The production receiving the child.</param>
/// <param name="child">The child node that was just processed.</param>
public override void Child(Production node, Node child)
{
    base.Child(node, child);

    // Short-circuit with && (the original used the eager & operator) so the
    // child's id is only inspected when the parent is a negate expression.
    bool parentIsNegate = node.Id == (int)ExpressionConstants.NEGATE_EXPRESSION;
    _myInUnaryNegate = parentIsNegate && child.Id == (int)ExpressionConstants.SUB;
}
|
||||
}
|
||||
}
|
49
Parsing/CustomTokenPatterns.cs
Normal file
49
Parsing/CustomTokenPatterns.cs
Normal file
@@ -0,0 +1,49 @@
|
||||
using Flee.PublicTypes;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/// <summary>
/// A token pattern that can be initialized with an
/// <see cref="ExpressionContext"/>; subclasses decide in
/// <see cref="ComputeToken"/> how the context shapes the pattern.
/// </summary>
internal abstract class CustomTokenPattern : TokenPattern
{
    /// <summary>
    /// Passes all arguments unchanged to the base token pattern.
    /// </summary>
    protected CustomTokenPattern(int id, string name, PatternType type, string pattern)
        : base(id, name, type, pattern)
    {
    }

    /// <summary>
    /// Computes the token definition for the given context.
    /// </summary>
    protected abstract void ComputeToken(int id, string name, PatternType type, string pattern, ExpressionContext context);

    /// <summary>
    /// Initializes this pattern for a context by delegating to
    /// <see cref="ComputeToken"/> with the same arguments.
    /// </summary>
    public void Initialize(int id, string name, PatternType type, string pattern, ExpressionContext context)
    {
        ComputeToken(id, name, type, pattern, context);
    }
}
|
||||
|
||||
/// <summary>
/// Token pattern for real numbers whose pattern text is a format template
/// filled in from the parser options of the expression context.
/// </summary>
internal class RealPattern : CustomTokenPattern
{
    /// <summary>
    /// Passes all arguments unchanged to the base pattern.
    /// </summary>
    public RealPattern(int id, string name, PatternType type, string pattern)
        : base(id, name, type, pattern)
    {
    }

    /// <summary>
    /// Formats <paramref name="pattern"/> with two arguments and stores the
    /// result via <c>SetData</c>: {0} is '+' when the options require digits
    /// before the decimal point and '*' otherwise; {1} is the configured
    /// decimal separator.
    /// </summary>
    protected override void ComputeToken(int id, string name, PatternType type, string pattern, ExpressionContext context)
    {
        ExpressionParserOptions parserOptions = context.ParserOptions;

        char leadingDigitsQuantifier;
        if (parserOptions.RequireDigitsBeforeDecimalPoint)
        {
            leadingDigitsQuantifier = '+';
        }
        else
        {
            leadingDigitsQuantifier = '*';
        }

        string resolvedPattern = string.Format(pattern, leadingDigitsQuantifier, parserOptions.DecimalSeparator);
        SetData(id, name, type, resolvedPattern);
    }
}
|
||||
|
||||
/// <summary>
/// Token pattern for the function argument separator; its pattern text is
/// taken from the parser options of the expression context.
/// </summary>
internal class ArgumentSeparatorPattern : CustomTokenPattern
{
    /// <summary>
    /// Passes all arguments unchanged to the base pattern.
    /// </summary>
    public ArgumentSeparatorPattern(int id, string name, PatternType type, string pattern)
        : base(id, name, type, pattern)
    {
    }

    /// <summary>
    /// Ignores the supplied <paramref name="pattern"/> and stores the
    /// configured function argument separator as the pattern via <c>SetData</c>.
    /// </summary>
    protected override void ComputeToken(int id, string name, PatternType type, string pattern, ExpressionContext context)
    {
        ExpressionParserOptions parserOptions = context.ParserOptions;
        string separatorText = parserOptions.FunctionArgumentSeparator.ToString();
        SetData(id, name, type, separatorText);
    }
}
|
||||
}
|
19
Parsing/Element.cs
Normal file
19
Parsing/Element.cs
Normal file
@@ -0,0 +1,19 @@
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/// <summary>
/// A regular expression element. This is the common base class for all
/// regular expression elements, i.e. the parts of the regular expression.
/// </summary>
internal abstract class Element : ICloneable
{
    /// <summary>
    /// Creates a copy of this element.
    /// </summary>
    public abstract object Clone();

    /// <summary>
    /// Matches this element against the buffer.
    /// NOTE(review): implementations appear to return the matched length and
    /// a negative value when nothing matches - confirm against the subclasses.
    /// </summary>
    /// <param name="m">The matcher driving the match.</param>
    /// <param name="buffer">The input buffer to read from.</param>
    /// <param name="start">The starting position.</param>
    /// <param name="skip">The skip count passed through by the caller.</param>
    public abstract int Match(Matcher m, ReaderBuffer buffer, int start, int skip);

    /// <summary>
    /// Prints this element to the given writer, prefixing output with
    /// <paramref name="indent"/>.
    /// </summary>
    public abstract void PrintTo(TextWriter output, string indent);
}
|
||||
}
|
133
Parsing/Expression.grammar
Normal file
133
Parsing/Expression.grammar
Normal file
@@ -0,0 +1,133 @@
|
||||
%header%
|
||||
|
||||
DESCRIPTION = "A general expression grammar"
|
||||
AUTHOR = "Eugene Ciloci"
|
||||
VERSION = "1.0"
|
||||
DATE = "May 2007"
|
||||
|
||||
GRAMMARTYPE = "LL"
|
||||
CASESENSITIVE = "False"
|
||||
|
||||
LICENSE = "This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public License
|
||||
as published by the Free Software Foundation; either version 2.1
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
|
||||
MA 02111-1307, USA.
|
||||
"
|
||||
|
||||
COPYRIGHT = "Copyright (c) 2007 Eugene Ciloci"
|
||||
|
||||
%tokens%
|
||||
ADD = "+"
|
||||
SUB = "-"
|
||||
MUL = "*"
|
||||
DIV = "/"
|
||||
POWER = "^"
|
||||
MOD = "%"
|
||||
LEFT_PAREN = "("
|
||||
RIGHT_PAREN = ")"
|
||||
LEFT_BRACE = "["
|
||||
RIGHT_BRACE = "]"
|
||||
EQ = "="
|
||||
LT = "<"
|
||||
GT = ">"
|
||||
LTE = "<="
|
||||
GTE = ">="
|
||||
NE = "<>"
|
||||
AND = "AND"
|
||||
OR = "OR"
|
||||
XOR = "XOR"
|
||||
NOT = "NOT"
|
||||
IN = "in"
|
||||
DOT = "."
|
||||
ARGUMENT_SEPARATOR = ","
|
||||
ARRAY_BRACES = "[]"
|
||||
LEFT_SHIFT = "<<"
|
||||
RIGHT_SHIFT = ">>"
|
||||
WHITESPACE = <<\s+>> %ignore%
|
||||
|
||||
// Primitives
|
||||
INTEGER = <<\d+(u|l|ul|lu)?>>
|
||||
REAL = <<\d*\.\d+([e][+-]\d{1,3})?f?>>
|
||||
STRING_LITERAL = <<"([^"\r\n\\]|\\u[0-9a-f]{4}|\\[\\"'trn])*">>
|
||||
CHAR_LITERAL = <<'([^'\r\n\\]|\\u[0-9a-f]{4}|\\[\\"'trn])'>>
|
||||
TRUE = "True"
|
||||
FALSE = "False"
|
||||
IDENTIFIER = <<[a-z_]\w*>>
|
||||
HEX_LITERAL = <<0x[0-9a-f]+(u|l|ul|lu)?>>
|
||||
NULL_LITERAL = "null"
|
||||
TIMESPAN = <<##(\d+\.)?\d{2}:\d{2}(:\d{2}(\.\d{1,7})?)?#>>
|
||||
DATETIME = <<#[^#]+#>>
|
||||
|
||||
// Special Functions
|
||||
IF = "if"
|
||||
CAST = "cast"
|
||||
|
||||
%productions%
|
||||
|
||||
Expression = XorExpression;
|
||||
|
||||
XorExpression = OrExpression {XOR OrExpression};
|
||||
|
||||
OrExpression = AndExpression {OR AndExpression};
|
||||
|
||||
AndExpression = NotExpression {AND NotExpression};
|
||||
|
||||
NotExpression = NOT? InExpression;
|
||||
|
||||
InExpression = CompareExpression [IN InTargetExpression];
|
||||
|
||||
InTargetExpression = FieldPropertyExpression | InListTargetExpression;
|
||||
|
||||
InListTargetExpression = "(" ArgumentList ")";
|
||||
|
||||
CompareExpression = ShiftExpression {("=" | ">" | "<" | ">=" | "<=" | "<>") ShiftExpression};
|
||||
|
||||
ShiftExpression = AdditiveExpression {("<<" | ">>") AdditiveExpression};
|
||||
|
||||
AdditiveExpression = MultiplicativeExpression {("+" | "-") MultiplicativeExpression};
|
||||
|
||||
MultiplicativeExpression = PowerExpression {("*" | "/" | "%") PowerExpression};
|
||||
|
||||
PowerExpression = NegateExpression {"^" NegateExpression};
|
||||
|
||||
NegateExpression = "-"? MemberExpression;
|
||||
|
||||
MemberExpression = BasicExpression {MemberAccessExpression | IndexExpression};
|
||||
|
||||
MemberAccessExpression = "." MemberFunctionExpression;
|
||||
|
||||
BasicExpression = LiteralExpression | ExpressionGroup | MemberFunctionExpression | SpecialFunctionExpression;
|
||||
|
||||
MemberFunctionExpression = FieldPropertyExpression | FunctionCallExpression;
|
||||
|
||||
FieldPropertyExpression = IDENTIFIER;
|
||||
|
||||
SpecialFunctionExpression = IfExpression | CastExpression;
|
||||
|
||||
IfExpression = IF "(" Expression "," Expression "," Expression ")";
|
||||
|
||||
CastExpression = CAST "(" Expression "," CastTypeExpression ")";
|
||||
|
||||
CastTypeExpression = IDENTIFIER {"." IDENTIFIER} ARRAY_BRACES?;
|
||||
|
||||
IndexExpression = "[" ArgumentList "]";
|
||||
|
||||
FunctionCallExpression = IDENTIFIER "(" ArgumentList? ")";
|
||||
|
||||
ArgumentList = Expression {"," Expression};
|
||||
|
||||
LiteralExpression = INTEGER | REAL | STRING_LITERAL | BooleanLiteralExpression | HEX_LITERAL | CHAR_LITERAL | NULL_LITERAL | DATETIME | TIMESPAN;
|
||||
|
||||
BooleanLiteralExpression = TRUE | FALSE;
|
||||
|
||||
ExpressionGroup = "(" Expression ")";
|
1395
Parsing/ExpressionAnalyzer.cs
Normal file
1395
Parsing/ExpressionAnalyzer.cs
Normal file
File diff suppressed because it is too large
Load Diff
78
Parsing/ExpressionConstants.cs
Normal file
78
Parsing/ExpressionConstants.cs
Normal file
@@ -0,0 +1,78 @@
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/// <remarks>
/// An enumeration with token and production node constants.
/// Token ids occupy 1001-1040, production ids 2001-2029.
/// </remarks>
internal enum ExpressionConstants
{
    // ----- Tokens: arithmetic operators -----
    ADD = 1001,
    SUB = 1002,
    MUL = 1003,
    DIV = 1004,
    POWER = 1005,
    MOD = 1006,

    // ----- Tokens: grouping -----
    LEFT_PAREN = 1007,
    RIGHT_PAREN = 1008,
    LEFT_BRACE = 1009,
    RIGHT_BRACE = 1010,

    // ----- Tokens: comparison operators -----
    EQ = 1011,
    LT = 1012,
    GT = 1013,
    LTE = 1014,
    GTE = 1015,
    NE = 1016,

    // ----- Tokens: logical operators and keywords -----
    AND = 1017,
    OR = 1018,
    XOR = 1019,
    NOT = 1020,
    IN = 1021,

    // ----- Tokens: punctuation and shifts -----
    DOT = 1022,
    ARGUMENT_SEPARATOR = 1023,
    ARRAY_BRACES = 1024,
    LEFT_SHIFT = 1025,
    RIGHT_SHIFT = 1026,
    WHITESPACE = 1027,

    // ----- Tokens: literals and identifiers -----
    INTEGER = 1028,
    REAL = 1029,
    STRING_LITERAL = 1030,
    CHAR_LITERAL = 1031,
    TRUE = 1032,
    FALSE = 1033,
    NULL_LITERAL = 1034,
    HEX_LITERAL = 1035,
    IDENTIFIER = 1036,
    TIMESPAN = 1037,
    DATETIME = 1038,

    // ----- Tokens: special functions -----
    IF = 1039,
    CAST = 1040,

    // ----- Productions -----
    EXPRESSION = 2001,
    XOR_EXPRESSION = 2002,
    OR_EXPRESSION = 2003,
    AND_EXPRESSION = 2004,
    NOT_EXPRESSION = 2005,
    IN_EXPRESSION = 2006,
    IN_TARGET_EXPRESSION = 2007,
    IN_LIST_TARGET_EXPRESSION = 2008,
    COMPARE_EXPRESSION = 2009,
    SHIFT_EXPRESSION = 2010,
    ADDITIVE_EXPRESSION = 2011,
    MULTIPLICATIVE_EXPRESSION = 2012,
    POWER_EXPRESSION = 2013,
    NEGATE_EXPRESSION = 2014,
    MEMBER_EXPRESSION = 2015,
    MEMBER_ACCESS_EXPRESSION = 2016,
    BASIC_EXPRESSION = 2017,
    MEMBER_FUNCTION_EXPRESSION = 2018,
    FIELD_PROPERTY_EXPRESSION = 2019,
    SPECIAL_FUNCTION_EXPRESSION = 2020,
    IF_EXPRESSION = 2021,
    CAST_EXPRESSION = 2022,
    CAST_TYPE_EXPRESSION = 2023,
    INDEX_EXPRESSION = 2024,
    FUNCTION_CALL_EXPRESSION = 2025,
    ARGUMENT_LIST = 2026,
    LITERAL_EXPRESSION = 2027,
    BOOLEAN_LITERAL_EXPRESSION = 2028,
    EXPRESSION_GROUP = 2029
}
|
||||
}
|
460
Parsing/ExpressionParser.cs
Normal file
460
Parsing/ExpressionParser.cs
Normal file
@@ -0,0 +1,460 @@
|
||||
using Flee.PublicTypes;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/// <summary>
|
||||
/// A token stream parser.
|
||||
/// </summary>
|
||||
internal class ExpressionParser : StackParser
|
||||
{
|
||||
/// <summary>
/// Ids of the synthetic subproductions used by the pattern definitions in
/// this parser (3001-3016).
/// </summary>
private enum SynteticPatterns
{
    SUBPRODUCTION_1 = 3001,
    SUBPRODUCTION_2 = 3002,
    SUBPRODUCTION_3 = 3003,
    SUBPRODUCTION_4 = 3004,

    SUBPRODUCTION_5 = 3005,
    SUBPRODUCTION_6 = 3006,
    SUBPRODUCTION_7 = 3007,
    SUBPRODUCTION_8 = 3008,

    SUBPRODUCTION_9 = 3009,
    SUBPRODUCTION_10 = 3010,
    SUBPRODUCTION_11 = 3011,
    SUBPRODUCTION_12 = 3012,

    SUBPRODUCTION_13 = 3013,
    SUBPRODUCTION_14 = 3014,
    SUBPRODUCTION_15 = 3015,
    SUBPRODUCTION_16 = 3016
}
|
||||
|
||||
/// <summary>
/// Creates a parser that reads from <paramref name="input"/> through an
/// <c>ExpressionTokenizer</c> built with <paramref name="context"/>, reports
/// to <paramref name="analyzer"/>, and then registers its patterns.
/// </summary>
public ExpressionParser(TextReader input, Analyzer analyzer, ExpressionContext context)
    : base(new ExpressionTokenizer(input, context), analyzer)
{
    this.CreatePatterns();
}
|
||||
|
||||
/// <summary>
/// Creates a parser that reads from <paramref name="input"/> through an
/// <c>ExpressionTokenizer</c>, without an analyzer argument, and then
/// registers its patterns.
/// </summary>
public ExpressionParser(TextReader input)
    : base(new ExpressionTokenizer(input))
{
    this.CreatePatterns();
}
|
||||
|
||||
/// <summary>
/// Creates a parser that reads from <paramref name="input"/> through an
/// <c>ExpressionTokenizer</c>, reports to <paramref name="analyzer"/>, and
/// then registers its patterns.
/// </summary>
public ExpressionParser(TextReader input, Analyzer analyzer)
    : base(new ExpressionTokenizer(input), analyzer)
{
    this.CreatePatterns();
}
|
||||
|
||||
private void CreatePatterns()
|
||||
{
|
||||
ProductionPattern pattern = default(ProductionPattern);
|
||||
ProductionPatternAlternative alt = default(ProductionPatternAlternative);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.EXPRESSION), "Expression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.XOR_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.XOR_EXPRESSION), "XorExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.OR_EXPRESSION), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_1), 0, -1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.OR_EXPRESSION), "OrExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.AND_EXPRESSION), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_2), 0, -1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.AND_EXPRESSION), "AndExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.NOT_EXPRESSION), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_3), 0, -1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.NOT_EXPRESSION), "NotExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.NOT), 0, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.IN_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.IN_EXPRESSION), "InExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.COMPARE_EXPRESSION), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_4), 0, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.IN_TARGET_EXPRESSION), "InTargetExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.FIELD_PROPERTY_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.IN_LIST_TARGET_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.IN_LIST_TARGET_EXPRESSION), "InListTargetExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.LEFT_PAREN), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.ARGUMENT_LIST), 1, 1);
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.RIGHT_PAREN), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.COMPARE_EXPRESSION), "CompareExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.SHIFT_EXPRESSION), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_6), 0, -1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.SHIFT_EXPRESSION), "ShiftExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.ADDITIVE_EXPRESSION), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_8), 0, -1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.ADDITIVE_EXPRESSION), "AdditiveExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.MULTIPLICATIVE_EXPRESSION), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_10), 0, -1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.MULTIPLICATIVE_EXPRESSION), "MultiplicativeExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.POWER_EXPRESSION), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_12), 0, -1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.POWER_EXPRESSION), "PowerExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.NEGATE_EXPRESSION), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_13), 0, -1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.NEGATE_EXPRESSION), "NegateExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.SUB), 0, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.MEMBER_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.MEMBER_EXPRESSION), "MemberExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.BASIC_EXPRESSION), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_14), 0, -1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.MEMBER_ACCESS_EXPRESSION), "MemberAccessExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.DOT), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.MEMBER_FUNCTION_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.BASIC_EXPRESSION), "BasicExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.LITERAL_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.EXPRESSION_GROUP), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.MEMBER_FUNCTION_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.SPECIAL_FUNCTION_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.MEMBER_FUNCTION_EXPRESSION), "MemberFunctionExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.FIELD_PROPERTY_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.FUNCTION_CALL_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.FIELD_PROPERTY_EXPRESSION), "FieldPropertyExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.IDENTIFIER), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.SPECIAL_FUNCTION_EXPRESSION), "SpecialFunctionExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.IF_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.CAST_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.IF_EXPRESSION), "IfExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.IF), 1, 1);
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.LEFT_PAREN), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.EXPRESSION), 1, 1);
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.ARGUMENT_SEPARATOR), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.EXPRESSION), 1, 1);
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.ARGUMENT_SEPARATOR), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.EXPRESSION), 1, 1);
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.RIGHT_PAREN), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.CAST_EXPRESSION), "CastExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.CAST), 1, 1);
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.LEFT_PAREN), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.EXPRESSION), 1, 1);
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.ARGUMENT_SEPARATOR), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.CAST_TYPE_EXPRESSION), 1, 1);
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.RIGHT_PAREN), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.CAST_TYPE_EXPRESSION), "CastTypeExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.IDENTIFIER), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_15), 0, -1);
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.ARRAY_BRACES), 0, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.INDEX_EXPRESSION), "IndexExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.LEFT_BRACE), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.ARGUMENT_LIST), 1, 1);
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.RIGHT_BRACE), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.FUNCTION_CALL_EXPRESSION), "FunctionCallExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.IDENTIFIER), 1, 1);
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.LEFT_PAREN), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.ARGUMENT_LIST), 0, 1);
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.RIGHT_PAREN), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.ARGUMENT_LIST), "ArgumentList");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.EXPRESSION), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_16), 0, -1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.LITERAL_EXPRESSION), "LiteralExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.INTEGER), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.REAL), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.STRING_LITERAL), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.BOOLEAN_LITERAL_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.HEX_LITERAL), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.CHAR_LITERAL), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.NULL_LITERAL), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.DATETIME), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.TIMESPAN), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.BOOLEAN_LITERAL_EXPRESSION), "BooleanLiteralExpression");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.TRUE), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.FALSE), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(ExpressionConstants.EXPRESSION_GROUP), "ExpressionGroup");
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.LEFT_PAREN), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.EXPRESSION), 1, 1);
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.RIGHT_PAREN), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_1), "Subproduction1");
|
||||
pattern.Synthetic = true;
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.XOR), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.OR_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_2), "Subproduction2");
|
||||
pattern.Synthetic = true;
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.OR), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.AND_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_3), "Subproduction3");
|
||||
pattern.Synthetic = true;
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.AND), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.NOT_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_4), "Subproduction4");
|
||||
pattern.Synthetic = true;
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.IN), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.IN_TARGET_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_5), "Subproduction5");
|
||||
pattern.Synthetic = true;
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.EQ), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.GT), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.LT), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.GTE), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.LTE), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.NE), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_6), "Subproduction6");
|
||||
pattern.Synthetic = true;
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_5), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.SHIFT_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_7), "Subproduction7");
|
||||
pattern.Synthetic = true;
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.LEFT_SHIFT), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.RIGHT_SHIFT), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_8), "Subproduction8");
|
||||
pattern.Synthetic = true;
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_7), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.ADDITIVE_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_9), "Subproduction9");
|
||||
pattern.Synthetic = true;
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.ADD), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.SUB), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_10), "Subproduction10");
|
||||
pattern.Synthetic = true;
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_9), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.MULTIPLICATIVE_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_11), "Subproduction11");
|
||||
pattern.Synthetic = true;
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.MUL), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.DIV), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.MOD), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_12), "Subproduction12");
|
||||
pattern.Synthetic = true;
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_11), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.POWER_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_13), "Subproduction13");
|
||||
pattern.Synthetic = true;
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.POWER), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.NEGATE_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_14), "Subproduction14");
|
||||
pattern.Synthetic = true;
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.MEMBER_ACCESS_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.INDEX_EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_15), "Subproduction15");
|
||||
pattern.Synthetic = true;
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.DOT), 1, 1);
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.IDENTIFIER), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
|
||||
pattern = new ProductionPattern(Convert.ToInt32(SynteticPatterns.SUBPRODUCTION_16), "Subproduction16");
|
||||
pattern.Synthetic = true;
|
||||
alt = new ProductionPatternAlternative();
|
||||
alt.AddToken(Convert.ToInt32(ExpressionConstants.ARGUMENT_SEPARATOR), 1, 1);
|
||||
alt.AddProduction(Convert.ToInt32(ExpressionConstants.EXPRESSION), 1, 1);
|
||||
pattern.AddAlternative(alt);
|
||||
AddPattern(pattern);
|
||||
}
|
||||
}
|
||||
}
|
153
Parsing/ExpressionTokenizer.cs
Normal file
153
Parsing/ExpressionTokenizer.cs
Normal file
@@ -0,0 +1,153 @@
|
||||
using Flee.PublicTypes;
|
||||
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/// <summary>
/// A character stream tokenizer that registers the token patterns of the
/// expression language (operators, keywords, literals and identifiers).
/// </summary>
internal class ExpressionTokenizer : Tokenizer
{
    // Context handed to the custom (argument separator / real number) patterns.
    // It stays null when the single-argument constructor is used.
    private readonly ExpressionContext _myContext;

    /// <summary>
    /// Creates a tokenizer over <paramref name="input"/> whose custom patterns
    /// are initialized with <paramref name="context"/>.
    /// </summary>
    /// <param name="input">The reader supplying the expression text.</param>
    /// <param name="context">The context passed to the custom patterns.</param>
    public ExpressionTokenizer(TextReader input, ExpressionContext context) : base(input, true)
    {
        _myContext = context;
        CreatePatterns();
    }

    /// <summary>
    /// Creates a tokenizer over <paramref name="input"/> without a context;
    /// the custom patterns are then initialized with a null context.
    /// </summary>
    /// <param name="input">The reader supplying the expression text.</param>
    public ExpressionTokenizer(TextReader input) : base(input, true)
    {
        CreatePatterns();
    }

    /// <summary>
    /// Registers every token pattern with the base tokenizer. The
    /// registration order is deliberate and must not be changed casually.
    /// </summary>
    private void CreatePatterns()
    {
        // The REAL image is a template containing {0}/{1} placeholders; it is
        // handed unchanged to RealPattern (presumably expanded there - verify).
        const string realImage = "\\d{0}\\{1}\\d+([e][+-]\\d{{1,3}})?(d|f|m)?";

        // Small factories so each registration below reads as a single line.
        TokenPattern StringToken(int id, string name, string image) =>
            new TokenPattern(id, name, TokenPattern.PatternType.STRING, image);
        TokenPattern RegexToken(int id, string name, string image) =>
            new TokenPattern(id, name, TokenPattern.PatternType.REGEXP, image);

        // Arithmetic operators
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.ADD), "ADD", "+"));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.SUB), "SUB", "-"));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.MUL), "MUL", "*"));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.DIV), "DIV", "/"));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.POWER), "POWER", "^"));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.MOD), "MOD", "%"));

        // Grouping and indexing
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.LEFT_PAREN), "LEFT_PAREN", "("));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.RIGHT_PAREN), "RIGHT_PAREN", ")"));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.LEFT_BRACE), "LEFT_BRACE", "["));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.RIGHT_BRACE), "RIGHT_BRACE", "]"));

        // Comparison operators
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.EQ), "EQ", "="));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.LT), "LT", "<"));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.GT), "GT", ">"));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.LTE), "LTE", "<="));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.GTE), "GTE", ">="));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.NE), "NE", "<>"));

        // Logical operators and keywords
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.AND), "AND", "AND"));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.OR), "OR", "OR"));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.XOR), "XOR", "XOR"));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.NOT), "NOT", "NOT"));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.IN), "IN", "in"));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.DOT), "DOT", "."));

        // Argument separator: a custom pattern that additionally receives the context
        CustomTokenPattern separator = new ArgumentSeparatorPattern(Convert.ToInt32(ExpressionConstants.ARGUMENT_SEPARATOR), "ARGUMENT_SEPARATOR", TokenPattern.PatternType.STRING, ",");
        separator.Initialize(Convert.ToInt32(ExpressionConstants.ARGUMENT_SEPARATOR), "ARGUMENT_SEPARATOR", TokenPattern.PatternType.STRING, ",", _myContext);
        AddPattern(separator);

        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.ARRAY_BRACES), "ARRAY_BRACES", "[]"));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.LEFT_SHIFT), "LEFT_SHIFT", "<<"));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.RIGHT_SHIFT), "RIGHT_SHIFT", ">>"));

        // Whitespace is matched but flagged to be ignored
        TokenPattern whitespace = RegexToken(Convert.ToInt32(ExpressionConstants.WHITESPACE), "WHITESPACE", "\\s+");
        whitespace.Ignore = true;
        AddPattern(whitespace);

        // Numeric literals
        AddPattern(RegexToken(Convert.ToInt32(ExpressionConstants.INTEGER), "INTEGER", "\\d+(u|l|ul|lu|f|m)?"));

        CustomTokenPattern real = new RealPattern(Convert.ToInt32(ExpressionConstants.REAL), "REAL", TokenPattern.PatternType.REGEXP, realImage);
        real.Initialize(Convert.ToInt32(ExpressionConstants.REAL), "REAL", TokenPattern.PatternType.REGEXP, realImage, _myContext);
        AddPattern(real, false);

        // String and character literals
        AddPattern(RegexToken(Convert.ToInt32(ExpressionConstants.STRING_LITERAL), "STRING_LITERAL", "\"([^\"\\r\\n\\\\]|\\\\u[0-9a-f]{4}|\\\\[\\\\\"'trn])*\""), false);
        AddPattern(RegexToken(Convert.ToInt32(ExpressionConstants.CHAR_LITERAL), "CHAR_LITERAL", "'([^'\\r\\n\\\\]|\\\\u[0-9a-f]{4}|\\\\[\\\\\"'trn])'"), false);

        // Boolean literals, identifiers and remaining literals
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.TRUE), "TRUE", "True"));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.FALSE), "FALSE", "False"));
        AddPattern(RegexToken(Convert.ToInt32(ExpressionConstants.IDENTIFIER), "IDENTIFIER", "[a-z_]\\w*"));
        AddPattern(RegexToken(Convert.ToInt32(ExpressionConstants.HEX_LITERAL), "HEX_LITERAL", "0x[0-9a-f]+(u|l|ul|lu)?"));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.NULL_LITERAL), "NULL_LITERAL", "null"));

        // Date and time literals
        AddPattern(RegexToken(Convert.ToInt32(ExpressionConstants.TIMESPAN), "TIMESPAN", "##(\\d+\\.)?\\d{2}:\\d{2}(:\\d{2}(\\.\\d{1,7})?)?#"), false);
        AddPattern(RegexToken(Convert.ToInt32(ExpressionConstants.DATETIME), "DATETIME", "#[^#]+#"));

        // Function-like keywords
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.IF), "IF", "if"));
        AddPattern(StringToken(Convert.ToInt32(ExpressionConstants.CAST), "CAST", "cast"));
    }
}
|
||||
}
|
226
Parsing/LookAheadReader.cs
Normal file
226
Parsing/LookAheadReader.cs
Normal file
@@ -0,0 +1,226 @@
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/// <summary>
/// A look-ahead character stream reader. This class provides the
/// functionality of a buffered line-number reader, with the additional
/// possibility of peeking an unlimited number of characters ahead. When
/// looking further and further ahead in the character stream, the buffer
/// is continuously enlarged to contain all the required characters from
/// the current position and onwards. Looking more characters ahead
/// therefore requires more memory, and eventually becomes unviable.
/// </summary>
internal class LookAheadReader : TextReader
{
    // Read requests to the underlying reader are rounded up to this size.
    private const int StreamBlockSize = 4096;

    // Buffer growth granularity, and the number of consumed characters
    // tolerated before the buffer is compacted.
    private const int BufferBlockSize = 1024;

    private char[] _buffer = new char[StreamBlockSize];

    // Index of the next unread character in _buffer.
    private int _pos;

    // Number of valid characters in _buffer.
    private int _length;

    // Underlying reader; null once it is exhausted, failed or closed.
    private TextReader _input = null;

    private int _line = 1;
    private int _column = 1;

    /// <summary>
    /// Creates a new look-ahead reader on top of <paramref name="input"/>.
    /// </summary>
    /// <param name="input">The character stream to read from.</param>
    public LookAheadReader(TextReader input) : base()
    {
        _input = input;
    }

    /// <summary>The line number (1-based) of the next character to read.</summary>
    public int LineNumber => _line;

    /// <summary>The column number (1-based) of the next character to read.</summary>
    public int ColumnNumber => _column;

    /// <summary>
    /// Reads a single character and advances the position.
    /// </summary>
    /// <returns>The character read, or -1 at the end of the stream.</returns>
    public override int Read()
    {
        ReadAhead(1);
        if (_pos >= _length)
        {
            return -1;
        }

        UpdateLineColumnNumbers(1);

        // Return the character at the current position, then advance.
        return _buffer[_pos++];
    }

    /// <summary>
    /// Reads up to <paramref name="len"/> characters into
    /// <paramref name="cbuf"/> starting at index <paramref name="off"/>.
    /// </summary>
    /// <param name="cbuf">The destination array.</param>
    /// <param name="off">The destination index of the first character.</param>
    /// <param name="len">The maximum number of characters to read.</param>
    /// <returns>
    /// The number of characters copied, or -1 when no characters remain.
    /// NOTE(review): TextReader documents 0 for end of input; the -1 value
    /// is kept because existing callers may depend on it.
    /// </returns>
    public override int Read(char[] cbuf, int off, int len)
    {
        ReadAhead(len);
        if (_pos >= _length)
        {
            return -1;
        }

        int count = System.Math.Min(_length - _pos, len);
        UpdateLineColumnNumbers(count);
        Array.Copy(_buffer, _pos, cbuf, off, count);
        _pos += count;
        return count;
    }

    /// <summary>
    /// Reads up to <paramref name="len"/> characters as a string.
    /// </summary>
    /// <param name="len">The maximum number of characters to read.</param>
    /// <returns>The characters read, or null when no characters remain.</returns>
    public string ReadString(int len)
    {
        ReadAhead(len);
        if (_pos >= _length)
        {
            return null;
        }

        int count = System.Math.Min(_length - _pos, len);
        UpdateLineColumnNumbers(count);
        string result = new string(_buffer, _pos, count);
        _pos += count;
        return result;
    }

    /// <summary>
    /// Returns the next character without consuming it.
    /// </summary>
    /// <returns>The next character, or -1 at the end of the stream.</returns>
    public override int Peek()
    {
        return Peek(0);
    }

    /// <summary>
    /// Returns the character <paramref name="off"/> positions ahead of the
    /// current position without consuming anything.
    /// </summary>
    /// <param name="off">The look-ahead offset; 0 is the next character.</param>
    /// <returns>The character, or -1 if the stream ends before it.</returns>
    public int Peek(int off)
    {
        ReadAhead(off + 1);
        if (_pos + off >= _length)
        {
            return -1;
        }

        return _buffer[_pos + off];
    }

    /// <summary>
    /// Returns up to <paramref name="len"/> characters starting
    /// <paramref name="off"/> positions ahead, without consuming anything.
    /// </summary>
    /// <param name="off">The look-ahead offset of the first character.</param>
    /// <param name="len">The maximum number of characters to return.</param>
    /// <returns>
    /// The characters found (possibly fewer than requested), or null if
    /// the stream ends at or before the offset.
    /// </returns>
    public string PeekString(int off, int len)
    {
        ReadAhead(off + len + 1);
        if (_pos + off >= _length)
        {
            return null;
        }

        int count = System.Math.Min(_length - (_pos + off), len);
        return new string(_buffer, _pos + off, count);
    }

    /// <summary>
    /// Discards the buffer and closes the underlying reader, if still open.
    /// </summary>
    public override void Close()
    {
        _buffer = null;
        _pos = 0;
        _length = 0;
        if (_input != null)
        {
            _input.Close();
            _input = null;
        }
    }

    /// <summary>
    /// Makes sure that the character <paramref name="offset"/> positions
    /// ahead of the current position is buffered, unless the underlying
    /// stream ends first. A read returning fewer characters than requested
    /// is not treated as end of stream; only a read returning no characters
    /// closes the input.
    /// </summary>
    /// <param name="offset">The look-ahead distance required.</param>
    /// <exception cref="IOException">
    /// Rethrown from the underlying reader; the input is dropped first.
    /// </exception>
    private void ReadAhead(int offset)
    {
        while (_input != null && _pos + offset >= _length)
        {
            // Remove already consumed characters from the buffer
            if (_pos > BufferBlockSize)
            {
                Array.Copy(_buffer, _pos, _buffer, 0, _length - _pos);
                _length -= _pos;
                _pos = 0;
            }

            // Number of characters to request, rounded up to a full block
            int size = _pos + offset - _length + 1;
            if (size % StreamBlockSize != 0)
            {
                size = (size / StreamBlockSize + 1) * StreamBlockSize;
            }
            EnsureBufferCapacity(_length + size);

            int readSize;
            try
            {
                readSize = _input.Read(_buffer, _length, size);
            }
            catch (IOException)
            {
                _input = null;
                throw;
            }

            if (readSize > 0)
            {
                _length += readSize;
            }
            else
            {
                // No more characters: release the exhausted input
                try
                {
                    _input.Close();
                }
                finally
                {
                    _input = null;
                }
            }
        }
    }

    /// <summary>
    /// Grows the buffer, in multiples of the buffer block size, so that it
    /// can hold at least <paramref name="size"/> characters.
    /// </summary>
    /// <param name="size">The minimum capacity required.</param>
    private void EnsureBufferCapacity(int size)
    {
        if (_buffer.Length >= size)
        {
            return;
        }

        if (size % BufferBlockSize != 0)
        {
            size = (size / BufferBlockSize + 1) * BufferBlockSize;
        }

        char[] newbuf = new char[size];
        Array.Copy(_buffer, 0, newbuf, 0, _length);
        _buffer = newbuf;
    }

    /// <summary>
    /// Updates the line and column counters for the next
    /// <paramref name="offset"/> characters, which are about to be consumed.
    /// </summary>
    /// <param name="offset">The number of characters being consumed.</param>
    private void UpdateLineColumnNumbers(int offset)
    {
        for (int i = 0; i < offset; i++)
        {
            // Only a line feed starts a new line
            if (_buffer[_pos + i] == '\n')
            {
                _line += 1;
                _column = 1;
            }
            else
            {
                _column += 1;
            }
        }
    }
}
|
||||
}
|
589
Parsing/LookAheadSet.cs
Normal file
589
Parsing/LookAheadSet.cs
Normal file
@@ -0,0 +1,589 @@
|
||||
using System.Collections;
|
||||
using System.Text;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/// <summary>
/// A token look-ahead set. This class contains a set of token id
/// sequences. All sequences in the set are limited in length, so that no
/// single sequence is longer than a maximum value. This class also
/// filters out duplicates. Each token sequence also contains a repeat
/// flag, allowing the look-ahead set to contain information about
/// possible infinite repetitions of certain sequences. That information
/// is important when conflicts arise between two look-ahead sets, as
/// such a conflict cannot be resolved if the conflicting sequences can
/// be repeated (would cause an infinite loop).
/// </summary>
internal class LookAheadSet
{
    // The stored Sequence instances, in insertion order.
    private readonly ArrayList _elements = new ArrayList();

    // Maximum number of tokens kept per sequence.
    private readonly int _maxLength;

    /// <summary>Creates an empty look-ahead set.</summary>
    /// <param name="maxLength">The maximum token sequence length.</param>
    public LookAheadSet(int maxLength)
    {
        _maxLength = maxLength;
    }

    /// <summary>
    /// Creates a look-ahead set containing the sequences of
    /// <paramref name="set"/>, truncated to <paramref name="maxLength"/>.
    /// </summary>
    /// <param name="maxLength">The maximum token sequence length.</param>
    /// <param name="set">The set to copy sequences from.</param>
    public LookAheadSet(int maxLength, LookAheadSet set)
        : this(maxLength)
    {
        AddAll(set);
    }

    /// <summary>Returns the number of sequences in the set.</summary>
    public int Size()
    {
        return _elements.Count;
    }

    /// <summary>
    /// Returns the length of the shortest sequence, or 0 for an empty set.
    /// </summary>
    public int GetMinLength()
    {
        int min = -1;

        foreach (Sequence seq in _elements)
        {
            if (min < 0 || seq.Length() < min)
            {
                min = seq.Length();
            }
        }
        return (min < 0) ? 0 : min;
    }

    /// <summary>
    /// Returns the length of the longest sequence, or 0 for an empty set.
    /// </summary>
    public int GetMaxLength()
    {
        int max = 0;

        foreach (Sequence seq in _elements)
        {
            if (seq.Length() > max)
            {
                max = seq.Length();
            }
        }
        return max;
    }

    /// <summary>
    /// Returns the distinct first token ids of all non-empty sequences, in
    /// order of first appearance.
    /// </summary>
    public int[] GetInitialTokens()
    {
        ArrayList list = new ArrayList();

        foreach (Sequence seq in _elements)
        {
            object token = seq.GetToken(0);
            if (token != null && !list.Contains(token))
            {
                list.Add(token);
            }
        }

        int[] result = new int[list.Count];
        for (int i = 0; i < list.Count; i++)
        {
            result[i] = (int)list[i];
        }
        return result;
    }

    /// <summary>Checks whether any sequence has its repeat flag set.</summary>
    public bool IsRepetitive()
    {
        foreach (Sequence seq in _elements)
        {
            if (seq.IsRepetitive())
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Checks whether the next tokens peeked from <paramref name="parser"/>
    /// match any sequence in this set.
    /// </summary>
    /// <param name="parser">The parser to peek tokens from.</param>
    public bool IsNext(Parser parser)
    {
        foreach (Sequence seq in _elements)
        {
            if (seq.IsNext(parser))
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Checks whether the next tokens peeked from <paramref name="parser"/>
    /// match the first <paramref name="length"/> tokens of any sequence.
    /// </summary>
    /// <param name="parser">The parser to peek tokens from.</param>
    /// <param name="length">The maximum number of tokens to compare.</param>
    public bool IsNext(Parser parser, int length)
    {
        foreach (Sequence seq in _elements)
        {
            if (seq.IsNext(parser, length))
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Checks whether any sequence in this set overlaps a sequence in
    /// <paramref name="set"/>, i.e. one is a prefix of the other.
    /// </summary>
    /// <param name="set">The set to compare with.</param>
    public bool IsOverlap(LookAheadSet set)
    {
        foreach (Sequence seq in _elements)
        {
            if (set.IsOverlap(seq))
            {
                return true;
            }
        }
        return false;
    }

    // Checks whether seq is a prefix of, or has as a prefix, any stored sequence.
    private bool IsOverlap(Sequence seq)
    {
        foreach (Sequence elem in _elements)
        {
            if (seq.StartsWith(elem) || elem.StartsWith(seq))
            {
                return true;
            }
        }
        return false;
    }

    // Checks whether a sequence with the same tokens is stored in the set.
    private bool Contains(Sequence elem)
    {
        return FindSequence(elem) != null;
    }

    /// <summary>
    /// Checks whether this set and <paramref name="set"/> share at least
    /// one sequence with identical tokens.
    /// </summary>
    /// <param name="set">The set to compare with.</param>
    public bool Intersects(LookAheadSet set)
    {
        foreach (Sequence seq in _elements)
        {
            if (set.Contains(seq))
            {
                return true;
            }
        }
        return false;
    }

    // Returns the stored sequence with the same tokens as elem, or null.
    private Sequence FindSequence(Sequence elem)
    {
        foreach (Sequence candidate in _elements)
        {
            if (candidate.Equals(elem))
            {
                return candidate;
            }
        }
        return null;
    }

    // Adds a sequence, truncated to the maximum length, unless a sequence
    // with the same tokens is already present.
    private void Add(Sequence seq)
    {
        if (seq.Length() > _maxLength)
        {
            seq = new Sequence(_maxLength, seq);
        }
        if (!Contains(seq))
        {
            _elements.Add(seq);
        }
    }

    /// <summary>Adds a non-repetitive single-token sequence.</summary>
    /// <param name="token">The token id to add.</param>
    public void Add(int token)
    {
        Add(new Sequence(false, token));
    }

    /// <summary>Adds all sequences of <paramref name="set"/> to this set.</summary>
    /// <param name="set">The set whose sequences are added.</param>
    public void AddAll(LookAheadSet set)
    {
        // Index-based on purpose: stays well-defined even if set is this set.
        for (int i = 0; i < set._elements.Count; i++)
        {
            Add((Sequence)set._elements[i]);
        }
    }

    /// <summary>Adds the empty token sequence to this set.</summary>
    public void AddEmpty()
    {
        Add(new Sequence());
    }

    // Removes the first stored sequence equal (by tokens) to seq, if any.
    private void Remove(Sequence seq)
    {
        _elements.Remove(seq);
    }

    /// <summary>
    /// Removes all sequences of <paramref name="set"/> from this set.
    /// </summary>
    /// <param name="set">The set whose sequences are removed.</param>
    public void RemoveAll(LookAheadSet set)
    {
        // Index-based on purpose: stays well-defined even if set is this set.
        for (int i = 0; i < set._elements.Count; i++)
        {
            Remove((Sequence)set._elements[i]);
        }
    }

    /// <summary>
    /// Creates the set of remainders of all sequences starting with
    /// <paramref name="token"/>, with that initial token removed. The
    /// result has a maximum length one less than this set.
    /// </summary>
    /// <param name="token">The initial token id to match.</param>
    public LookAheadSet CreateNextSet(int token)
    {
        LookAheadSet result = new LookAheadSet(_maxLength - 1);

        foreach (Sequence seq in _elements)
        {
            object value = seq.GetToken(0);
            if (value != null && token == (int)value)
            {
                result.Add(seq.Subsequence(1));
            }
        }
        return result;
    }

    /// <summary>
    /// Creates a set with the sequences present in both this set and
    /// <paramref name="set"/>. When this set's sequence is repetitive the
    /// instance from <paramref name="set"/> is used, otherwise the
    /// instance from this set.
    /// </summary>
    /// <param name="set">The set to intersect with.</param>
    public LookAheadSet CreateIntersection(LookAheadSet set)
    {
        LookAheadSet result = new LookAheadSet(_maxLength);

        foreach (Sequence seq1 in _elements)
        {
            Sequence seq2 = set.FindSequence(seq1);
            if (seq2 != null)
            {
                result.Add(seq1.IsRepetitive() ? seq2 : seq1);
            }
        }
        return result;
    }

    /// <summary>
    /// Creates a set with every sequence of this set concatenated with
    /// every sequence of <paramref name="set"/>, truncated to the maximum
    /// length. If either set is empty, the other set instance itself is
    /// returned.
    /// </summary>
    /// <param name="set">The set supplying the sequence tails.</param>
    public LookAheadSet CreateCombination(LookAheadSet set)
    {
        // Handle special cases
        if (Size() <= 0)
        {
            return set;
        }
        if (set.Size() <= 0)
        {
            return this;
        }

        // Create combinations
        LookAheadSet result = new LookAheadSet(_maxLength);
        foreach (Sequence first in _elements)
        {
            if (first.Length() >= _maxLength)
            {
                // Already at full length; nothing can be appended
                result.Add(first);
            }
            else if (first.Length() <= 0)
            {
                result.AddAll(set);
            }
            else
            {
                foreach (Sequence second in set._elements)
                {
                    result.Add(first.Concat(_maxLength, second));
                }
            }
        }
        return result;
    }

    /// <summary>
    /// Creates a set with the sequences of this set that overlap some
    /// sequence in <paramref name="set"/>.
    /// </summary>
    /// <param name="set">The set to compare with.</param>
    public LookAheadSet CreateOverlaps(LookAheadSet set)
    {
        LookAheadSet result = new LookAheadSet(_maxLength);

        foreach (Sequence seq in _elements)
        {
            if (set.IsOverlap(seq))
            {
                result.Add(seq);
            }
        }
        return result;
    }

    /// <summary>
    /// Creates a set with the remainders of the sequences of this set that
    /// start with some sequence in <paramref name="set"/>, with that
    /// prefix removed. If either set is empty, this instance is returned.
    /// </summary>
    /// <param name="set">The set supplying the prefixes.</param>
    public LookAheadSet CreateFilter(LookAheadSet set)
    {
        // Handle special cases
        if (Size() <= 0 || set.Size() <= 0)
        {
            return this;
        }

        // Create combinations
        LookAheadSet result = new LookAheadSet(_maxLength);
        foreach (Sequence first in _elements)
        {
            foreach (Sequence second in set._elements)
            {
                if (first.StartsWith(second))
                {
                    result.Add(first.Subsequence(second.Length()));
                }
            }
        }
        return result;
    }

    /// <summary>
    /// Creates a set with the same token sequences as this set, all of
    /// them flagged as repetitive.
    /// </summary>
    public LookAheadSet CreateRepetitive()
    {
        LookAheadSet result = new LookAheadSet(_maxLength);

        foreach (Sequence seq in _elements)
        {
            result.Add(seq.IsRepetitive() ? seq : new Sequence(true, seq));
        }
        return result;
    }

    /// <summary>
    /// Returns a string listing the sequences as numeric token ids.
    /// </summary>
    public override string ToString()
    {
        return ToString(null);
    }

    /// <summary>
    /// Returns a string listing the sequences, one per line inside braces.
    /// </summary>
    /// <param name="tokenizer">
    /// The tokenizer used to describe token ids, or null to print the
    /// numeric ids.
    /// </param>
    public string ToString(Tokenizer tokenizer)
    {
        StringBuilder buffer = new StringBuilder();

        buffer.Append("{");
        foreach (Sequence seq in _elements)
        {
            buffer.Append("\n ");
            buffer.Append(seq.ToString(tokenizer));
        }
        buffer.Append("\n}");
        return buffer.ToString();
    }

    /// <summary>
    /// A token id sequence with a repeat flag. Equality and hashing
    /// consider the tokens only, never the repeat flag.
    /// </summary>
    private class Sequence
    {
        private bool _repeat;

        // Boxed int token ids.
        private readonly ArrayList _tokens;

        /// <summary>Creates an empty, non-repetitive sequence.</summary>
        public Sequence()
        {
            _repeat = false;
            _tokens = new ArrayList(0);
        }

        /// <summary>Creates a sequence holding a single token.</summary>
        /// <param name="repeat">The repeat flag of the new sequence.</param>
        /// <param name="token">The token id.</param>
        public Sequence(bool repeat, int token)
        {
            _repeat = repeat;
            _tokens = new ArrayList(1);
            _tokens.Add(token);
        }

        /// <summary>
        /// Creates a copy of <paramref name="seq"/> holding at most its
        /// first <paramref name="length"/> tokens.
        /// </summary>
        /// <param name="length">The maximum number of tokens to copy.</param>
        /// <param name="seq">The sequence to copy.</param>
        public Sequence(int length, Sequence seq)
        {
            _repeat = seq._repeat;
            _tokens = new ArrayList(length);
            if (seq.Length() < length)
            {
                length = seq.Length();
            }
            for (int i = 0; i < length; i++)
            {
                _tokens.Add(seq._tokens[i]);
            }
        }

        /// <summary>
        /// Creates a sequence with the given repeat flag that shares the
        /// token list instance of <paramref name="seq"/> (no copy is made).
        /// </summary>
        /// <param name="repeat">The repeat flag of the new sequence.</param>
        /// <param name="seq">The sequence whose token list is shared.</param>
        public Sequence(bool repeat, Sequence seq)
        {
            _repeat = repeat;
            _tokens = seq._tokens;
        }

        /// <summary>Returns the number of tokens in the sequence.</summary>
        public int Length()
        {
            return _tokens.Count;
        }

        /// <summary>
        /// Returns the boxed token id at <paramref name="pos"/>, or null if
        /// the position is outside the sequence.
        /// </summary>
        /// <param name="pos">The zero-based token position.</param>
        public object GetToken(int pos)
        {
            if (pos >= 0 && pos < _tokens.Count)
            {
                return _tokens[pos];
            }
            return null;
        }

        /// <summary>
        /// Checks whether <paramref name="obj"/> is a sequence with the
        /// same tokens.
        /// </summary>
        public override bool Equals(object obj)
        {
            return obj is Sequence other && Equals(other);
        }

        /// <summary>
        /// Checks whether <paramref name="seq"/> holds the same tokens in
        /// the same order. The repeat flag is not compared.
        /// </summary>
        public bool Equals(Sequence seq)
        {
            if (seq is null || _tokens.Count != seq._tokens.Count)
            {
                return false;
            }
            for (int i = 0; i < _tokens.Count; i++)
            {
                if (!_tokens[i].Equals(seq._tokens[i]))
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>Returns a hash code derived from the token count.</summary>
        public override int GetHashCode()
        {
            return _tokens.Count.GetHashCode();
        }

        /// <summary>
        /// Checks whether this sequence begins with all tokens of
        /// <paramref name="seq"/>.
        /// </summary>
        public bool StartsWith(Sequence seq)
        {
            if (Length() < seq.Length())
            {
                return false;
            }
            for (int i = 0; i < seq._tokens.Count; i++)
            {
                if (!_tokens[i].Equals(seq._tokens[i]))
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>Returns the repeat flag.</summary>
        public bool IsRepetitive()
        {
            return _repeat;
        }

        /// <summary>
        /// Checks whether the tokens peeked from <paramref name="parser"/>
        /// match this whole sequence.
        /// </summary>
        public bool IsNext(Parser parser)
        {
            return IsNext(parser, _tokens.Count);
        }

        /// <summary>
        /// Checks whether the tokens peeked from <paramref name="parser"/>
        /// match the first <paramref name="length"/> tokens of this
        /// sequence (capped at the sequence length).
        /// </summary>
        public bool IsNext(Parser parser, int length)
        {
            if (length > _tokens.Count)
            {
                length = _tokens.Count;
            }
            for (int i = 0; i < length; i++)
            {
                int id = (int)_tokens[i];
                var token = parser.PeekToken(i);
                if (token == null || token.Id != id)
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>
        /// Returns the sequence as a list of numeric token ids.
        /// </summary>
        public override string ToString()
        {
            return ToString(null);
        }

        /// <summary>
        /// Returns the sequence in brackets, followed by " *" when the
        /// repeat flag is set. With a tokenizer the tokens are shown by
        /// pattern description separated by spaces; without one the
        /// numeric ids are shown separated by ", ".
        /// </summary>
        /// <param name="tokenizer">The tokenizer, or null.</param>
        public string ToString(Tokenizer tokenizer)
        {
            StringBuilder buffer = new StringBuilder();

            buffer.Append("[");
            for (int i = 0; i < _tokens.Count; i++)
            {
                int id = (int)_tokens[i];
                if (tokenizer == null)
                {
                    // ArrayList.ToString() only yields the type name, so
                    // the ids are written out explicitly.
                    if (i > 0)
                    {
                        buffer.Append(", ");
                    }
                    buffer.Append(id);
                }
                else
                {
                    string str = tokenizer.GetPatternDescription(id);
                    if (i > 0)
                    {
                        buffer.Append(" ");
                    }
                    buffer.Append(str);
                }
            }
            buffer.Append("]");
            if (_repeat)
            {
                buffer.Append(" *");
            }
            return buffer.ToString();
        }

        /// <summary>
        /// Creates a new sequence with the tokens of this sequence
        /// followed by those of <paramref name="seq"/>, limited to
        /// <paramref name="length"/> tokens. The result is repetitive if
        /// either input is.
        /// </summary>
        /// <param name="length">The maximum length of the result.</param>
        /// <param name="seq">The sequence to append.</param>
        public Sequence Concat(int length, Sequence seq)
        {
            Sequence res = new Sequence(length, this);

            if (seq._repeat)
            {
                res._repeat = true;
            }

            // Room left for tokens from the appended sequence
            length -= Length();
            if (length > seq.Length())
            {
                res._tokens.AddRange(seq._tokens);
            }
            else
            {
                for (int i = 0; i < length; i++)
                {
                    res._tokens.Add(seq._tokens[i]);
                }
            }
            return res;
        }

        /// <summary>
        /// Creates a copy of this sequence with the first
        /// <paramref name="start"/> tokens removed.
        /// </summary>
        /// <param name="start">The number of leading tokens to drop.</param>
        public Sequence Subsequence(int start)
        {
            Sequence res = new Sequence(Length(), this);

            while (start > 0 && res._tokens.Count > 0)
            {
                res._tokens.RemoveAt(0);
                start--;
            }
            return res;
        }
    }
}
|
||||
}
|
107
Parsing/Matcher.cs
Normal file
107
Parsing/Matcher.cs
Normal file
@@ -0,0 +1,107 @@
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/// <summary>
/// A regular expression string matcher. It applies one regular
/// expression element to one input buffer and remembers the outcome of
/// the latest attempt: the start position, the matched length and
/// whether the end of the input was reached. This class is not
/// thread-safe.
/// </summary>
internal class Matcher
{
    private readonly Element _element;
    private readonly bool _ignoreCase;
    private ReaderBuffer _buffer;
    private int _start;

    // Length of the latest match; -1 when there is none.
    private int _length;
    private bool _endOfString;

    /// <summary>Creates a matcher with no match recorded.</summary>
    /// <param name="e">The regular expression element to match with.</param>
    /// <param name="buffer">The input buffer to match against.</param>
    /// <param name="ignoreCase">The case-insensitivity flag.</param>
    internal Matcher(Element e, ReaderBuffer buffer, bool ignoreCase)
    {
        _element = e;
        _buffer = buffer;
        _ignoreCase = ignoreCase;
        _start = 0;
        Reset();
    }

    /// <summary>Returns the case-insensitivity flag given at construction.</summary>
    public bool IsCaseInsensitive() => _ignoreCase;

    /// <summary>
    /// Clears the recorded match length and the end-of-string flag. The
    /// start position is left untouched.
    /// </summary>
    public void Reset()
    {
        _endOfString = false;
        _length = -1;
    }

    /// <summary>
    /// Resets the matcher to work on a new buffer reading <paramref name="str"/>.
    /// </summary>
    /// <param name="str">The new input string.</param>
    public void Reset(string str)
    {
        var reader = new StringReader(str);
        Reset(new ReaderBuffer(reader));
    }

    /// <summary>Resets the matcher to work on <paramref name="buffer"/>.</summary>
    /// <param name="buffer">The new input buffer.</param>
    public void Reset(ReaderBuffer buffer)
    {
        _buffer = buffer;
        Reset();
    }

    /// <summary>Returns the start position of the latest match attempt.</summary>
    public int Start() => _start;

    /// <summary>
    /// Returns the start position plus the match length, or just the start
    /// position when the recorded length is not positive.
    /// </summary>
    public int End() => _length > 0 ? _start + _length : _start;

    /// <summary>Returns the length of the latest match, or -1 if none.</summary>
    public int Length() => _length;

    /// <summary>Returns whether the end-of-string flag has been set.</summary>
    public bool HasReadEndOfString() => _endOfString;

    /// <summary>Attempts a match starting at position 0.</summary>
    /// <returns>true if the element matched, otherwise false.</returns>
    public bool MatchFromBeginning() => MatchFrom(0);

    /// <summary>
    /// Attempts a match starting at <paramref name="pos"/> and records the
    /// start position and resulting length.
    /// </summary>
    /// <param name="pos">The start position handed to the element.</param>
    /// <returns>true if the element reported a non-negative length.</returns>
    public bool MatchFrom(int pos)
    {
        Reset();
        _start = pos;
        _length = _element.Match(this, _buffer, _start, 0);
        return _length >= 0;
    }

    /// <summary>
    /// Returns the matched text, read from the buffer's current position,
    /// or an empty string when the recorded length is not positive.
    /// </summary>
    public override string ToString()
    {
        // NOTE(review): the text is taken at _buffer.Position, not _start -
        // confirm this is intended for matches with a non-zero start.
        return _length <= 0 ? "" : _buffer.Substring(_buffer.Position, _length);
    }

    /// <summary>Sets the end-of-string flag.</summary>
    internal void SetReadEndOfString()
    {
        _endOfString = true;
    }
}
|
||||
}
|
240
Parsing/Node.cs
Normal file
240
Parsing/Node.cs
Normal file
@@ -0,0 +1,240 @@
|
||||
using System.Collections;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
|
||||
/// <summary>
/// An abstract parse tree node. This class is inherited by all nodes
/// in the parse tree, i.e. by the token and production classes.
/// </summary>
internal abstract class Node
{
    private Node _parent;
    private ArrayList _values;

    /// <summary>
    /// Tells whether this node is hidden. The base implementation
    /// always answers false; subclasses may override.
    /// </summary>
    internal virtual bool IsHidden() => false;

    /// <summary>The node type id.</summary>
    public abstract int Id { get; }

    /// <summary>Method form of <see cref="Id"/>.</summary>
    public virtual int GetId() => Id;

    /// <summary>The node name.</summary>
    public abstract string Name { get; }

    /// <summary>Method form of <see cref="Name"/>.</summary>
    public virtual string GetName() => Name;

    /// <summary>
    /// The start line of the first child (in order) that reports a
    /// non-negative start line, or -1 if there is none.
    /// </summary>
    public virtual int StartLine
    {
        get
        {
            for (int index = 0; index < Count; index++)
            {
                int candidate = this[index].StartLine;
                if (candidate < 0)
                {
                    continue;
                }
                return candidate;
            }
            return -1;
        }
    }

    /// <summary>Method form of <see cref="StartLine"/>.</summary>
    public virtual int GetStartLine() => StartLine;

    /// <summary>
    /// The start column of the first child (in order) that reports a
    /// non-negative start column, or -1 if there is none.
    /// </summary>
    public virtual int StartColumn
    {
        get
        {
            for (int index = 0; index < Count; index++)
            {
                int candidate = this[index].StartColumn;
                if (candidate < 0)
                {
                    continue;
                }
                return candidate;
            }
            return -1;
        }
    }

    /// <summary>Method form of <see cref="StartColumn"/>.</summary>
    public virtual int GetStartColumn() => StartColumn;

    /// <summary>
    /// The end line of the last child (searching backwards) that
    /// reports a non-negative end line, or -1 if there is none.
    /// </summary>
    public virtual int EndLine
    {
        get
        {
            for (int index = Count - 1; index >= 0; index--)
            {
                int candidate = this[index].EndLine;
                if (candidate < 0)
                {
                    continue;
                }
                return candidate;
            }
            return -1;
        }
    }

    /// <summary>Method form of <see cref="EndLine"/>.</summary>
    public virtual int GetEndLine() => EndLine;

    /// <summary>
    /// The end column of the last child (searching backwards) that
    /// reports a non-negative end column, or -1 if there is none.
    /// </summary>
    public virtual int EndColumn
    {
        get
        {
            for (int index = Count - 1; index >= 0; index--)
            {
                int candidate = this[index].EndColumn;
                if (candidate < 0)
                {
                    continue;
                }
                return candidate;
            }
            return -1;
        }
    }

    /// <summary>Method form of <see cref="EndColumn"/>.</summary>
    public virtual int GetEndColumn() => EndColumn;

    /// <summary>The parent node, or null if none has been set.</summary>
    public Node Parent
    {
        get { return _parent; }
    }

    /// <summary>Method form of <see cref="Parent"/>.</summary>
    public Node GetParent() => Parent;

    /// <summary>Sets the parent node.</summary>
    /// <param name="parent">The new parent node.</param>
    internal void SetParent(Node parent)
    {
        _parent = parent;
    }

    /// <summary>
    /// The number of child nodes. The base implementation has no
    /// children and returns zero.
    /// </summary>
    public virtual int Count
    {
        get { return 0; }
    }

    /// <summary>Method form of <see cref="Count"/>.</summary>
    public virtual int GetChildCount() => Count;

    /// <summary>
    /// Counts all nodes below this one: each child plus that child's
    /// own descendants.
    /// </summary>
    public int GetDescendantCount()
    {
        int total = 0;

        for (int index = 0; index < Count; index++)
        {
            total += 1 + this[index].GetDescendantCount();
        }
        return total;
    }

    /// <summary>
    /// The child node at the given index. The base implementation
    /// always returns null.
    /// </summary>
    public virtual Node this[int index]
    {
        get { return null; }
    }

    /// <summary>Method form of the indexer.</summary>
    public virtual Node GetChildAt(int index) => this[index];

    /// <summary>
    /// The list of values attached to this node. Reading the property
    /// creates an empty list on first use; assigning replaces the list.
    /// </summary>
    public ArrayList Values
    {
        get { return _values ?? (_values = new ArrayList()); }
        set { _values = value; }
    }

    /// <summary>
    /// Returns the number of attached values without creating the
    /// value list when none exists.
    /// </summary>
    public int GetValueCount() => _values?.Count ?? 0;

    /// <summary>Returns the value at the given position in <see cref="Values"/>.</summary>
    public object GetValue(int pos) => Values[pos];

    /// <summary>
    /// Returns the underlying value list as stored, which is null when
    /// no list has been created or after <see cref="RemoveAllValues"/>.
    /// </summary>
    public ArrayList GetAllValues() => _values;

    /// <summary>Appends a value to this node; null values are ignored.</summary>
    public void AddValue(object value)
    {
        if (value == null)
        {
            return;
        }
        Values.Add(value);
    }

    /// <summary>Appends all values in a list; a null list is ignored.</summary>
    public void AddValues(ArrayList values)
    {
        if (values == null)
        {
            return;
        }
        Values.AddRange(values);
    }

    /// <summary>Discards the value list.</summary>
    public void RemoveAllValues()
    {
        _values = null;
    }

    /// <summary>
    /// Prints this node and all its children to the given writer,
    /// then flushes the writer.
    /// </summary>
    /// <param name="output">The writer to print to.</param>
    public void PrintTo(TextWriter output)
    {
        PrintTo(output, "");
        output.Flush();
    }

    /// <summary>
    /// Prints this node on one line with the given indentation and
    /// recurses into the children with the indentation extended.
    /// </summary>
    private void PrintTo(TextWriter output, string indent)
    {
        output.WriteLine(indent + ToString());

        string childIndent = indent + " ";
        for (int index = 0; index < Count; index++)
        {
            this[index].PrintTo(output, childIndent);
        }
    }
}
|
||||
}
|
250
Parsing/ParseException.cs
Normal file
250
Parsing/ParseException.cs
Normal file
@@ -0,0 +1,250 @@
|
||||
using System.Collections;
|
||||
using System.Text;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/// <summary>
/// A parse exception. It carries an error type, an optional
/// information string, an optional list of details and the line and
/// column where the error was found.
/// </summary>
public class ParseException : Exception
{
    /// <summary>
    /// The error type enumeration.
    /// </summary>
    public enum ErrorType
    {
        /**
         * The internal error type is only used to signal an error
         * that is a result of a bug in the parser or tokenizer
         * code.
         */
        INTERNAL,

        /**
         * The I/O error type is used for stream I/O errors.
         */
        IO,

        /**
         * The unexpected end of file error type is used when end
         * of file is encountered instead of a valid token.
         */
        UNEXPECTED_EOF,

        /**
         * The unexpected character error type is used when a
         * character is read that isn't handled by one of the
         * token patterns.
         */
        UNEXPECTED_CHAR,

        /**
         * The unexpected token error type is used when another
         * token than the expected one is encountered.
         */
        UNEXPECTED_TOKEN,

        /**
         * The invalid token error type is used when a token
         * pattern with an error message is matched. The
         * additional information provided should contain the
         * error message.
         */
        INVALID_TOKEN,

        /**
         * The analysis error type is used when an error is
         * encountered in the analysis. The additional information
         * provided should contain the error message.
         */
        ANALYSIS
    }

    private readonly ErrorType _type;
    private readonly string _info;
    private readonly ArrayList _details;
    private readonly int _line;
    private readonly int _column;

    /// <summary>
    /// Creates a new parse exception without a details list.
    /// </summary>
    /// <param name="type">The error type.</param>
    /// <param name="info">The additional information, used when building the message.</param>
    /// <param name="line">The line number; only reported when positive.</param>
    /// <param name="column">The column number; only reported when positive.</param>
    public ParseException(ErrorType type,
                          string info,
                          int line,
                          int column)
        : this(type, info, null, line, column)
    {
    }

    /// <summary>
    /// Creates a new parse exception. This constructor is only
    /// used to supply the detailed information array, which is
    /// only used for expected token errors. The list then contains
    /// descriptions of the expected tokens.
    /// </summary>
    /// <param name="type">The error type.</param>
    /// <param name="info">The additional information, used when building the message.</param>
    /// <param name="details">The list of details, or null for none.</param>
    /// <param name="line">The line number; only reported when positive.</param>
    /// <param name="column">The column number; only reported when positive.</param>
    public ParseException(ErrorType type,
                          string info,
                          ArrayList details,
                          int line,
                          int column)
    {
        _type = type;
        _info = info;
        _details = details;
        _line = line;
        _column = column;
    }

    /// <summary>The error type.</summary>
    public ErrorType Type => _type;

    /// <summary>Method form of <see cref="Type"/>.</summary>
    public ErrorType GetErrorType()
    {
        return Type;
    }

    /// <summary>The additional error information, possibly null.</summary>
    public string Info => _info;

    /// <summary>Method form of <see cref="Info"/>.</summary>
    public string GetInfo()
    {
        return Info;
    }

    /// <summary>
    /// A copy of the details list. When the exception was created
    /// without details an empty list is returned; copying a null list
    /// would otherwise throw an ArgumentNullException.
    /// </summary>
    public ArrayList Details => _details == null ? new ArrayList() : new ArrayList(_details);

    /// <summary>Method form of <see cref="Details"/>.</summary>
    public ArrayList GetDetails()
    {
        return Details;
    }

    /// <summary>The line number given at creation.</summary>
    public int Line => _line;

    /// <summary>Method form of <see cref="Line"/>.</summary>
    public int GetLine()
    {
        return Line;
    }

    /// <summary>The column number given at creation.</summary>
    public int Column => _column;

    /// <summary>Method form of <see cref="Column"/>.</summary>
    public int GetColumn()
    {
        return Column;
    }

    /// <summary>
    /// The full error message: <see cref="ErrorMessage"/> followed by
    /// the line and column, which are appended only when both are
    /// greater than zero.
    /// </summary>
    public override string Message
    {
        get
        {
            StringBuilder buffer = new StringBuilder();

            // Add error description
            buffer.Append(ErrorMessage);

            // Add line and column
            if (_line > 0 && _column > 0)
            {
                buffer.Append(", on line: ");
                buffer.Append(_line);
                buffer.Append(" column: ");
                buffer.Append(_column);
            }

            return buffer.ToString();
        }
    }

    /// <summary>Method form of <see cref="Message"/>.</summary>
    public string GetMessage()
    {
        return Message;
    }

    /// <summary>
    /// The error message without position information. The text
    /// depends on the error type; any type without a dedicated case
    /// (including INTERNAL) is reported as "internal error".
    /// </summary>
    public string ErrorMessage
    {
        get
        {
            StringBuilder buffer = new StringBuilder();

            // Add type and info
            switch (_type)
            {
                case ErrorType.IO:
                    buffer.Append("I/O error: ");
                    buffer.Append(_info);
                    break;
                case ErrorType.UNEXPECTED_EOF:
                    buffer.Append("unexpected end of file");
                    break;
                case ErrorType.UNEXPECTED_CHAR:
                    buffer.Append("unexpected character '");
                    buffer.Append(_info);
                    buffer.Append("'");
                    break;
                case ErrorType.UNEXPECTED_TOKEN:
                    buffer.Append("unexpected token ");
                    buffer.Append(_info);
                    if (_details != null)
                    {
                        buffer.Append(", expected ");
                        if (_details.Count > 1)
                        {
                            buffer.Append("one of ");
                        }
                        buffer.Append(GetMessageDetails());
                    }
                    break;
                case ErrorType.INVALID_TOKEN:
                case ErrorType.ANALYSIS:
                    // Both types carry the complete message in the info text.
                    buffer.Append(_info);
                    break;
                default:
                    buffer.Append("internal error");
                    if (_info != null)
                    {
                        buffer.Append(": ");
                        buffer.Append(_info);
                    }
                    break;
            }

            return buffer.ToString();
        }
    }

    /// <summary>Method form of <see cref="ErrorMessage"/>.</summary>
    public string GetErrorMessage()
    {
        return ErrorMessage;
    }

    /// <summary>
    /// Joins the details with ", " and puts "or " before the last
    /// entry when there is more than one. Only called when the details
    /// list is not null.
    /// </summary>
    private string GetMessageDetails()
    {
        StringBuilder buffer = new StringBuilder();

        for (int i = 0; i < _details.Count; i++)
        {
            if (i > 0)
            {
                buffer.Append(", ");
                if (i + 1 == _details.Count)
                {
                    buffer.Append("or ");
                }
            }
            buffer.Append(_details[i]);
        }

        return buffer.ToString();
    }
}
|
||||
}
|
492
Parsing/Parser.cs
Normal file
492
Parsing/Parser.cs
Normal file
@@ -0,0 +1,492 @@
|
||||
using System.Collections;
|
||||
using System.Text;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
|
||||
/// <summary>
/// A base parser class. This class provides the standard parser
/// interface, as well as token handling: it keeps the registered
/// production patterns, a queue of look-ahead tokens, the error log
/// and the error recovery counter.
/// </summary>
[Obsolete(" A base parser class. This class provides the standard parser interface, as well as token handling.")]
internal abstract class Parser
{
    private bool _initialized;
    private readonly Tokenizer _tokenizer;
    private Analyzer _analyzer;
    private readonly ArrayList _patterns = new ArrayList();
    private readonly Hashtable _patternIds = new Hashtable();
    private readonly ArrayList _tokens = new ArrayList();
    private ParserLogException _errorLog = new ParserLogException();

    // Error recovery counter: -1 until the first error of a parse. A
    // recoverable error sets it to 3 and each successfully matched token
    // in NextToken(int) decrements it while it is positive. Errors are
    // logged only while it is zero or less, and analyzer callbacks are
    // made only while it is negative.
    private int _errorRecovery = -1;

    /// <summary>
    /// Creates a new parser with a default analyzer.
    /// </summary>
    /// <param name="input">The input stream to read from.</param>
    internal Parser(TextReader input) : this(input, null)
    {
    }

    /// <summary>
    /// Creates a new parser. The tokenizer is obtained from
    /// <see cref="NewTokenizer"/>.
    /// </summary>
    /// <param name="input">The input stream to read from.</param>
    /// <param name="analyzer">
    /// The analyzer callback to use, or null to use the one returned by
    /// <see cref="NewAnalyzer"/>.
    /// </param>
    internal Parser(TextReader input, Analyzer analyzer)
    {
        _tokenizer = NewTokenizer(input);
        _analyzer = analyzer ?? NewAnalyzer();
    }

    /// <summary>
    /// Creates a new parser with a default analyzer.
    /// </summary>
    /// <param name="tokenizer">The tokenizer to use.</param>
    internal Parser(Tokenizer tokenizer) : this(tokenizer, null)
    {
    }

    /// <summary>
    /// Creates a new parser.
    /// </summary>
    /// <param name="tokenizer">The tokenizer to use.</param>
    /// <param name="analyzer">
    /// The analyzer callback to use, or null to use the one returned by
    /// <see cref="NewAnalyzer"/>.
    /// </param>
    internal Parser(Tokenizer tokenizer, Analyzer analyzer)
    {
        _tokenizer = tokenizer;
        _analyzer = analyzer ?? NewAnalyzer();
    }

    /// <summary>
    /// Creates the tokenizer used by this parser.
    /// </summary>
    /// <param name="input">The input stream to read from.</param>
    protected virtual Tokenizer NewTokenizer(TextReader input)
    {
        // TODO: This method should really be abstract, but it isn't in this
        //       version due to backwards compatibility requirements.
        return new Tokenizer(input);
    }

    /// <summary>
    /// Creates the default analyzer used when none is supplied.
    /// </summary>
    protected virtual Analyzer NewAnalyzer()
    {
        // TODO: This method should really be abstract, but it isn't in this
        //       version due to backwards compatibility requirements.
        return new Analyzer();
    }

    /// <summary>The tokenizer used by this parser.</summary>
    public Tokenizer Tokenizer => _tokenizer;

    /// <summary>The analyzer used by this parser.</summary>
    public Analyzer Analyzer => _analyzer;

    /// <summary>Method form of <see cref="Tokenizer"/>.</summary>
    public Tokenizer GetTokenizer()
    {
        return Tokenizer;
    }

    /// <summary>Method form of <see cref="Analyzer"/>.</summary>
    public Analyzer GetAnalyzer()
    {
        return Analyzer;
    }

    /// <summary>
    /// Sets the initialized flag. <see cref="Parse"/> calls
    /// <see cref="Prepare"/> first whenever the flag is false.
    /// </summary>
    internal void SetInitialized(bool initialized)
    {
        _initialized = initialized;
    }

    /// <summary>
    /// Adds a new production pattern to the parser and marks the parser
    /// as not initialized.
    /// </summary>
    /// <param name="pattern">The pattern to add.</param>
    /// <exception cref="ParserCreationException">
    /// If the pattern has no alternatives or its id is already registered.
    /// </exception>
    public virtual void AddPattern(ProductionPattern pattern)
    {
        if (pattern.Count <= 0)
        {
            throw new ParserCreationException(
                ParserCreationException.ErrorType.INVALID_PRODUCTION,
                pattern.Name,
                "no production alternatives are present (must have at " +
                "least one)");
        }
        if (_patternIds.ContainsKey(pattern.Id))
        {
            throw new ParserCreationException(
                ParserCreationException.ErrorType.INVALID_PRODUCTION,
                pattern.Name,
                "another pattern with the same id (" + pattern.Id +
                ") has already been added");
        }
        _patterns.Add(pattern);
        _patternIds.Add(pattern.Id, pattern);
        SetInitialized(false);
    }

    /// <summary>
    /// Checks all registered patterns and marks the parser as
    /// initialized.
    /// </summary>
    /// <exception cref="ParserCreationException">
    /// If no patterns have been added or a pattern references an
    /// undefined production.
    /// </exception>
    public virtual void Prepare()
    {
        if (_patterns.Count <= 0)
        {
            throw new ParserCreationException(
                ParserCreationException.ErrorType.INVALID_PARSER,
                "no production patterns have been added");
        }
        for (int i = 0; i < _patterns.Count; i++)
        {
            CheckPattern((ProductionPattern)_patterns[i]);
        }
        SetInitialized(true);
    }

    /// <summary>Checks every alternative of a production pattern.</summary>
    private void CheckPattern(ProductionPattern pattern)
    {
        for (int i = 0; i < pattern.Count; i++)
        {
            CheckAlternative(pattern.Name, pattern[i]);
        }
    }

    /// <summary>Checks every element of a production pattern alternative.</summary>
    private void CheckAlternative(string name,
                                  ProductionPatternAlternative alt)
    {
        for (int i = 0; i < alt.Count; i++)
        {
            CheckElement(name, alt[i]);
        }
    }

    /// <summary>
    /// Checks that a production element refers to a registered pattern.
    /// </summary>
    /// <exception cref="ParserCreationException">
    /// If the element is a production whose id is not registered.
    /// </exception>
    private void CheckElement(string name,
                              ProductionPatternElement elem)
    {
        if (elem.IsProduction() && GetPattern(elem.Id) == null)
        {
            throw new ParserCreationException(
                ParserCreationException.ErrorType.INVALID_PRODUCTION,
                name,
                "an undefined production pattern id (" + elem.Id +
                ") is referenced");
        }
    }

    /// <summary>
    /// Resets the tokenizer to a new input stream and resets the
    /// current analyzer.
    /// </summary>
    /// <param name="input">The new input stream to read from.</param>
    public void Reset(TextReader input)
    {
        _tokenizer.Reset(input);
        _analyzer.Reset();
    }

    /// <summary>
    /// Resets the tokenizer to a new input stream and replaces the
    /// analyzer. The new analyzer is not reset by this method.
    /// </summary>
    /// <param name="input">The new input stream to read from.</param>
    /// <param name="analyzer">
    /// The analyzer callback to use, or null to use the one returned by
    /// <see cref="NewAnalyzer"/>, matching the constructors.
    /// </param>
    public void Reset(TextReader input, Analyzer analyzer)
    {
        _tokenizer.Reset(input);
        // A null analyzer would only fail later, on the first callback.
        _analyzer = analyzer ?? NewAnalyzer();
    }

    /// <summary>
    /// Parses the token stream and returns a parse tree. The parser is
    /// prepared first if needed, and the token queue, error log and
    /// error recovery counter are reset before parsing.
    /// </summary>
    /// <returns>The root node returned by <see cref="ParseStart"/>.</returns>
    /// <exception cref="ParserLogException">
    /// If at least one parse error was logged.
    /// </exception>
    public Node Parse()
    {
        Node root = null;

        // Initialize parser
        if (!_initialized)
        {
            Prepare();
        }
        _tokens.Clear();
        _errorLog = new ParserLogException();
        _errorRecovery = -1;

        // Parse input
        try
        {
            root = ParseStart();
        }
        catch (ParseException e)
        {
            AddError(e, true);
        }

        // Check for errors
        if (_errorLog.Count > 0)
        {
            throw _errorLog;
        }

        return root;
    }

    /// <summary>
    /// Parses the input from the start production. Implemented by
    /// subclasses.
    /// </summary>
    protected abstract Node ParseStart();

    /// <summary>
    /// Creates a new production node by delegating to the analyzer.
    /// </summary>
    protected virtual Production NewProduction(ProductionPattern pattern)
    {
        return _analyzer.NewProduction(pattern);
    }

    /// <summary>
    /// Records a parse error. The error is added to the log only while
    /// the recovery counter is zero or less.
    /// </summary>
    /// <param name="e">The error to record.</param>
    /// <param name="recovery">
    /// When true the recovery counter is set to 3.
    /// </param>
    internal void AddError(ParseException e, bool recovery)
    {
        if (_errorRecovery <= 0)
        {
            _errorLog.AddError(e);
        }
        if (recovery)
        {
            _errorRecovery = 3;
        }
    }

    /// <summary>
    /// Returns the production pattern with the given id, or null if no
    /// such pattern has been added.
    /// </summary>
    internal ProductionPattern GetPattern(int id)
    {
        return (ProductionPattern)_patternIds[id];
    }

    /// <summary>
    /// Returns the first pattern that was added, or null if there are
    /// no patterns.
    /// </summary>
    internal ProductionPattern GetStartPattern()
    {
        if (_patterns.Count <= 0)
        {
            return null;
        }
        return (ProductionPattern)_patterns[0];
    }

    /// <summary>
    /// Returns the internal pattern list itself (not a copy), in the
    /// order the patterns were added.
    /// </summary>
    internal ICollection GetPatterns()
    {
        return _patterns;
    }

    /// <summary>
    /// Notifies the analyzer that a node is being entered. Hidden nodes
    /// are skipped, and so is every node once an error has been
    /// recorded with recovery. A parse exception from the analyzer is
    /// logged instead of propagated.
    /// </summary>
    internal void EnterNode(Node node)
    {
        if (!node.IsHidden() && _errorRecovery < 0)
        {
            try
            {
                _analyzer.Enter(node);
            }
            catch (ParseException e)
            {
                AddError(e, false);
            }
        }
    }

    /// <summary>
    /// Notifies the analyzer that a node is being exited and returns
    /// the node the analyzer hands back. The node itself is returned
    /// when it is hidden, when the parser is recovering from an error,
    /// or when the analyzer throws a parse exception (which is logged).
    /// </summary>
    internal Node ExitNode(Node node)
    {
        if (!node.IsHidden() && _errorRecovery < 0)
        {
            try
            {
                return _analyzer.Exit(node);
            }
            catch (ParseException e)
            {
                AddError(e, false);
            }
        }
        return node;
    }

    /// <summary>
    /// Adds a child to a production node. Nothing is done while
    /// recovering from an error. A hidden parent takes the child
    /// directly; the children of a hidden child are added one by one
    /// in its place; otherwise the analyzer is asked to add the child
    /// and any parse exception it throws is logged.
    /// </summary>
    internal void AddNode(Production node, Node child)
    {
        if (_errorRecovery >= 0)
        {
            // Do nothing
        }
        else if (node.IsHidden())
        {
            node.AddChild(child);
        }
        else if (child != null && child.IsHidden())
        {
            for (int i = 0; i < child.Count; i++)
            {
                AddNode(node, child[i]);
            }
        }
        else
        {
            try
            {
                _analyzer.Child(node, child);
            }
            catch (ParseException e)
            {
                AddError(e, false);
            }
        }
    }

    /// <summary>
    /// Removes and returns the next token from the look-ahead queue.
    /// </summary>
    /// <exception cref="ParseException">
    /// With type UNEXPECTED_EOF if there are no more tokens.
    /// </exception>
    internal Token NextToken()
    {
        Token token = PeekToken(0);

        if (token == null)
        {
            throw new ParseException(
                ParseException.ErrorType.UNEXPECTED_EOF,
                null,
                _tokenizer.GetCurrentLine(),
                _tokenizer.GetCurrentColumn());
        }
        _tokens.RemoveAt(0);
        return token;
    }

    /// <summary>
    /// Removes and returns the next token, which must have the given
    /// id. A successful match decrements a positive recovery counter.
    /// </summary>
    /// <param name="id">The expected token id.</param>
    /// <exception cref="ParseException">
    /// With type UNEXPECTED_EOF if there are no more tokens, or
    /// UNEXPECTED_TOKEN if the token has another id. The token has
    /// already been consumed in the latter case.
    /// </exception>
    internal Token NextToken(int id)
    {
        Token token = NextToken();

        if (token.Id == id)
        {
            if (_errorRecovery > 0)
            {
                _errorRecovery--;
            }
            return token;
        }

        var list = new ArrayList(1) {_tokenizer.GetPatternDescription(id)};
        throw new ParseException(
            ParseException.ErrorType.UNEXPECTED_TOKEN,
            token.ToShortString(),
            list,
            token.StartLine,
            token.StartColumn);
    }

    /// <summary>
    /// Returns a token from the look-ahead queue without consuming it,
    /// reading more tokens from the tokenizer as needed. A parse
    /// exception from the tokenizer is logged with recovery and
    /// reading is then retried.
    /// </summary>
    /// <param name="steps">The look-ahead distance; zero is the next token.</param>
    /// <returns>The token, or null if the tokenizer ran out of tokens.</returns>
    internal Token PeekToken(int steps)
    {
        while (steps >= _tokens.Count)
        {
            try
            {
                var token = _tokenizer.Next();
                if (token == null)
                {
                    return null;
                }
                _tokens.Add(token);
            }
            catch (ParseException e)
            {
                AddError(e, true);
            }
        }
        return (Token)_tokens[steps];
    }

    /// <summary>
    /// Returns a textual listing of all production patterns, one
    /// pattern description per entry, each followed by a newline.
    /// </summary>
    public override string ToString()
    {
        StringBuilder buffer = new StringBuilder();

        for (int i = 0; i < _patterns.Count; i++)
        {
            buffer.Append(ToString((ProductionPattern)_patterns[i]));
            buffer.Append("\n");
        }
        return buffer.ToString();
    }

    /// <summary>
    /// Describes one production pattern: its name and id, its
    /// alternatives, and a note for every alternative whose look-ahead
    /// set is longer than one token.
    /// </summary>
    private string ToString(ProductionPattern prod)
    {
        StringBuilder buffer = new StringBuilder();
        StringBuilder indent = new StringBuilder();
        int i;

        buffer.Append(prod.Name);
        buffer.Append(" (");
        buffer.Append(prod.Id);
        buffer.Append(") ");
        // Alternatives after the first are aligned under the header.
        for (i = 0; i < buffer.Length; i++)
        {
            indent.Append(" ");
        }
        buffer.Append("= ");
        indent.Append("| ");
        for (i = 0; i < prod.Count; i++)
        {
            if (i > 0)
            {
                buffer.Append(indent);
            }
            buffer.Append(ToString(prod[i]));
            buffer.Append("\n");
        }
        for (i = 0; i < prod.Count; i++)
        {
            var set = prod[i].LookAhead;
            // Skip alternatives that have no look-ahead set assigned
            // instead of failing with a null reference.
            if (set != null && set.GetMaxLength() > 1)
            {
                buffer.Append("Using ");
                buffer.Append(set.GetMaxLength());
                buffer.Append(" token look-ahead for alternative ");
                buffer.Append(i + 1);
                buffer.Append(": ");
                buffer.Append(set.ToString(_tokenizer));
                buffer.Append("\n");
            }
        }
        return buffer.ToString();
    }

    /// <summary>
    /// Describes one alternative as its elements separated by spaces.
    /// </summary>
    private string ToString(ProductionPatternAlternative alt)
    {
        StringBuilder buffer = new StringBuilder();

        for (int i = 0; i < alt.Count; i++)
        {
            if (i > 0)
            {
                buffer.Append(" ");
            }
            buffer.Append(ToString(alt[i]));
        }
        return buffer.ToString();
    }

    /// <summary>
    /// Describes one element: the token description or production name,
    /// decorated with its repetition - "[x]" for 0..1, "x*" for 0..max,
    /// "x+" for 1..max, and "x{min,max}" for anything other than
    /// exactly once.
    /// </summary>
    private string ToString(ProductionPatternElement elem)
    {
        StringBuilder buffer = new StringBuilder();
        int min = elem.MinCount;
        int max = elem.MaxCount;
        bool optional = min == 0 && max == 1;

        if (optional)
        {
            buffer.Append("[");
        }
        if (elem.IsToken())
        {
            buffer.Append(GetTokenDescription(elem.Id));
        }
        else
        {
            buffer.Append(GetPattern(elem.Id).Name);
        }
        if (optional)
        {
            buffer.Append("]");
        }
        else if (min == 0 && max == Int32.MaxValue)
        {
            buffer.Append("*");
        }
        else if (min == 1 && max == Int32.MaxValue)
        {
            buffer.Append("+");
        }
        else if (min != 1 || max != 1)
        {
            buffer.Append("{");
            buffer.Append(min);
            buffer.Append(",");
            buffer.Append(max);
            buffer.Append("}");
        }
        return buffer.ToString();
    }

    /// <summary>
    /// Returns the tokenizer's description of a token pattern, or an
    /// empty string when the parser has no tokenizer.
    /// </summary>
    /// <param name="token">The token pattern id.</param>
    internal string GetTokenDescription(int token)
    {
        if (_tokenizer == null)
        {
            return "";
        }
        return _tokenizer.GetPatternDescription(token);
    }
}
|
||||
}
|
216
Parsing/ParserCreationException.cs
Normal file
216
Parsing/ParserCreationException.cs
Normal file
@@ -0,0 +1,216 @@
|
||||
using System.Collections;
|
||||
using System.Text;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/// <summary>
/// A parser creation exception. This exception is used for signalling
/// an error in the token or production patterns, making it impossible
/// to create a working parser or tokenizer.
/// </summary>
internal class ParserCreationException : Exception
{
    /// <summary>
    /// The error type enumeration.
    /// </summary>
    public enum ErrorType
    {
        /**
         * The internal error type is only used to signal an
         * error that is a result of a bug in the parser or
         * tokenizer code.
         */
        INTERNAL,

        /**
         * The invalid parser error type is used when the parser
         * as such is invalid. This error is typically caused by
         * using a parser without any patterns.
         */
        INVALID_PARSER,

        /**
         * The invalid token error type is used when a token
         * pattern is erroneous. This error is typically caused
         * by an invalid pattern type or an erroneous regular
         * expression.
         */
        INVALID_TOKEN,

        /**
         * The invalid production error type is used when a
         * production pattern is erroneous. This error is
         * typically caused by referencing undeclared productions,
         * or violating some other production pattern constraint.
         */
        INVALID_PRODUCTION,

        /**
         * The infinite loop error type is used when an infinite
         * loop has been detected in the grammar. One of the
         * productions in the loop will be reported.
         */
        INFINITE_LOOP,

        /**
         * The inherent ambiguity error type is used when the set
         * of production patterns (i.e. the grammar) contains
         * ambiguities that cannot be resolved.
         */
        INHERENT_AMBIGUITY
    }

    private readonly ErrorType _type;
    private readonly string _name;
    private readonly string _info;
    private readonly ArrayList _details;

    /// <summary>
    /// Creates an exception with an error type and information text only.
    /// </summary>
    /// <param name="type">The error type.</param>
    /// <param name="info">The additional error information.</param>
    public ParserCreationException(ErrorType type, string info)
        : this(type, null, info)
    {
    }

    /// <summary>
    /// Creates an exception for a named token or production.
    /// </summary>
    /// <param name="type">The error type.</param>
    /// <param name="name">The token or production pattern name.</param>
    /// <param name="info">The additional error information.</param>
    public ParserCreationException(ErrorType type, string name, string info)
        : this(type, name, info, null)
    {
    }

    /// <summary>
    /// Creates an exception for a named token or production with a
    /// list of details.
    /// </summary>
    /// <param name="type">The error type.</param>
    /// <param name="name">The token or production pattern name.</param>
    /// <param name="info">The additional error information.</param>
    /// <param name="details">The list of details, or null for none.</param>
    public ParserCreationException(ErrorType type,
                                   string name,
                                   string info,
                                   ArrayList details)
    {
        _type = type;
        _name = name;
        _info = info;
        _details = details;
    }

    /// <summary>The error type.</summary>
    public ErrorType Type
    {
        get { return _type; }
    }

    /// <summary>Method form of <see cref="Type"/>.</summary>
    public ErrorType GetErrorType() => Type;

    /// <summary>The token or production pattern name, possibly null.</summary>
    public string Name
    {
        get { return _name; }
    }

    /// <summary>Method form of <see cref="Name"/>.</summary>
    public string GetName() => Name;

    /// <summary>The additional error information, possibly null.</summary>
    public string Info
    {
        get { return _info; }
    }

    /// <summary>Method form of <see cref="Info"/>.</summary>
    public string GetInfo() => Info;

    /// <summary>
    /// The details joined into one string: entries are separated by
    /// ", " and the last of several entries is preceded by "and ".
    /// Null when the exception has no details list.
    /// </summary>
    public string Details
    {
        get
        {
            if (_details == null)
            {
                return null;
            }

            StringBuilder text = new StringBuilder();
            int last = _details.Count - 1;

            for (int index = 0; index <= last; index++)
            {
                if (index > 0)
                {
                    text.Append(index == last ? ", and " : ", ");
                }
                text.Append(_details[index]);
            }
            return text.ToString();
        }
    }

    /// <summary>Method form of <see cref="Details"/>.</summary>
    public string GetDetails() => Details;

    /// <summary>
    /// The error message, whose wording depends on the error type. Any
    /// type without a dedicated case (including INTERNAL) yields
    /// "internal error".
    /// </summary>
    public override string Message
    {
        get
        {
            switch (_type)
            {
                case ErrorType.INVALID_PARSER:
                    return "parser is invalid, as " + _info;

                case ErrorType.INVALID_TOKEN:
                    return "token '" + _name + "' is invalid, as " + _info;

                case ErrorType.INVALID_PRODUCTION:
                    return "production '" + _name + "' is invalid, as " + _info;

                case ErrorType.INFINITE_LOOP:
                    return "infinite loop found in production pattern '" + _name + "'";

                case ErrorType.INHERENT_AMBIGUITY:
                    StringBuilder text = new StringBuilder();

                    text.Append("inherent ambiguity in production '");
                    text.Append(_name);
                    text.Append("'");
                    if (_info != null)
                    {
                        text.Append(" ");
                        text.Append(_info);
                    }
                    if (_details != null)
                    {
                        text.Append(" starting with ");
                        text.Append(_details.Count > 1 ? "tokens " : "token ");
                        text.Append(Details);
                    }
                    return text.ToString();

                default:
                    return "internal error";
            }
        }
    }

    /// <summary>Method form of <see cref="Message"/>.</summary>
    public string GetMessage() => Message;
}
|
||||
}
|
55
Parsing/ParserLogException.cs
Normal file
55
Parsing/ParserLogException.cs
Normal file
@@ -0,0 +1,55 @@
|
||||
using System.Collections;
|
||||
using System.Text;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/// <summary>
/// An exception that collects a list of parse errors, so that several
/// errors can be reported by throwing a single exception.
/// </summary>
internal class ParserLogException : Exception
{
    private readonly ArrayList _errors = new ArrayList();

    /// <summary>Creates a new, empty error log.</summary>
    public ParserLogException()
    {
    }

    /// <summary>
    /// The messages of all logged errors, in the order they were added
    /// and separated by newlines.
    /// </summary>
    public override string Message
    {
        get
        {
            StringBuilder text = new StringBuilder();
            int index = 0;

            while (index < Count)
            {
                if (index != 0)
                {
                    text.Append("\n");
                }
                text.Append(this[index].Message);
                index++;
            }
            return text.ToString();
        }
    }

    /// <summary>The number of logged errors.</summary>
    public int Count
    {
        get { return _errors.Count; }
    }

    /// <summary>Method form of <see cref="Count"/>.</summary>
    public int GetErrorCount() => Count;

    /// <summary>The logged error at the given index.</summary>
    public ParseException this[int index]
    {
        get { return (ParseException)_errors[index]; }
    }

    /// <summary>Method form of the indexer.</summary>
    public ParseException GetError(int index) => this[index];

    /// <summary>Appends a parse error to the log.</summary>
    /// <param name="e">The error to add.</param>
    public void AddError(ParseException e)
    {
        _errors.Add(e);
    }

    /// <summary>Method form of <see cref="Message"/>.</summary>
    public string GetMessage() => Message;
}
|
||||
}
|
70
Parsing/Production.cs
Normal file
70
Parsing/Production.cs
Normal file
@@ -0,0 +1,70 @@
|
||||
using System.Collections;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
|
||||
/// <summary>
/// A production node. This class represents a grammar production
/// (i.e. a list of child nodes) in a parse tree. The productions
/// are created by a parser, that adds children according to a
/// set of production patterns (i.e. grammar rules).
/// </summary>
internal class Production : Node
{
    private readonly ProductionPattern _pattern;
    private readonly ArrayList _children;

    /// <summary>
    /// Creates a new production node without children.
    /// </summary>
    /// <param name="pattern">The production pattern this node represents.</param>
    public Production(ProductionPattern pattern)
    {
        _pattern = pattern;
        _children = new ArrayList();
    }

    /// <summary>The id of the production pattern.</summary>
    public override int Id
    {
        get { return _pattern.Id; }
    }

    /// <summary>The name of the production pattern.</summary>
    public override string Name
    {
        get { return _pattern.Name; }
    }

    /// <summary>The number of child nodes.</summary>
    public override int Count
    {
        get { return _children.Count; }
    }

    /// <summary>
    /// The child node at the given index, or null when the index is
    /// out of range.
    /// </summary>
    public override Node this[int index]
    {
        get
        {
            bool inRange = index >= 0 && index < _children.Count;

            return inRange ? (Node)_children[index] : null;
        }
    }

    /// <summary>
    /// Appends a child node and makes this node its parent. A null
    /// child is ignored.
    /// </summary>
    /// <param name="child">The child node to add.</param>
    public void AddChild(Node child)
    {
        if (child == null)
        {
            return;
        }
        child.SetParent(this);
        _children.Add(child);
    }

    /// <summary>The production pattern this node was created from.</summary>
    public ProductionPattern Pattern
    {
        get { return _pattern; }
    }

    /// <summary>Method form of <see cref="Pattern"/>.</summary>
    public ProductionPattern GetPattern() => Pattern;

    /// <summary>
    /// Tells whether this node is hidden, which is the case when its
    /// pattern is synthetic.
    /// </summary>
    internal override bool IsHidden() => _pattern.Synthetic;

    /// <summary>
    /// Returns the pattern name followed by the pattern id in
    /// parentheses.
    /// </summary>
    public override string ToString()
    {
        return $"{_pattern.Name}({_pattern.Id})";
    }
}
|
||||
}
|
213
Parsing/ProductionPattern.cs
Normal file
213
Parsing/ProductionPattern.cs
Normal file
@@ -0,0 +1,213 @@
|
||||
using System.Collections;
|
||||
using System.Text;
|
||||
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
|
||||
/**
 * A production pattern. This class represents a set of production
 * alternatives that together form a single production. A
 * production pattern is identified by an integer id and a name,
 * both provided upon creation. The pattern id is used for
 * referencing the production pattern from production pattern
 * elements.
 */
internal class ProductionPattern
{
    private readonly ArrayList _alternatives;

    // Index into _alternatives of the default alternative; -1 means "none".
    private int _defaultAlt;

    /// <summary>Creates a pattern with no alternatives and no default alternative.</summary>
    public ProductionPattern(int id, string name)
    {
        Id = id;
        Name = name;
        Synthetic = false;
        _alternatives = new ArrayList();
        _defaultAlt = -1;
        LookAhead = null;
    }

    /// <summary>Gets the id given at construction.</summary>
    public int Id { get; }

    /// <summary>Method-style accessor for <see cref="Id"/>.</summary>
    public int GetId() => Id;

    /// <summary>Gets the name given at construction.</summary>
    public string Name { get; }

    /// <summary>Method-style accessor for <see cref="Name"/>.</summary>
    public string GetName() => Name;

    /// <summary>Gets or sets the synthetic flag (initially <c>false</c>).</summary>
    public bool Synthetic { get; set; }

    /// <summary>Method-style getter for <see cref="Synthetic"/> (historic spelling kept for callers).</summary>
    public bool IsSyntetic() => Synthetic;

    /// <summary>Method-style setter for <see cref="Synthetic"/> (historic spelling kept for callers).</summary>
    public void SetSyntetic(bool synthetic)
    {
        Synthetic = synthetic;
    }

    /// <summary>Gets or sets the look-ahead set attached to this pattern (initially <c>null</c>).</summary>
    internal LookAheadSet LookAhead { get; set; }

    /// <summary>
    /// Gets the default alternative, or <c>null</c> when none has been set.
    /// The setter searches the alternatives by reference; when the value
    /// is not found the first alternative (index 0) becomes the default,
    /// and when it occurs more than once the last occurrence wins.
    /// </summary>
    internal ProductionPatternAlternative DefaultAlternative
    {
        get
        {
            return _defaultAlt >= 0
                ? (ProductionPatternAlternative)_alternatives[_defaultAlt]
                : null;
        }
        set
        {
            _defaultAlt = 0;
            for (int index = 0; index < _alternatives.Count; index++)
            {
                // Reference comparison on purpose: Equals() is structural.
                if (ReferenceEquals(_alternatives[index], value))
                {
                    _defaultAlt = index;
                }
            }
        }
    }

    /// <summary>Gets the number of alternatives added so far.</summary>
    public int Count
    {
        get { return _alternatives.Count; }
    }

    /// <summary>Method-style accessor for <see cref="Count"/>.</summary>
    public int GetAlternativeCount() => Count;

    /// <summary>Gets the alternative stored at <paramref name="index"/>.</summary>
    public ProductionPatternAlternative this[int index]
    {
        get { return (ProductionPatternAlternative)_alternatives[index]; }
    }

    /// <summary>Method-style accessor for the indexer.</summary>
    public ProductionPatternAlternative GetAlternative(int pos) => this[pos];

    /// <summary>Returns <c>true</c> when at least one alternative reports itself left-recursive.</summary>
    public bool IsLeftRecursive()
    {
        foreach (ProductionPatternAlternative candidate in _alternatives)
        {
            if (candidate.IsLeftRecursive())
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>Returns <c>true</c> when at least one alternative reports itself right-recursive.</summary>
    public bool IsRightRecursive()
    {
        foreach (ProductionPatternAlternative candidate in _alternatives)
        {
            if (candidate.IsRightRecursive())
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>Returns <c>true</c> when at least one alternative can match zero elements.</summary>
    public bool IsMatchingEmpty()
    {
        foreach (ProductionPatternAlternative candidate in _alternatives)
        {
            if (candidate.IsMatchingEmpty())
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Adds an alternative and makes this pattern its owner.
    /// </summary>
    /// <exception cref="ParserCreationException">
    /// An alternative that compares equal (via <c>Equals</c>) is already present.
    /// </exception>
    public void AddAlternative(ProductionPatternAlternative alt)
    {
        bool duplicate = _alternatives.Contains(alt);
        if (duplicate)
        {
            throw new ParserCreationException(
                ParserCreationException.ErrorType.INVALID_PRODUCTION,
                Name,
                "two identical alternatives exist");
        }
        alt.SetPattern(this);
        _alternatives.Add(alt);
    }

    /// <summary>
    /// Formats the pattern as <c>Name(Id) = alt1</c> followed by one
    /// <c>| altN</c> line per further alternative, each indented to line
    /// up under the <c>=</c> sign.
    /// </summary>
    public override string ToString()
    {
        var text = new StringBuilder();
        text.Append(Name).Append("(").Append(Id).Append(") ");

        // Continuation lines are padded to the width of the "Name(Id) " header.
        string continuation = "\n" + new string(' ', text.Length) + "| ";
        for (int index = 0; index < _alternatives.Count; index++)
        {
            text.Append(index == 0 ? "= " : continuation);
            text.Append(_alternatives[index]);
        }
        return text.ToString();
    }
}
|
||||
}
|
211
Parsing/ProductionPatternAlternative.cs
Normal file
211
Parsing/ProductionPatternAlternative.cs
Normal file
@@ -0,0 +1,211 @@
|
||||
using System.Collections;
|
||||
using System.Text;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
|
||||
/**
 * A production pattern alternative. This class represents a list of
 * production pattern elements. In order to provide productions that
 * cannot be represented with the element occurrence counters, multiple
 * alternatives must be created and added to the same production
 * pattern. A production pattern alternative is always contained
 * within a production pattern.
 */
internal class ProductionPatternAlternative
{
    private ProductionPattern _pattern;
    private readonly ArrayList _elements = new ArrayList();
    private LookAheadSet _lookAhead = null;

    /// <summary>Creates an alternative with no elements and no owning pattern.</summary>
    public ProductionPatternAlternative()
    {
    }

    /// <summary>Gets the owning pattern, as last assigned through <see cref="SetPattern"/>.</summary>
    public ProductionPattern Pattern => _pattern;

    /// <summary>Method-style accessor for <see cref="Pattern"/>.</summary>
    public ProductionPattern GetPattern()
    {
        return Pattern;
    }

    /// <summary>Gets or sets the look-ahead set attached to this alternative (initially <c>null</c>).</summary>
    internal LookAheadSet LookAhead
    {
        get { return _lookAhead; }
        set { _lookAhead = value; }
    }

    /// <summary>Gets the number of elements added so far.</summary>
    public int Count => _elements.Count;

    /// <summary>Method-style accessor for <see cref="Count"/>.</summary>
    public int GetElementCount()
    {
        return Count;
    }

    /// <summary>Gets the element stored at <paramref name="index"/>.</summary>
    public ProductionPatternElement this[int index] => (ProductionPatternElement)_elements[index];

    /// <summary>Method-style accessor for the indexer.</summary>
    public ProductionPatternElement GetElement(int pos)
    {
        return this[pos];
    }

    /// <summary>
    /// Scans the elements from the front and returns <c>true</c> when one
    /// carrying the owning pattern's id is reached before any element with
    /// a minimum count above zero. The owning pattern must already be
    /// assigned, because its id is read.
    /// </summary>
    public bool IsLeftRecursive()
    {
        for (int i = 0; i < _elements.Count; i++)
        {
            var elem = (ProductionPatternElement)_elements[i];
            if (elem.Id == _pattern.Id)
            {
                return true;
            }
            if (elem.MinCount > 0)
            {
                // A mandatory element precedes any self-reference.
                break;
            }
        }
        return false;
    }

    /// <summary>
    /// Mirror image of <see cref="IsLeftRecursive"/>: scans the elements
    /// from the back instead of the front.
    /// </summary>
    public bool IsRightRecursive()
    {
        for (int i = _elements.Count - 1; i >= 0; i--)
        {
            var elem = (ProductionPatternElement)_elements[i];
            if (elem.Id == _pattern.Id)
            {
                return true;
            }
            if (elem.MinCount > 0)
            {
                // A mandatory element follows any self-reference.
                break;
            }
        }
        return false;
    }

    /// <summary>Returns <c>true</c> when the minimum element count is zero.</summary>
    public bool IsMatchingEmpty()
    {
        return GetMinElementCount() == 0;
    }

    /// <summary>Assigns the owning pattern.</summary>
    internal void SetPattern(ProductionPattern pattern)
    {
        _pattern = pattern;
    }

    /// <summary>Returns the sum of the minimum counts of all elements.</summary>
    public int GetMinElementCount()
    {
        int min = 0;
        for (int i = 0; i < _elements.Count; i++)
        {
            min += ((ProductionPatternElement)_elements[i]).MinCount;
        }
        return min;
    }

    /// <summary>
    /// Returns the sum of the maximum counts of all elements, or
    /// <c>Int32.MaxValue</c> when any element is unbounded or when the
    /// sum would not fit in an <c>int</c>.
    /// </summary>
    public int GetMaxElementCount()
    {
        int max = 0;
        for (int i = 0; i < _elements.Count; i++)
        {
            int elemMax = ((ProductionPatternElement)_elements[i]).MaxCount;

            // Saturate instead of wrapping around: Int32.MaxValue is the
            // "unbounded" marker, and a wrapped (negative) sum would be
            // meaningless to callers.
            if (elemMax >= Int32.MaxValue - max)
            {
                return Int32.MaxValue;
            }
            max += elemMax;
        }
        return max;
    }

    /// <summary>Appends a token element with the given id and occurrence bounds.</summary>
    public void AddToken(int id, int min, int max)
    {
        AddElement(new ProductionPatternElement(true, id, min, max));
    }

    /// <summary>Appends a production element with the given id and occurrence bounds.</summary>
    public void AddProduction(int id, int min, int max)
    {
        AddElement(new ProductionPatternElement(false, id, min, max));
    }

    /// <summary>Appends the element as-is.</summary>
    public void AddElement(ProductionPatternElement elem)
    {
        _elements.Add(elem);
    }

    /// <summary>
    /// Appends a new element that has the same kind (token or production)
    /// and id as <paramref name="elem"/> but the given occurrence bounds.
    /// </summary>
    public void AddElement(ProductionPatternElement elem,
                           int min,
                           int max)
    {
        if (elem.IsToken())
        {
            AddToken(elem.Id, min, max);
        }
        else
        {
            AddProduction(elem.Id, min, max);
        }
    }

    /// <summary>
    /// Structural equality; see <see cref="Equals(ProductionPatternAlternative)"/>.
    /// Returns <c>false</c> for <c>null</c> and for objects of other types.
    /// </summary>
    public override bool Equals(object obj)
    {
        return obj is ProductionPatternAlternative other && Equals(other);
    }

    /// <summary>
    /// Returns <c>true</c> when both alternatives hold the same number of
    /// elements and each pair of elements at the same position is equal.
    /// A <c>null</c> argument compares unequal instead of throwing.
    /// </summary>
    public bool Equals(ProductionPatternAlternative alt)
    {
        if (alt is null)
        {
            return false;
        }
        if (_elements.Count != alt._elements.Count)
        {
            return false;
        }
        for (int i = 0; i < _elements.Count; i++)
        {
            if (!_elements[i].Equals(alt._elements[i]))
            {
                return false;
            }
        }
        return true;
    }

    /// <summary>
    /// Hash code derived from the element count only, which keeps it
    /// consistent with the structural <c>Equals</c>.
    /// </summary>
    public override int GetHashCode()
    {
        return _elements.Count.GetHashCode();
    }

    /// <summary>Returns the elements' string forms separated by single spaces.</summary>
    public override string ToString()
    {
        StringBuilder buffer = new StringBuilder();

        for (int i = 0; i < _elements.Count; i++)
        {
            if (i > 0)
            {
                buffer.Append(" ");
            }
            buffer.Append(_elements[i]);
        }
        return buffer.ToString();
    }
}
|
||||
}
|
138
Parsing/ProductionPatternElement.cs
Normal file
138
Parsing/ProductionPatternElement.cs
Normal file
@@ -0,0 +1,138 @@
|
||||
using System.Text;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/**
 * A production pattern element. This class represents a reference to
 * either a token or a production. Each element also contains minimum
 * and maximum occurrence counters, controlling the number of
 * repetitions allowed. A production pattern element is always
 * contained within a production pattern rule.
 */
internal class ProductionPatternElement
{
    private readonly bool _token;
    private readonly int _id;
    private readonly int _min;
    private readonly int _max;

    /// <summary>
    /// Creates an element. A negative <paramref name="min"/> is raised to
    /// zero; a <paramref name="max"/> of zero or less means "unbounded"
    /// (<c>Int32.MaxValue</c>); a positive <paramref name="max"/> below the
    /// adjusted minimum is raised to that minimum.
    /// </summary>
    public ProductionPatternElement(bool isToken,
                                    int id,
                                    int min,
                                    int max)
    {
        int lower = min < 0 ? 0 : min;
        int upper;
        if (max <= 0)
        {
            upper = Int32.MaxValue;
        }
        else
        {
            upper = max < lower ? lower : max;
        }

        _token = isToken;
        _id = id;
        _min = lower;
        _max = upper;
        LookAhead = null;
    }

    /// <summary>Gets the referenced token or production id.</summary>
    public int Id
    {
        get { return _id; }
    }

    /// <summary>Method-style accessor for <see cref="Id"/>.</summary>
    public int GetId() => Id;

    /// <summary>Gets the (normalised) minimum occurrence count.</summary>
    public int MinCount
    {
        get { return _min; }
    }

    /// <summary>Method-style accessor for <see cref="MinCount"/>.</summary>
    public int GetMinCount() => MinCount;

    /// <summary>Gets the (normalised) maximum occurrence count.</summary>
    public int MaxCount
    {
        get { return _max; }
    }

    /// <summary>Method-style accessor for <see cref="MaxCount"/>.</summary>
    public int GetMaxCount() => MaxCount;

    /// <summary>Gets or sets the look-ahead set attached to this element (initially <c>null</c>).</summary>
    internal LookAheadSet LookAhead { get; set; }

    /// <summary>Returns <c>true</c> when the element was created as a token reference.</summary>
    public bool IsToken() => _token;

    /// <summary>Returns <c>true</c> when the element was created as a production reference.</summary>
    public bool IsProduction() => !_token;

    /// <summary>
    /// Returns <c>true</c> when this is a token element and
    /// <paramref name="token"/> is non-null with the same id.
    /// </summary>
    public bool IsMatch(Token token)
    {
        if (!IsToken() || token == null)
        {
            return false;
        }
        return token.Id == _id;
    }

    /// <summary>
    /// Two elements are equal when kind, id, minimum and maximum all
    /// match; the look-ahead set takes no part in the comparison.
    /// </summary>
    public override bool Equals(object obj)
    {
        return obj is ProductionPatternElement other
            && _token == other._token
            && _id == other._id
            && _min == other._min
            && _max == other._max;
    }

    /// <summary>Hash code derived from the id only.</summary>
    public override int GetHashCode() => _id * 37;

    /// <summary>
    /// Formats the element as the id followed by <c>(Token)</c> or
    /// <c>(Production)</c>, plus <c>{min,max}</c> unless both are one.
    /// </summary>
    public override string ToString()
    {
        var text = new StringBuilder();
        text.Append(_id);
        text.Append(_token ? "(Token)" : "(Production)");

        bool exactlyOnce = _min == 1 && _max == 1;
        if (!exactlyOnce)
        {
            text.Append("{").Append(_min).Append(",").Append(_max).Append("}");
        }
        return text.ToString();
    }
}
|
||||
}
|
180
Parsing/ReaderBuffer.cs
Normal file
180
Parsing/ReaderBuffer.cs
Normal file
@@ -0,0 +1,180 @@
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/**
 * A character buffer that automatically reads from an input source
 * stream when needed. This class keeps track of the current position
 * in the buffer and its line and column number in the original input
 * source. It allows unlimited look-ahead of characters in the input,
 * reading and buffering the required data internally. As the
 * position is advanced, the buffer content prior to the current
 * position is subject to removal to make space for reading new
 * content. A few characters before the current position are always
 * kept to enable boundary condition checks.
 */
internal class ReaderBuffer
{
    /// <summary>Granularity, in characters, used for read requests and buffer growth.</summary>
    public const int BlockSize = 1024;

    private char[] _buffer = new char[BlockSize * 4];
    private int _pos = 0;
    private int _length = 0;
    private TextReader _input;
    private int _line = 1;
    private int _column = 1;

    /// <summary>Creates a buffer that pulls its characters from <paramref name="input"/>.</summary>
    public ReaderBuffer(TextReader input)
    {
        this._input = input;
    }

    /// <summary>
    /// Releases the character array, resets position and length to zero
    /// and closes the input reader if it is still open. Line and column
    /// numbers are left untouched.
    /// </summary>
    public void Dispose()
    {
        _buffer = null;
        _pos = 0;
        _length = 0;
        if (_input != null)
        {
            try
            {
                _input.Close();
            }
            catch (Exception)
            {
                // Deliberate best effort: a failure while closing must
                // not surface from Dispose.
            }
            _input = null;
        }
    }

    /// <summary>Gets the current read position inside the buffer.</summary>
    public int Position => _pos;

    /// <summary>Gets the line number of the current position (starts at 1).</summary>
    public int LineNumber => _line;

    /// <summary>Gets the column number of the current position (starts at 1).</summary>
    public int ColumnNumber => _column;

    /// <summary>Gets the number of characters currently held in the buffer.</summary>
    public int Length => _length;

    /// <summary>
    /// Returns <paramref name="length"/> buffered characters starting at
    /// buffer index <paramref name="index"/>.
    /// </summary>
    public string Substring(int index, int length)
    {
        return new string(_buffer, index, length);
    }

    /// <summary>
    /// Returns all currently buffered characters. Once the buffer has
    /// been disposed this is the empty string. <see cref="Read"/>
    /// disposes the buffer automatically when the last character is
    /// consumed, so this must not throw in that state.
    /// </summary>
    public override string ToString()
    {
        if (_buffer == null)
        {
            return string.Empty;
        }
        return new string(_buffer, 0, _length);
    }

    /// <summary>
    /// Returns the character <paramref name="offset"/> positions ahead of
    /// the current position without consuming it, or -1 when that
    /// position lies beyond the end of the input.
    /// </summary>
    public int Peek(int offset)
    {
        int index = _pos + offset;

        // Avoid most calls to EnsureBuffered(), since we are in a
        // performance hotspot here. This check is not exhaustive,
        // but only present here to speed things up.
        if (index >= _length)
        {
            EnsureBuffered(offset + 1);
            // EnsureBuffered may have compacted the buffer and moved _pos.
            index = _pos + offset;
        }
        return (index >= _length) ? -1 : _buffer[index];
    }

    /// <summary>
    /// Consumes up to <paramref name="offset"/> characters, updating the
    /// line and column counters. Returns fewer characters when the input
    /// ends early, and <c>null</c> when nothing is left. When the last
    /// character has been consumed and the input is exhausted, the
    /// buffer disposes itself.
    /// </summary>
    public string Read(int offset)
    {
        EnsureBuffered(offset + 1);
        if (_pos >= _length)
        {
            return null;
        }

        var count = _length - _pos;
        if (count > offset)
        {
            count = offset;
        }
        UpdateLineColumnNumbers(count);
        var result = new string(_buffer, _pos, count);
        _pos += count;
        if (_input == null && _pos >= _length)
        {
            Dispose();
        }
        return result;
    }

    // Advances the line/column counters over the next `offset` characters,
    // starting at the current position. Only '\n' starts a new line.
    private void UpdateLineColumnNumbers(int offset)
    {
        for (int i = 0; i < offset; i++)
        {
            if (_buffer[_pos + i] == '\n')
            {
                _line++;
                _column = 1;
            }
            else
            {
                _column++;
            }
        }
    }

    // Makes sure that `offset` characters past the current position are
    // buffered, reading whole blocks from the input as needed. When the
    // reader reports end of input it is closed and forgotten.
    private void EnsureBuffered(int offset)
    {
        // Check for end of stream or already read characters
        if (_input == null || _pos + offset < _length)
        {
            return;
        }

        // Remove (almost all) old characters from buffer; the last 16
        // consumed characters are kept in front of the position.
        if (_pos > BlockSize)
        {
            _length -= (_pos - 16);
            Array.Copy(_buffer, _pos - 16, _buffer, 0, _length);
            _pos = 16;
        }

        // Calculate number of characters to read, rounded up to whole blocks
        var size = _pos + offset - _length + 1;
        if (size % BlockSize != 0)
        {
            size = (1 + size / BlockSize) * BlockSize;
        }
        EnsureCapacity(_length + size);

        // Read characters
        try
        {
            while (_input != null && size > 0)
            {
                var readSize = _input.Read(_buffer, _length, size);
                if (readSize > 0)
                {
                    _length += readSize;
                    size -= readSize;
                }
                else
                {
                    _input.Close();
                    _input = null;
                }
            }
        }
        catch (IOException)
        {
            _input = null;
            // Rethrow with `throw;` so the original stack trace survives.
            throw;
        }
    }

    // Grows the character array to at least `size` characters, rounded up
    // to a whole number of blocks. Never shrinks.
    private void EnsureCapacity(int size)
    {
        if (_buffer.Length >= size)
        {
            return;
        }
        if (size % BlockSize != 0)
        {
            size = (1 + size / BlockSize) * BlockSize;
        }
        Array.Resize(ref _buffer, size);
    }
}
|
||||
}
|
648
Parsing/RecursiveDescentParser.cs
Normal file
648
Parsing/RecursiveDescentParser.cs
Normal file
@@ -0,0 +1,648 @@
|
||||
using System.Collections;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/**
|
||||
* A recursive descent parser. This parser handles LL(n) grammars,
|
||||
* selecting the appropriate pattern to parse based on the next few
|
||||
* tokens. The parser is more efficient the fewer look-ahead tokens
|
||||
* that it has to consider.
|
||||
*/
|
||||
internal class RecursiveDescentParser : Parser
|
||||
{
|
||||
private int _stackdepth = 0;
|
||||
|
||||
/// <summary>Creates a parser over <paramref name="input"/>; all work is delegated to the base constructor.</summary>
public RecursiveDescentParser(TextReader input)
    : base(input)
{
}
|
||||
|
||||
/// <summary>Creates a parser over <paramref name="input"/> with the given analyzer; all work is delegated to the base constructor.</summary>
public RecursiveDescentParser(TextReader input, Analyzer analyzer) : base(input, analyzer)
{
}
|
||||
|
||||
/// <summary>Creates a parser that reads from <paramref name="tokenizer"/>; all work is delegated to the base constructor.</summary>
public RecursiveDescentParser(Tokenizer tokenizer) : base(tokenizer)
{
}
|
||||
|
||||
/// <summary>Creates a parser that reads from <paramref name="tokenizer"/> with the given analyzer; all work is delegated to the base constructor.</summary>
public RecursiveDescentParser(Tokenizer tokenizer, Analyzer analyzer) : base(tokenizer, analyzer)
{
}
|
||||
|
||||
/// <summary>
/// Validates a production pattern and then registers it with the base parser.
/// </summary>
/// <exception cref="ParserCreationException">
/// The pattern can match zero elements, or it is left-recursive.
/// </exception>
public override void AddPattern(ProductionPattern pattern)
{
    // Builds the INVALID_PRODUCTION error for this pattern.
    ParserCreationException Invalid(string details)
    {
        return new ParserCreationException(
            ParserCreationException.ErrorType.INVALID_PRODUCTION,
            pattern.Name,
            details);
    }

    // Check for empty matches
    if (pattern.IsMatchingEmpty())
    {
        throw Invalid("zero elements can be matched (minimum is one)");
    }

    // Check for left-recursive patterns
    if (pattern.IsLeftRecursive())
    {
        throw Invalid("left recursive patterns are not allowed");
    }

    // Add pattern
    base.AddPattern(pattern);
}
|
||||
|
||||
/// <summary>
/// Runs the base preparation, then computes look-ahead sets for every
/// registered pattern. The initialized flag is cleared while the sets are
/// being calculated and set again once all of them succeeded.
/// </summary>
public override void Prepare()
{
    // Performs production pattern checks
    base.Prepare();
    SetInitialized(false);

    // Calculate production look-ahead sets
    foreach (ProductionPattern pattern in GetPatterns())
    {
        CalculateLookAhead(pattern);
    }

    // Set initialized flag
    SetInitialized(true);
}
|
||||
|
||||
/// <summary>
/// Parses the start pattern and verifies that no token is left over.
/// </summary>
/// <returns>The node produced for the start pattern.</returns>
/// <exception cref="ParseException">
/// A token remains after the start pattern has been parsed; the error
/// lists <c>&lt;EOF&gt;</c> as the only expected item.
/// </exception>
protected override Node ParseStart()
{
    _stackdepth = 0;
    var root = ParsePattern(GetStartPattern());

    var leftover = PeekToken(0);
    if (leftover == null)
    {
        return root;
    }

    var expected = new ArrayList(1);
    expected.Add("<EOF>");
    throw new ParseException(
        ParseException.ErrorType.UNEXPECTED_TOKEN,
        leftover.ToShortString(),
        expected,
        leftover.StartLine,
        leftover.StartColumn);
}
|
||||
|
||||
|
||||
/// <summary>
/// Parses one production pattern. Non-default alternatives are tried in
/// order and the first whose look-ahead matches is parsed; otherwise the
/// default alternative is used, and a parse error is raised when there is
/// no default or its look-ahead does not match either.
/// </summary>
/// <exception cref="System.StackOverflowException">
/// Thrown deliberately when more than 200 patterns are nested.
/// </exception>
private Node ParsePattern(ProductionPattern pattern)
{
    // The depth guard sits outside the try block, so the counter is not
    // decremented when the limit trips.
    if (++_stackdepth > 200)
    {
        throw new System.StackOverflowException();
    }

    try
    {
        var fallback = pattern.DefaultAlternative;
        for (int index = 0; index < pattern.Count; index++)
        {
            var candidate = pattern[index];
            bool isFallback = fallback == candidate;
            if (!isFallback && IsNext(candidate))
            {
                return ParseAlternative(candidate);
            }
        }

        bool fallbackUsable = fallback != null && IsNext(fallback);
        if (!fallbackUsable)
        {
            ThrowParseException(FindUnion(pattern));
        }
        return ParseAlternative(fallback);
    }
    finally
    {
        _stackdepth--;
    }
}
|
||||
|
||||
/// <summary>
/// Parses all elements of an alternative into a new production node.
/// When an element fails with a <see cref="ParseException"/>, the error
/// is recorded, one token is skipped and the same element is retried.
/// </summary>
private Node ParseAlternative(ProductionPatternAlternative alt)
{
    var node = NewProduction(alt.Pattern);
    EnterNode(node);

    int index = 0;
    while (index < alt.Count)
    {
        try
        {
            ParseElement(node, alt[index]);
            index++;
        }
        catch (ParseException e)
        {
            // Error recovery: log, drop one token, retry the same element.
            AddError(e, true);
            NextToken();
        }
    }
    return ExitNode(node);
}
|
||||
|
||||
/// <summary>
/// Parses the repetitions of one element and adds the resulting children
/// to <paramref name="node"/>. The first <c>MinCount</c> repetitions are
/// always attempted; further ones (up to <c>MaxCount</c>) only while the
/// element's look-ahead matches.
/// </summary>
private void ParseElement(Production node,
                          ProductionPatternElement elem)
{
    for (int count = 0; count < elem.MaxCount; count++)
    {
        bool mandatory = count < elem.MinCount;
        if (!mandatory && !IsNext(elem))
        {
            break;
        }

        if (elem.IsToken())
        {
            Node child = NextToken(elem.Id);
            EnterNode(child);
            AddNode(node, ExitNode(child));
        }
        else
        {
            Node child = ParsePattern(GetPattern(elem.Id));
            AddNode(node, child);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Returns whether the pattern's look-ahead set matches the upcoming
/// tokens; a pattern without a look-ahead set never matches.
/// </summary>
private bool IsNext(ProductionPattern pattern)
{
    var lookAhead = pattern.LookAhead;
    return lookAhead != null && lookAhead.IsNext(this);
}
|
||||
|
||||
/// <summary>
/// Returns whether the alternative's look-ahead set matches the upcoming
/// tokens; an alternative without a look-ahead set never matches.
/// </summary>
private bool IsNext(ProductionPatternAlternative alt)
{
    var lookAhead = alt.LookAhead;
    return lookAhead != null && lookAhead.IsNext(this);
}
|
||||
|
||||
/// <summary>
/// Returns whether the element can start at the upcoming tokens. The
/// element's own look-ahead set decides when present; otherwise a token
/// element is compared with the next token and a production element
/// defers to the referenced pattern.
/// </summary>
private bool IsNext(ProductionPatternElement elem)
{
    var lookAhead = elem.LookAhead;
    if (lookAhead != null)
    {
        return lookAhead.IsNext(this);
    }

    return elem.IsToken()
        ? elem.IsMatch(PeekToken(0))
        : IsNext(GetPattern(elem.Id));
}
|
||||
|
||||
/// <summary>
/// Calculates the look-ahead sets for a pattern and its alternatives.
/// It starts with one token of look-ahead and lengthens it, for the
/// alternatives that still conflict, until no conflicts remain. An
/// alternative that stays in conflict becomes the pattern's default
/// alternative; a second such alternative is reported as an ambiguity.
/// Finally the look-ahead inside each alternative is calculated.
/// </summary>
private void CalculateLookAhead(ProductionPattern pattern)
{
    var previous = new LookAheadSet(0);
    int length = 1;
    var stack = new CallStack();

    // Calculate simple look-ahead
    stack.Push(pattern.Name, 1);
    var union = new LookAheadSet(1);
    var alternatives = new LookAheadSet[pattern.Count];
    for (int index = 0; index < pattern.Count; index++)
    {
        var alt = pattern[index];
        var simple = FindLookAhead(alt, 1, 0, stack, null);
        alternatives[index] = simple;
        alt.LookAhead = simple;
        union.AddAll(simple);
    }
    if (pattern.LookAhead == null)
    {
        pattern.LookAhead = union;
    }
    var conflicts = FindConflicts(pattern, 1);

    // Resolve conflicts
    while (conflicts.Size() > 0)
    {
        length++;
        stack.Clear();
        stack.Push(pattern.Name, length);
        conflicts.AddAll(previous);
        for (int index = 0; index < pattern.Count; index++)
        {
            var alt = pattern[index];
            if (alternatives[index].Intersects(conflicts))
            {
                alternatives[index] = FindLookAhead(alt,
                                                    length,
                                                    0,
                                                    stack,
                                                    conflicts);
                alt.LookAhead = alternatives[index];
            }
            if (!alternatives[index].Intersects(conflicts))
            {
                continue;
            }

            // Still conflicting after the longer look-ahead.
            var currentDefault = pattern.DefaultAlternative;
            if (currentDefault == null)
            {
                pattern.DefaultAlternative = alt;
            }
            else if (currentDefault != alt)
            {
                var ambiguous = alternatives[index].CreateIntersection(conflicts);
                ThrowAmbiguityException(pattern.Name, null, ambiguous);
            }
        }
        previous = conflicts;
        conflicts = FindConflicts(pattern, length);
    }

    // Resolve conflicts inside rules
    for (int index = 0; index < pattern.Count; index++)
    {
        CalculateLookAhead(pattern[index], 0);
    }
}
|
||||
|
||||
/// <summary>
/// Calculates look-ahead sets for the elements of an alternative, from
/// position <paramref name="pos"/> onwards. Elements with equal minimum
/// and maximum counts are skipped. For the others, the element's own
/// look-ahead is compared with the look-ahead of what follows it, and
/// lengthened until the two no longer conflict.
/// </summary>
private void CalculateLookAhead(ProductionPatternAlternative alt,
                                int pos)
{
    var previous = new LookAheadSet(0);
    int length = 1;

    // Check trivial cases
    if (pos >= alt.Count)
    {
        return;
    }

    // Check for non-optional element
    var owner = alt.Pattern;
    var elem = alt[pos];
    int next = pos + 1;
    if (elem.MinCount == elem.MaxCount)
    {
        CalculateLookAhead(alt, next);
        return;
    }

    // Calculate simple look-aheads
    var first = FindLookAhead(elem, 1, new CallStack(), null);
    var follow = FindLookAhead(alt, 1, next, new CallStack(), null);

    // Resolve conflicts
    var location = "at position " + next;
    var conflicts = FindConflicts(owner.Name, location, first, follow);
    while (conflicts.Size() > 0)
    {
        length++;
        conflicts.AddAll(previous);
        first = FindLookAhead(elem, length, new CallStack(), conflicts);
        follow = FindLookAhead(alt, length, next, new CallStack(), conflicts);
        first = first.CreateCombination(follow);
        elem.LookAhead = first;
        if (first.Intersects(conflicts))
        {
            first = first.CreateIntersection(conflicts);
            ThrowAmbiguityException(owner.Name, location, first);
        }
        previous = conflicts;
        conflicts = FindConflicts(owner.Name, location, first, follow);
    }

    // Check remaining elements
    CalculateLookAhead(alt, next);
}
|
||||
|
||||
/// <summary>
/// Builds the union of the look-ahead sets of all alternatives of a
/// pattern. The pattern name and length are pushed on
/// <paramref name="stack"/> for the duration of the search.
/// </summary>
/// <exception cref="ParserCreationException">
/// The same pattern name and length are already on the stack (infinite loop).
/// </exception>
private LookAheadSet FindLookAhead(ProductionPattern pattern,
                                   int length,
                                   CallStack stack,
                                   LookAheadSet filter)
{
    // Check for infinite loop
    bool alreadyVisiting = stack.Contains(pattern.Name, length);
    if (alreadyVisiting)
    {
        throw new ParserCreationException(
            ParserCreationException.ErrorType.INFINITE_LOOP,
            pattern.Name,
            (String)null);
    }

    // Find pattern look-ahead
    stack.Push(pattern.Name, length);
    var union = new LookAheadSet(length);
    for (int index = 0; index < pattern.Count; index++)
    {
        union.AddAll(FindLookAhead(pattern[index], length, 0, stack, filter));
    }
    stack.Pop();

    return union;
}
|
||||
|
||||
/// <summary>
/// Finds the look-ahead set of an alternative starting at element
/// <paramref name="pos"/>. The element's own look-ahead is combined with
/// that of the remaining elements. Without a filter this happens while
/// look-ahead length remains; with a filter, only for the sequences that
/// overlap it.
/// </summary>
/// <returns>
/// A zero-length set when <paramref name="length"/> is not positive or
/// <paramref name="pos"/> is past the last element.
/// </returns>
private LookAheadSet FindLookAhead(ProductionPatternAlternative alt,
                                   int length,
                                   int pos,
                                   CallStack stack,
                                   LookAheadSet filter)
{
    // Check trivial cases
    if (length <= 0 || pos >= alt.Count)
    {
        return new LookAheadSet(0);
    }

    // Find look-ahead for this element
    var first = FindLookAhead(alt[pos], length, stack, filter);
    if (alt[pos].MinCount == 0)
    {
        first.AddEmpty();
    }

    // Find remaining look-ahead
    if (filter == null)
    {
        int remaining = length - first.GetMinLength();
        if (remaining > 0)
        {
            var rest = FindLookAhead(alt, remaining, pos + 1, stack, null);
            first = first.CreateCombination(rest);
        }
        return first;
    }

    if (filter.IsOverlap(first))
    {
        var overlaps = first.CreateOverlaps(filter);
        int remaining = length - overlaps.GetMinLength();
        var narrowed = filter.CreateFilter(overlaps);
        var rest = FindLookAhead(alt, remaining, pos + 1, stack, narrowed);
        first.RemoveAll(overlaps);
        first.AddAll(overlaps.CreateCombination(rest));
    }

    return first;
}
|
||||
|
||||
/// <summary>
/// Finds the look-ahead set of an element, taking repetitions into
/// account. Without a filter, or when the filter does not overlap the
/// single-occurrence look-ahead, only one occurrence is considered.
/// Otherwise further occurrences are combined in, up to the smaller of
/// the element's maximum count and <paramref name="length"/>.
/// </summary>
private LookAheadSet FindLookAhead(ProductionPatternElement elem,
                                   int length,
                                   CallStack stack,
                                   LookAheadSet filter)
{
    // Find initial element look-ahead
    var current = FindLookAhead(elem, length, 0, stack, filter);
    var result = new LookAheadSet(length);
    result.AddAll(current);
    if (filter == null || !filter.IsOverlap(result))
    {
        return result;
    }

    // Handle element repetitions
    if (elem.MaxCount == Int32.MaxValue)
    {
        current = current.CreateRepetitive();
    }
    int limit = elem.MaxCount;
    if (length < limit)
    {
        limit = length;
    }
    for (int repetition = 1; repetition < limit; repetition++)
    {
        current = current.CreateOverlaps(filter);
        if (current.Size() <= 0 || current.GetMinLength() >= length)
        {
            break;
        }
        var next = FindLookAhead(elem,
                                 length,
                                 0,
                                 stack,
                                 filter.CreateFilter(current));
        current = current.CreateCombination(next);
        result.AddAll(current);
    }

    return result;
}
|
||||
|
||||
/// <summary>
/// Finds the look-ahead set of a single occurrence of an element. A token
/// element yields a set holding just its id. A production element yields
/// the referenced pattern's look-ahead, marked repetitive when that
/// pattern's name is on the call stack. The <paramref name="dummy"/>
/// argument is not used by the body.
/// </summary>
private LookAheadSet FindLookAhead(ProductionPatternElement elem,
                                   int length,
                                   int dummy,
                                   CallStack stack,
                                   LookAheadSet filter)
{
    if (elem.IsToken())
    {
        var single = new LookAheadSet(length);
        single.Add(elem.Id);
        return single;
    }

    var pattern = GetPattern(elem.Id);
    var nested = FindLookAhead(pattern, length, stack, filter);
    return stack.Contains(pattern.Name)
        ? nested.CreateRepetitive()
        : nested;
}
|
||||
|
||||
/// <summary>
/// Collects the pairwise intersections of the look-ahead sets of all
/// alternatives of a pattern.
/// </summary>
/// <remarks>
/// Raises an ambiguity error through <c>ThrowAmbiguityException</c> when
/// the collected set is repetitive.
/// </remarks>
private LookAheadSet FindConflicts(ProductionPattern pattern,
                                   int maxLength)
{
    var conflicts = new LookAheadSet(maxLength);
    for (int later = 0; later < pattern.Count; later++)
    {
        var laterSet = pattern[later].LookAhead;
        for (int earlier = 0; earlier < later; earlier++)
        {
            var earlierSet = pattern[earlier].LookAhead;
            conflicts.AddAll(laterSet.CreateIntersection(earlierSet));
        }
    }

    if (conflicts.IsRepetitive())
    {
        ThrowAmbiguityException(pattern.Name, null, conflicts);
    }
    return conflicts;
}
|
||||
|
||||
/// <summary>
/// Returns the intersection of two look-ahead sets.
/// </summary>
/// <remarks>
/// Raises an ambiguity error for the given pattern name and location
/// when the intersection is repetitive.
/// </remarks>
private LookAheadSet FindConflicts(string pattern,
                                   string location,
                                   LookAheadSet set1,
                                   LookAheadSet set2)
{
    var shared = set1.CreateIntersection(set2);
    if (!shared.IsRepetitive())
    {
        return shared;
    }

    ThrowAmbiguityException(pattern, location, shared);
    return shared;
}
|
||||
|
||||
/// <summary>
/// Builds the union of the look-ahead sets of all alternatives of a
/// pattern. The result is sized to the largest maximum length among them.
/// </summary>
private LookAheadSet FindUnion(ProductionPattern pattern)
{
    // First pass: find the longest look-ahead among the alternatives.
    int longest = 0;
    for (int index = 0; index < pattern.Count; index++)
    {
        int candidate = pattern[index].LookAhead.GetMaxLength();
        if (candidate > longest)
        {
            longest = candidate;
        }
    }

    // Second pass: merge every alternative's set.
    var union = new LookAheadSet(longest);
    for (int index = 0; index < pattern.Count; index++)
    {
        union.AddAll(pattern[index].LookAhead);
    }

    return union;
}
|
||||
|
||||
|
||||
/// <summary>
/// Throws an "unexpected token" parse exception. Tokens are consumed
/// for as long as they match the look-ahead set; the first token
/// after that is reported together with the descriptions of the
/// tokens that the remaining set would have accepted.
/// </summary>
/// <param name="set">the look-ahead set that was expected to match</param>
/// <exception cref="ParseException">always thrown at the end of this method</exception>
private void ThrowParseException(LookAheadSet set)
{
    // Read tokens until mismatch
    while (set.IsNext(this, 1))
    {
        set = set.CreateNextSet(NextToken().Id);
    }

    // Describe every token the remaining set could start with
    var expected = new ArrayList();
    foreach (var tokenId in set.GetInitialTokens())
    {
        expected.Add(GetTokenDescription(tokenId));
    }

    // Report the offending token
    var offending = NextToken();
    throw new ParseException(ParseException.ErrorType.UNEXPECTED_TOKEN,
                             offending.ToShortString(),
                             expected,
                             offending.StartLine,
                             offending.StartColumn);
}
|
||||
|
||||
/// <summary>
/// Throws an "inherent ambiguity" parser creation exception listing
/// the descriptions of the initial tokens of the conflicting set.
/// </summary>
/// <param name="pattern">the name of the ambiguous production pattern</param>
/// <param name="location">the location inside the pattern, or null</param>
/// <param name="set">the look-ahead set describing the conflict</param>
/// <exception cref="ParserCreationException">always thrown at the end of this method</exception>
private void ThrowAmbiguityException(string pattern,
                                     string location,
                                     LookAheadSet set)
{
    // Find next token descriptions
    var descriptions = new ArrayList();
    foreach (var tokenId in set.GetInitialTokens())
    {
        descriptions.Add(GetTokenDescription(tokenId));
    }

    throw new ParserCreationException(
        ParserCreationException.ErrorType.INHERENT_AMBIGUITY,
        pattern,
        location,
        descriptions);
}
|
||||
|
||||
|
||||
/// <summary>
/// A stack of (name, value) pairs kept in two parallel lists. Entries
/// can be looked up by name alone or by name and value together.
/// </summary>
private class CallStack
{
    private readonly ArrayList _nameStack = new ArrayList();
    private readonly ArrayList _valueStack = new ArrayList();

    /// <summary>Checks whether any entry carries the given name.</summary>
    public bool Contains(string name) => _nameStack.Contains(name);

    /// <summary>Checks whether an entry with both the given name and value exists.</summary>
    public bool Contains(string name, int value)
    {
        for (int depth = 0; depth < _nameStack.Count; depth++)
        {
            bool sameName = _nameStack[depth].Equals(name);
            if (sameName && _valueStack[depth].Equals(value))
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>Removes every entry from the stack.</summary>
    public void Clear()
    {
        _nameStack.Clear();
        _valueStack.Clear();
    }

    /// <summary>Adds a new entry on top of the stack.</summary>
    public void Push(string name, int value)
    {
        _nameStack.Add(name);
        _valueStack.Add(value);
    }

    /// <summary>Removes the top entry; does nothing when the stack is empty.</summary>
    public void Pop()
    {
        int top = _nameStack.Count - 1;
        if (top < 0)
        {
            return;
        }
        _nameStack.RemoveAt(top);
        _valueStack.RemoveAt(_valueStack.Count - 1);
    }
}
|
||||
}
|
||||
}
|
505
Parsing/RegExp.cs
Normal file
505
Parsing/RegExp.cs
Normal file
@@ -0,0 +1,505 @@
|
||||
using System.Collections;
|
||||
using System.Globalization;
|
||||
using System.Text;
|
||||
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/**
|
||||
* A regular expression. This class creates and holds an internal
|
||||
* data structure representing a regular expression. It also
|
||||
* allows creating matchers. This class is thread-safe. Multiple
|
||||
* matchers may operate simultaneously on the same regular
|
||||
* expression.
|
||||
*/
|
||||
internal class RegExp
|
||||
{
|
||||
private readonly Element _element;
|
||||
private readonly string _pattern;
|
||||
private readonly bool _ignoreCase;
|
||||
private int _pos;
|
||||
|
||||
/// <summary>
/// Creates a new case-sensitive regular expression.
/// </summary>
/// <param name="pattern">the regular expression pattern</param>
/// <exception cref="RegExpException">if the pattern could not be parsed</exception>
public RegExp(string pattern)
    : this(pattern, false)
{
}

/// <summary>
/// Creates a new regular expression by parsing the pattern into an
/// element tree.
/// </summary>
/// <param name="pattern">the regular expression pattern</param>
/// <param name="ignoreCase">the flag stored and later handed to created matchers</param>
/// <exception cref="RegExpException">if the pattern could not be parsed,
/// or if characters remain after the top-level expression</exception>
public RegExp(string pattern, bool ignoreCase)
{
    _pattern = pattern;
    _ignoreCase = ignoreCase;
    _pos = 0;
    _element = ParseExpr();

    // The whole pattern must have been consumed by the parse
    if (_pos >= pattern.Length)
    {
        return;
    }
    throw new RegExpException(
        RegExpException.ErrorType.UNEXPECTED_CHARACTER,
        _pos,
        pattern);
}
|
||||
|
||||
/// <summary>
/// Creates a matcher for a string by wrapping it in a reader buffer.
/// </summary>
/// <param name="str">the string to match against</param>
/// <returns>a new matcher for this expression</returns>
public Matcher Matcher(string str)
{
    var reader = new StringReader(str);
    return Matcher(new ReaderBuffer(reader));
}

/// <summary>
/// Creates a matcher for a reader buffer. The matcher receives its
/// own clone of the parsed element tree.
/// </summary>
/// <param name="buffer">the buffer to match against</param>
/// <returns>a new matcher for this expression</returns>
public Matcher Matcher(ReaderBuffer buffer)
{
    var elementCopy = (Element)_element.Clone();
    return new Matcher(elementCopy, buffer, _ignoreCase);
}
|
||||
|
||||
/// <summary>
/// Returns a multi-line debug description: the pattern, the flags and
/// a dump of the parsed element tree.
/// </summary>
/// <returns>the textual description of this expression</returns>
public override string ToString()
{
    var writer = new StringWriter();

    writer.WriteLine("Regular Expression");
    writer.WriteLine(" Pattern: " + _pattern);
    writer.WriteLine(" Flags:" + (_ignoreCase ? " caseignore" : ""));
    writer.WriteLine(" Compiled:");
    _element.PrintTo(writer, " ");

    return writer.ToString();
}
|
||||
|
||||
/// <summary>
/// Parses an expression: a term optionally followed by '|' and
/// another expression.
/// </summary>
/// <returns>the parsed element</returns>
private Element ParseExpr()
{
    var left = ParseTerm();

    if (PeekChar(0) == '|')
    {
        ReadChar('|');
        var right = ParseExpr();
        return new AlternativeElement(left, right);
    }
    return left;
}
|
||||
|
||||
/// <summary>
/// Parses a term: one or more facts in sequence. Parsing stops at the
/// end of the pattern or at one of the characters ) ] { } ? + |, and
/// the collected facts are then combined into a single element.
/// </summary>
/// <returns>the combined element for the term</returns>
private Element ParseTerm()
{
    bool EndsTerm(int c)
    {
        switch (c)
        {
            case -1:
            case ')':
            case ']':
            case '{':
            case '}':
            case '?':
            case '+':
            case '|':
                return true;
            default:
                return false;
        }
    }

    var facts = new ArrayList();
    do
    {
        facts.Add(ParseFact());
    }
    while (!EndsTerm(PeekChar(0)));

    return CombineElements(facts);
}
|
||||
|
||||
/// <summary>
/// Parses a fact: an atom optionally followed by a repeat modifier
/// introduced by ?, *, + or {.
/// </summary>
/// <returns>the atom, wrapped by its modifier when one follows</returns>
private Element ParseFact()
{
    var atom = ParseAtom();
    int next = PeekChar(0);

    bool hasModifier = next == '?' || next == '*' || next == '+' || next == '{';
    return hasModifier ? ParseAtomModifier(atom) : atom;
}
|
||||
|
||||
/// <summary>
/// Parses an atom: the dot, a parenthesized expression, a bracketed
/// character set or a single (possibly escaped) character.
/// </summary>
/// <returns>the parsed element</returns>
/// <exception cref="RegExpException">if the pattern ends here or the
/// next character cannot start an atom</exception>
private Element ParseAtom()
{
    int next = PeekChar(0);

    if (next == '.')
    {
        ReadChar('.');
        return CharacterSetElement.Dot;
    }
    if (next == '(')
    {
        ReadChar('(');
        var group = ParseExpr();
        ReadChar(')');
        return group;
    }
    if (next == '[')
    {
        ReadChar('[');
        var charset = ParseCharSet();
        ReadChar(']');
        return charset;
    }

    switch (next)
    {
        // End of pattern and the characters that cannot start an atom
        case -1:
        case ')':
        case ']':
        case '{':
        case '}':
        case '?':
        case '*':
        case '+':
        case '|':
            throw new RegExpException(
                RegExpException.ErrorType.UNEXPECTED_CHARACTER,
                _pos,
                _pattern);
    }
    return ParseChar();
}
|
||||
|
||||
/// <summary>
/// Parses a repeat modifier and wraps the element in a RepeatElement.
/// Supported forms are ?, *, +, {n}, {n,} and {n,m}; a maximum of -1
/// stands for "no upper limit". A trailing ? selects the reluctant
/// mode and a trailing + the possessive mode; otherwise the repeat
/// is greedy.
/// </summary>
/// <param name="elem">the element to repeat</param>
/// <returns>the repeat element</returns>
/// <exception cref="RegExpException">if the modifier is malformed, the
/// maximum count is zero, or the minimum exceeds the maximum</exception>
private Element ParseAtomModifier(Element elem)
{
    int min;
    int max;
    char modifier = ReadChar();

    if (modifier == '?')
    {
        min = 0;
        max = 1;
    }
    else if (modifier == '*')
    {
        min = 0;
        max = -1;
    }
    else if (modifier == '+')
    {
        min = 1;
        max = -1;
    }
    else if (modifier == '{')
    {
        int bracePos = _pos - 1;

        min = ReadNumber();
        max = min;
        if (PeekChar(0) == ',')
        {
            ReadChar(',');
            // An open upper bound unless a number follows the comma
            max = PeekChar(0) != '}' ? ReadNumber() : -1;
        }
        ReadChar('}');

        bool invalid = max == 0 || (max > 0 && min > max);
        if (invalid)
        {
            throw new RegExpException(
                RegExpException.ErrorType.INVALID_REPEAT_COUNT,
                bracePos,
                _pattern);
        }
    }
    else
    {
        throw new RegExpException(
            RegExpException.ErrorType.UNEXPECTED_CHARACTER,
            _pos - 1,
            _pattern);
    }

    // Read operator mode
    var type = RepeatElement.RepeatType.GREEDY;
    int mode = PeekChar(0);
    if (mode == '?')
    {
        ReadChar('?');
        type = RepeatElement.RepeatType.RELUCTANT;
    }
    else if (mode == '+')
    {
        ReadChar('+');
        type = RepeatElement.RepeatType.POSSESSIVE;
    }

    return new RepeatElement(elem, min, max, type);
}
|
||||
|
||||
/// <summary>
/// Parses the contents of a character set; the surrounding brackets
/// are read by the caller. A leading ^ is passed to the
/// CharacterSetElement constructor as true. Entries may be escapes,
/// single characters or ranges written as a-b.
/// </summary>
/// <returns>the character set element</returns>
private Element ParseCharSet()
{
    bool startsWithCaret = PeekChar(0) == '^';
    if (startsWithCaret)
    {
        ReadChar('^');
    }
    var charset = new CharacterSetElement(startsWithCaret);

    while (PeekChar(0) > 0)
    {
        char start = (char)PeekChar(0);

        if (start == ']')
        {
            // Closing bracket is left for the caller to read
            break;
        }

        if (start == '\\')
        {
            var escaped = ParseEscapeChar();
            if (escaped is StringElement)
            {
                charset.AddCharacters((StringElement)escaped);
            }
            else
            {
                charset.AddCharacterSet((CharacterSetElement)escaped);
            }
            continue;
        }

        ReadChar(start);

        // A '-' only forms a range when something other than ']' follows it
        bool isRange = PeekChar(0) == '-'
                    && PeekChar(1) > 0
                    && PeekChar(1) != ']';
        if (isRange)
        {
            ReadChar('-');
            char end = ReadChar();
            charset.AddRange(FixChar(start), FixChar(end));
        }
        else
        {
            charset.AddCharacter(FixChar(start));
        }
    }

    return charset;
}
|
||||
|
||||
/// <summary>
/// Parses a single character: either an escape sequence or a literal
/// character. The anchors ^ and $ are rejected as unsupported.
/// </summary>
/// <returns>the parsed element</returns>
/// <exception cref="RegExpException">if the next character is ^ or $,
/// or if the pattern ends here</exception>
private Element ParseChar()
{
    int next = PeekChar(0);

    if (next == '\\')
    {
        return ParseEscapeChar();
    }
    if (next == '^' || next == '$')
    {
        throw new RegExpException(
            RegExpException.ErrorType.UNSUPPORTED_SPECIAL_CHARACTER,
            _pos,
            _pattern);
    }
    return new StringElement(FixChar(ReadChar()));
}
|
||||
|
||||
/// <summary>
/// Parses an escape sequence starting at the backslash. Handles the
/// control escapes (\t \n \r \f \a \e), the character classes
/// (\d \D \s \S \w \W), octal (\0...), two-digit hex (\x..) and
/// four-digit hex (\u....) codes. Any other ASCII letter is rejected;
/// any other character is taken literally.
/// </summary>
/// <returns>a string element or a character set element</returns>
/// <exception cref="RegExpException">if the escape is unsupported or
/// the pattern ends inside it</exception>
private Element ParseEscapeChar()
{
    ReadChar('\\');
    char code = ReadChar();

    switch (code)
    {
        case '0': return ParseOctalEscape();
        case 'x': return ParseHexEscape(2);
        case 'u': return ParseHexEscape(4);
        case 't': return new StringElement('\t');
        case 'n': return new StringElement('\n');
        case 'r': return new StringElement('\r');
        case 'f': return new StringElement('\f');
        case 'a': return new StringElement('\u0007');
        case 'e': return new StringElement('\u001B');
        case 'd': return CharacterSetElement.Digit;
        case 'D': return CharacterSetElement.NonDigit;
        case 's': return CharacterSetElement.Whitespace;
        case 'S': return CharacterSetElement.NonWhitespace;
        case 'w': return CharacterSetElement.Word;
        case 'W': return CharacterSetElement.NonWord;
    }

    bool isAsciiLetter = ('A' <= code && code <= 'Z') || ('a' <= code && code <= 'z');
    if (isAsciiLetter)
    {
        throw new RegExpException(
            RegExpException.ErrorType.UNSUPPORTED_ESCAPE_CHARACTER,
            _pos - 2,
            _pattern);
    }
    return new StringElement(FixChar(code));
}

/// <summary>
/// Parses the digits of an octal escape after "\0": one digit in the
/// range 0-3 followed by up to two more digits in the range 0-7.
/// </summary>
private Element ParseOctalEscape()
{
    char first = ReadChar();
    if (first < '0' || first > '3')
    {
        throw new RegExpException(
            RegExpException.ErrorType.UNSUPPORTED_ESCAPE_CHARACTER,
            _pos - 3,
            _pattern);
    }

    int value = first - '0';
    for (int extra = 0; extra < 2; extra++)
    {
        int next = PeekChar(0);
        if (next < '0' || next > '7')
        {
            break;
        }
        value = value * 8 + (ReadChar() - '0');
    }
    return new StringElement(FixChar((char)value));
}

/// <summary>
/// Reads a fixed number of characters and interprets them as a
/// hexadecimal character code.
/// </summary>
private Element ParseHexEscape(int digitCount)
{
    var digits = new char[digitCount];
    for (int index = 0; index < digitCount; index++)
    {
        digits[index] = ReadChar();
    }
    string str = new string(digits);

    try
    {
        int value = Int32.Parse(str, NumberStyles.AllowHexSpecifier);
        return new StringElement(FixChar((char)value));
    }
    catch (FormatException)
    {
        throw new RegExpException(
            RegExpException.ErrorType.UNSUPPORTED_ESCAPE_CHARACTER,
            _pos - str.Length - 2,
            _pattern);
    }
}
|
||||
|
||||
/// <summary>
/// Normalizes a pattern character: lower-cased with Char.ToLower when
/// the ignore-case flag is set, otherwise returned unchanged.
/// </summary>
/// <param name="c">the character to normalize</param>
/// <returns>the normalized character</returns>
private char FixChar(char c)
{
    if (_ignoreCase)
    {
        return Char.ToLower(c);
    }
    return c;
}
|
||||
|
||||
/// <summary>
/// Reads a run of ASCII digits at the current position and returns
/// its value.
/// </summary>
/// <returns>the parsed non-negative number</returns>
/// <exception cref="RegExpException">with UNEXPECTED_CHARACTER if no
/// digit is present at the current position, or with
/// INVALID_REPEAT_COUNT if the number does not fit in an Int32</exception>
private int ReadNumber()
{
    int startPos = _pos;
    var digits = new StringBuilder();

    for (int c = PeekChar(0); '0' <= c && c <= '9'; c = PeekChar(0))
    {
        digits.Append(ReadChar());
    }

    if (digits.Length <= 0)
    {
        throw new RegExpException(
            RegExpException.ErrorType.UNEXPECTED_CHARACTER,
            _pos,
            _pattern);
    }

    // Only ASCII digits were collected, so parsing can only fail on
    // overflow. Report that as a pattern error instead of letting an
    // OverflowException escape to the caller.
    if (!Int32.TryParse(digits.ToString(),
                        NumberStyles.None,
                        CultureInfo.InvariantCulture,
                        out int value))
    {
        throw new RegExpException(
            RegExpException.ErrorType.INVALID_REPEAT_COUNT,
            startPos,
            _pattern);
    }
    return value;
}
|
||||
|
||||
/// <summary>
/// Reads the character at the current position and advances past it.
/// </summary>
/// <returns>the character read</returns>
/// <exception cref="RegExpException">with UNTERMINATED_PATTERN if the
/// end of the pattern has been reached</exception>
private char ReadChar()
{
    int next = PeekChar(0);

    if (next >= 0)
    {
        _pos++;
        return (char)next;
    }
    throw new RegExpException(
        RegExpException.ErrorType.UNTERMINATED_PATTERN,
        _pos,
        _pattern);
}
|
||||
|
||||
/// <summary>
/// Reads the next character and verifies that it is the expected one.
/// </summary>
/// <param name="c">the character that must come next</param>
/// <returns>the expected character</returns>
/// <exception cref="RegExpException">with UNEXPECTED_CHARACTER if a
/// different character was read, or UNTERMINATED_PATTERN at the end
/// of the pattern</exception>
private char ReadChar(char c)
{
    char actual = ReadChar();

    if (actual == c)
    {
        return c;
    }
    // The position was already advanced, so step back to the bad character
    throw new RegExpException(
        RegExpException.ErrorType.UNEXPECTED_CHARACTER,
        _pos - 1,
        _pattern);
}
|
||||
|
||||
/// <summary>
/// Returns a character ahead of the current position without
/// consuming it.
/// </summary>
/// <param name="count">the offset from the current position (0 is the next character)</param>
/// <returns>the character code, or -1 when the offset lies at or
/// beyond the end of the pattern</returns>
private int PeekChar(int count)
{
    int index = _pos + count;

    return index < _pattern.Length ? _pattern[index] : -1;
}
|
||||
|
||||
/// <summary>
/// Combines a list of elements into one. Adjacent string elements are
/// first merged in place into a single string element; the remaining
/// elements are then chained from right to left with CombineElement.
/// </summary>
/// <param name="list">the elements to combine (modified by the merge step)</param>
/// <returns>the combined element</returns>
private Element CombineElements(ArrayList list)
{
    // Concatenate string elements
    int index = 1;
    while (index < list.Count)
    {
        if (list[index - 1] is StringElement left && list[index] is StringElement right)
        {
            list[index - 1] = new StringElement(left.GetString() + right.GetString());
            list.RemoveAt(index);
        }
        else
        {
            index++;
        }
    }

    // Combine all remaining elements, starting from the last one
    var combined = (Element)list[list.Count - 1];
    for (int i = list.Count - 2; i >= 0; i--)
    {
        combined = new CombineElement((Element)list[i], combined);
    }
    return combined;
}
|
||||
}
|
||||
}
|
113
Parsing/RegExpException.cs
Normal file
113
Parsing/RegExpException.cs
Normal file
@@ -0,0 +1,113 @@
|
||||
using System.Text;
|
||||
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/**
 * A regular expression exception. It is thrown when a regular
 * expression pattern cannot be processed (or "compiled") properly.
 */
internal class RegExpException : Exception
{
    public enum ErrorType
    {
        /** A character was read that is not allowed at its position. */
        UNEXPECTED_CHARACTER,

        /** More characters were expected in the pattern. */
        UNTERMINATED_PATTERN,

        /**
         * A special regular expression character was used that this
         * implementation does not support.
         */
        UNSUPPORTED_SPECIAL_CHARACTER,

        /**
         * An escape character construct was used that this
         * implementation does not support.
         */
        UNSUPPORTED_ESCAPE_CHARACTER,

        /**
         * A repetition count of zero was specified, or the minimum
         * exceeds the maximum.
         */
        INVALID_REPEAT_COUNT
    }

    private readonly ErrorType _type;
    private readonly int _position;
    private readonly string _pattern;

    /**
     * Creates a new exception for an error of the given type found at
     * the given position of the pattern.
     */
    public RegExpException(ErrorType type, int pos, string pattern)
    {
        _type = type;
        _position = pos;
        _pattern = pattern;
    }

    public override string Message => GetMessage();

    /**
     * Builds the error message: the error description, the rest of
     * the pattern from the error position (or an end-of-pattern
     * marker) and the position itself.
     */
    public string GetMessage()
    {
        string description = _type switch
        {
            ErrorType.UNEXPECTED_CHARACTER => "unexpected character",
            ErrorType.UNTERMINATED_PATTERN => "unterminated pattern",
            ErrorType.UNSUPPORTED_SPECIAL_CHARACTER => "unsupported character",
            ErrorType.UNSUPPORTED_ESCAPE_CHARACTER => "unsupported escape character",
            ErrorType.INVALID_REPEAT_COUNT => "invalid repeat count",
            _ => "internal error",
        };

        string context = _position < _pattern.Length
            ? "'" + _pattern.Substring(_position) + "'"
            : "<end of pattern>";

        return description + ": " + context + " at position " + _position;
    }
}
|
||||
}
|
239
Parsing/RepeatElement.cs
Normal file
239
Parsing/RepeatElement.cs
Normal file
@@ -0,0 +1,239 @@
|
||||
using System.Collections;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
|
||||
/**
|
||||
* A regular expression element repeater. The element repeats the
|
||||
* matches from a specified element, attempting to reach the
|
||||
* maximum repetition count.
|
||||
*/
|
||||
internal class RepeatElement : Element
|
||||
{
|
||||
/// <summary>
/// The repeat modes; each value selects the matching routine used by
/// the Match method.
/// </summary>
public enum RepeatType
{
    /// <summary>Matching is done by MatchGreedy.</summary>
    GREEDY = 1,

    /// <summary>Matching is done by MatchReluctant.</summary>
    RELUCTANT = 2,

    /// <summary>Matching is done by MatchPossessive.</summary>
    POSSESSIVE = 3
}
|
||||
private readonly Element _elem;
|
||||
private readonly int _min;
|
||||
private readonly int _max;
|
||||
private readonly RepeatType _type;
|
||||
private int _matchStart;
|
||||
private BitArray _matches;
|
||||
|
||||
/// <summary>
/// Creates a new element repeater.
/// </summary>
/// <param name="elem">the element to repeat</param>
/// <param name="min">the minimum number of repetitions</param>
/// <param name="max">the maximum number of repetitions; zero or a
/// negative value is stored as Int32.MaxValue</param>
/// <param name="type">the repeat mode</param>
public RepeatElement(Element elem,
                     int min,
                     int max,
                     RepeatType type)
{
    _elem = elem;
    _min = min;
    _max = max <= 0 ? Int32.MaxValue : max;
    _type = type;

    // No cached match information yet
    _matchStart = -1;
    _matches = null;
}
|
||||
|
||||
/// <summary>
/// Creates a copy of this repeater around a clone of the repeated
/// element. The cached match state is not carried over.
/// </summary>
/// <returns>the new repeat element</returns>
public override object Clone()
{
    var innerCopy = (Element)_elem.Clone();
    return new RepeatElement(innerCopy, _min, _max, _type);
}
|
||||
|
||||
/// <summary>
/// Matches this element from a buffer position, dispatching on the
/// repeat mode. A skip of zero discards the cached match state first.
/// In possessive mode only a skip of zero can produce a match.
/// </summary>
/// <param name="m">the matcher being used</param>
/// <param name="buffer">the input buffer</param>
/// <param name="start">the starting position</param>
/// <param name="skip">the number of matches to skip</param>
/// <returns>the length of the match, or -1 if there is none</returns>
public override int Match(Matcher m,
                          ReaderBuffer buffer,
                          int start,
                          int skip)
{
    bool firstAttempt = skip == 0;
    if (firstAttempt)
    {
        _matchStart = -1;
        _matches = null;
    }

    if (_type == RepeatType.GREEDY)
    {
        return MatchGreedy(m, buffer, start, skip);
    }
    if (_type == RepeatType.RELUCTANT)
    {
        return MatchReluctant(m, buffer, start, skip);
    }
    if (_type == RepeatType.POSSESSIVE && firstAttempt)
    {
        return MatchPossessive(m, buffer, start, 0);
    }
    return -1;
}
|
||||
|
||||
/// <summary>
/// Matches in greedy mode. A skip of zero uses the possessive match
/// directly; otherwise all match lengths are computed once per start
/// position and scanned from the longest downwards.
/// </summary>
/// <param name="m">the matcher being used</param>
/// <param name="buffer">the input buffer</param>
/// <param name="start">the starting position</param>
/// <param name="skip">the number of matches to skip</param>
/// <returns>the length of the match, or -1 if there is none</returns>
private int MatchGreedy(Matcher m,
                        ReaderBuffer buffer,
                        int start,
                        int skip)
{
    // Check for simple case
    if (skip == 0)
    {
        return MatchPossessive(m, buffer, start, 0);
    }

    // Compute the match lengths unless cached for this start position
    if (_matchStart != start)
    {
        _matchStart = start;
        _matches = new BitArray(10);
        FindMatches(m, buffer, start, 0, 0, 0);
    }

    // Walk from the longest length down, passing over skipped matches
    int remaining = skip;
    int length = _matches.Count;
    while (--length >= 0)
    {
        if (!_matches[length])
        {
            continue;
        }
        if (remaining == 0)
        {
            return length;
        }
        remaining--;
    }
    return -1;
}
|
||||
|
||||
/// <summary>
/// Matches in reluctant mode: all match lengths are computed once per
/// start position and scanned from the shortest upwards.
/// </summary>
/// <param name="m">the matcher being used</param>
/// <param name="buffer">the input buffer</param>
/// <param name="start">the starting position</param>
/// <param name="skip">the number of matches to skip</param>
/// <returns>the length of the match, or -1 if there is none</returns>
private int MatchReluctant(Matcher m,
                           ReaderBuffer buffer,
                           int start,
                           int skip)
{
    // Compute the match lengths unless cached for this start position
    if (_matchStart != start)
    {
        _matchStart = start;
        _matches = new BitArray(10);
        FindMatches(m, buffer, start, 0, 0, 0);
    }

    // Walk from the shortest length up, passing over skipped matches
    int remaining = skip;
    int length = 0;
    while (length < _matches.Count)
    {
        if (_matches[length])
        {
            if (remaining == 0)
            {
                return length;
            }
            remaining--;
        }
        length++;
    }
    return -1;
}
|
||||
|
||||
/// <summary>
/// Matches in possessive mode: the element is matched repeatedly,
/// always with a skip of zero, until it fails, matches the empty
/// string, or the maximum count is reached.
/// </summary>
/// <param name="m">the matcher being used</param>
/// <param name="buffer">the input buffer</param>
/// <param name="start">the starting position</param>
/// <param name="count">the number of repetitions already matched</param>
/// <returns>the total length matched, or -1 if the repetition count
/// ends up outside the allowed minimum and maximum</returns>
private int MatchPossessive(Matcher m,
                            ReaderBuffer buffer,
                            int start,
                            int count)
{
    int total = 0;
    int last = 1;

    // Match as many elements as possible
    while (last > 0 && count < _max)
    {
        last = _elem.Match(m, buffer, start + total, 0);
        if (last < 0)
        {
            continue;
        }
        count++;
        total += last;
    }

    bool withinBounds = _min <= count && count <= _max;
    return withinBounds ? total : -1;
}
|
||||
|
||||
/// <summary>
/// Recursively records every total length at which the repetition can
/// end. For each position both the alternative matches of the element
/// (higher attempt numbers) and the following repetitions are explored.
/// </summary>
/// <param name="m">the matcher being used</param>
/// <param name="buffer">the input buffer</param>
/// <param name="start">the position to match the element at</param>
/// <param name="length">the total length matched so far</param>
/// <param name="count">the number of repetitions matched so far</param>
/// <param name="attempt">the skip value passed to the element match</param>
private void FindMatches(Matcher m,
                         ReaderBuffer buffer,
                         int start,
                         int length,
                         int count,
                         int attempt)
{
    // Check match ending here
    if (count > _max)
    {
        return;
    }
    if (_min <= count && attempt == 0)
    {
        MarkMatch(length);
    }

    // Check element match
    int subLength = _elem.Match(m, buffer, start, attempt);
    if (subLength < 0)
    {
        return;
    }
    if (subLength == 0)
    {
        // An empty match only counts when it completes the minimum
        if (_min == count + 1)
        {
            MarkMatch(length);
        }
        return;
    }

    // Find alternative and subsequent matches
    FindMatches(m, buffer, start, length, count, attempt + 1);
    FindMatches(m, buffer, start + subLength, length + subLength, count + 1, 0);
}

/// <summary>
/// Flags a total length as a valid match, growing the bit array first
/// when the length does not fit.
/// </summary>
private void MarkMatch(int length)
{
    if (_matches.Length <= length)
    {
        _matches.Length = length + 10;
    }
    _matches[length] = true;
}
|
||||
|
||||
/// <summary>
/// Prints this element to the output: a header line with the repeat
/// bounds (followed by ? for reluctant or + for possessive mode) and
/// then the repeated element with a longer indent.
/// </summary>
/// <param name="output">the writer to print to</param>
/// <param name="indent">the indentation prefix for this element</param>
public override void PrintTo(TextWriter output, string indent)
{
    string modeSuffix = "";
    if (_type == RepeatType.RELUCTANT)
    {
        modeSuffix = "?";
    }
    else if (_type == RepeatType.POSSESSIVE)
    {
        modeSuffix = "+";
    }

    output.WriteLine(indent + "Repeat (" + _min + "," + _max + ")" + modeSuffix);
    _elem.PrintTo(output, indent + " ");
}
|
||||
}
|
||||
}
|
761
Parsing/StackParser.cs
Normal file
761
Parsing/StackParser.cs
Normal file
@@ -0,0 +1,761 @@
|
||||
using System.Collections;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/**
|
||||
* Based on the recursive descent parser, this implementation removes recursion
|
||||
* and uses a stack instead. This parser handles LL(n) grammars,
|
||||
* selecting the appropriate pattern to parse based on the next few
|
||||
* tokens.
|
||||
*/
|
||||
internal class StackParser : Parser
|
||||
{
|
||||
/// <summary>
/// The parser state that is pushed onto the stack, holding the
/// variables a recursive implementation would keep in its call
/// frames. Some members (such as validnext) stand in for the
/// execution position so that patterns are processed in order.
/// </summary>
internal class ParseState
{
    /// <summary>The pattern for this state.</summary>
    internal ProductionPattern pattern;

    /// <summary>The index of the alternative currently being checked.</summary>
    internal int altindex;

    /// <summary>The index into the element list of the alternative.</summary>
    internal int elementindex;

    /// <summary>The index of the token being processed.</summary>
    internal int tokenindex;

    /// <summary>The node for the current state.</summary>
    internal Node node;

    /// <summary>
    /// True if IsNext was already checked for the current pattern,
    /// so it should not be called again.
    /// </summary>
    internal bool validnext;
}
|
||||
|
||||
|
||||
/// <summary>Creates a new parser reading from a text reader.</summary>
/// <param name="input">the input to parse</param>
public StackParser(TextReader input)
    : base(input)
{
}

/// <summary>Creates a new parser reading from a text reader.</summary>
/// <param name="input">the input to parse</param>
/// <param name="analyzer">the analyzer passed on to the base parser</param>
public StackParser(TextReader input, Analyzer analyzer)
    : base(input, analyzer)
{
}

/// <summary>Creates a new parser reading from a tokenizer.</summary>
/// <param name="tokenizer">the tokenizer to read from</param>
public StackParser(Tokenizer tokenizer)
    : base(tokenizer)
{
}

/// <summary>Creates a new parser reading from a tokenizer.</summary>
/// <param name="tokenizer">the tokenizer to read from</param>
/// <param name="analyzer">the analyzer passed on to the base parser</param>
public StackParser(Tokenizer tokenizer, Analyzer analyzer)
    : base(tokenizer, analyzer)
{
}
|
||||
|
||||
/// <summary>
/// Adds a production pattern to the parser after verifying that it
/// cannot match zero elements and is not left-recursive.
/// </summary>
/// <param name="pattern">the pattern to add</param>
/// <exception cref="ParserCreationException">if the pattern fails one
/// of the checks</exception>
public override void AddPattern(ProductionPattern pattern)
{
    ParserCreationException Invalid(string reason)
    {
        return new ParserCreationException(
            ParserCreationException.ErrorType.INVALID_PRODUCTION,
            pattern.Name,
            reason);
    }

    // Check for empty matches
    if (pattern.IsMatchingEmpty())
    {
        throw Invalid("zero elements can be matched (minimum is one)");
    }

    // Check for left-recursive patterns
    if (pattern.IsLeftRecursive())
    {
        throw Invalid("left recursive patterns are not allowed");
    }

    base.AddPattern(pattern);
}
|
||||
|
||||
/// <summary>
/// Prepares the parser for use: runs the base class preparation and
/// then calculates the look-ahead sets of every production pattern.
/// The initialized flag is cleared while the calculation runs and
/// set again once it has finished.
/// </summary>
public override void Prepare()
{
    // Performs production pattern checks
    base.Prepare();
    SetInitialized(false);

    // Calculate production look-ahead sets
    foreach (ProductionPattern pattern in GetPatterns())
    {
        CalculateLookAhead(pattern);
    }

    SetInitialized(true);
}
|
||||
|
||||
/// <summary>
/// Parses the input from the start pattern and verifies that no
/// token is left over afterwards.
/// </summary>
/// <returns>the node produced for the start pattern</returns>
/// <exception cref="ParseException">if a token remains after the
/// start pattern has been parsed</exception>
protected override Node ParseStart()
{
    var root = ParsePatterns(GetStartPattern());

    var trailing = PeekToken(0);
    if (trailing == null)
    {
        return root;
    }

    // Anything after the start pattern is an error: only EOF was expected
    var expected = new ArrayList(1) { "<EOF>" };
    throw new ParseException(
        ParseException.ErrorType.UNEXPECTED_TOKEN,
        trailing.ToShortString(),
        expected,
        trailing.StartLine,
        trailing.StartColumn);
}
|
||||
|
||||
|
||||
|
||||
/// <summary>
/// Creates a fresh parse state for a pattern with all indexes at
/// zero, no node and the validnext flag cleared.
/// </summary>
/// <param name="pattern">the pattern the state is for</param>
/// <returns>the new parse state</returns>
private ParseState NewState(ProductionPattern pattern)
{
    var state = new ParseState();

    state.pattern = pattern;
    state.altindex = 0;
    state.elementindex = 0;
    state.tokenindex = 0;
    state.node = null;
    state.validnext = false;
    return state;
}
|
||||
|
||||
/// <summary>
/// Parses patterns using an explicit stack instead of recursion. The
/// stack is local to this method, since the parser is a singleton
/// and may be parsing expressions from multiple threads, so the
/// stack cannot be stored on the object.
/// </summary>
/// <param name="start">the pattern to start parsing from</param>
/// <returns>the node produced for the start pattern</returns>
private Node ParsePatterns(ProductionPattern start)
{
    var pending = new Stack<ParseState>();
    pending.Push(NewState(start));

    while (pending.Count > 0)
    {
        var state = pending.Peek();
        var pattern = state.pattern;
        var defaultAlt = pattern.DefaultAlternative;
        ProductionPattern nested = null;

        // Look for the first non-default alternative that applies; a
        // state resumed with validnext set continues its alternative.
        for (; state.altindex < pattern.Count; state.altindex++)
        {
            var alt = pattern[state.altindex];
            if (state.validnext || (defaultAlt != alt && IsNext(alt)))
            {
                state.validnext = true;
                nested = ParseAlternative(state, alt);
                break;
            }
            state.validnext = false;
        }

        // All alternatives were passed over: fall back to the default
        if (state.altindex >= pattern.Count)
        {
            bool defaultApplies = state.validnext
                               || (defaultAlt != null && IsNext(defaultAlt));
            if (defaultApplies)
            {
                state.validnext = true;
                nested = ParseAlternative(state, defaultAlt);
            }
            else
            {
                ThrowParseException(FindUnion(pattern));
            }
        }

        if (nested != null)
        {
            // A sub-pattern has to be parsed before this state can continue
            pending.Push(NewState(nested));
            continue;
        }

        // The current pattern is finished, so back up to the previous state
        var finished = state.node;
        pending.Pop();
        if (pending.Count == 0)
        {
            // Back at the top: the finished node is the result
            return finished;
        }
        AddNode((Production)pending.Peek().node, finished);
    }

    // Should never get here, but a return value is required
    return null;
}
|
||||
|
||||
/// <summary>
/// Parses (or resumes parsing) the elements of an alternative. The
/// production node is created on the first call for a state. A parse
/// error is recorded, one token is skipped and the same element is
/// tried again.
/// </summary>
/// <param name="state">the parse state being processed</param>
/// <param name="alt">the alternative to parse</param>
/// <returns>the pattern to push onto the stack and process next, or
/// null when the alternative has been completed</returns>
private ProductionPattern ParseAlternative(ParseState state, ProductionPatternAlternative alt)
{
    bool firstVisit = state.node == null;
    if (firstVisit)
    {
        state.node = NewProduction(alt.Pattern);
        state.elementindex = 0;
        EnterNode(state.node);
    }

    while (state.elementindex < alt.Count)
    {
        try
        {
            var nested = ParseElement(state, alt[state.elementindex]);
            if (nested != null)
            {
                return nested;
            }
            state.elementindex++;
        }
        catch (ParseException e)
        {
            AddError(e, true);
            NextToken();
        }
    }

    state.node = ExitNode(state.node);
    return null;
}
|
||||
|
||||
/// <summary>
/// Parses the repetitions of a single pattern element. Token elements
/// are read and added to the state's node right away. For any other
/// element the referenced pattern is returned so that the caller can
/// push it; the repetition counter is stored in the state first.
/// </summary>
/// <param name="state">the parse state being processed</param>
/// <param name="elem">the element to parse</param>
/// <returns>the pattern to process next, or null when the element has
/// been completed</returns>
private ProductionPattern ParseElement(ParseState state,
                                       ProductionPatternElement elem)
{
    int repetition = state.tokenindex;

    while (repetition < elem.MaxCount)
    {
        // Past the minimum count, continue only while the element is next
        if (repetition >= elem.MinCount && !IsNext(elem))
        {
            break;
        }

        if (!elem.IsToken())
        {
            // Continue from the next repetition when we return
            state.tokenindex = repetition + 1;
            // Hand the new pattern back to be processed at this state
            return GetPattern(elem.Id);
        }

        Node child = NextToken(elem.Id);
        EnterNode(child);
        AddNode((Production)state.node, ExitNode(child));
        repetition++;
    }

    // This element has been processed completely
    state.tokenindex = 0;
    return null;
}
|
||||
|
||||
/// <summary>
/// Checks whether the look-ahead set of a production pattern matches
/// the upcoming tokens.
/// </summary>
/// <param name="pattern">the pattern to check</param>
/// <returns>true if the set matches, false if it does not or if the
/// pattern has no look-ahead set</returns>
private bool IsNext(ProductionPattern pattern)
{
    LookAheadSet lookAhead = pattern.LookAhead;

    return lookAhead != null && lookAhead.IsNext(this);
}
|
||||
|
||||
/// <summary>
/// Checks whether the look-ahead set of a pattern alternative matches
/// the upcoming tokens.
/// </summary>
/// <param name="alt">the alternative to check</param>
/// <returns>true if the set matches, false if it does not or if the
/// alternative has no look-ahead set</returns>
private bool IsNext(ProductionPatternAlternative alt)
{
    LookAheadSet lookAhead = alt.LookAhead;

    return lookAhead != null && lookAhead.IsNext(this);
}
|
||||
|
||||
/// <summary>
/// Checks whether a pattern element matches the upcoming tokens. The
/// element's own look-ahead set is used when present. Otherwise a
/// token element is compared with the next token, and any other
/// element is checked through the pattern it refers to.
/// </summary>
/// <param name="elem">the element to check</param>
/// <returns>true if the element can come next</returns>
private bool IsNext(ProductionPatternElement elem)
{
    LookAheadSet lookAhead = elem.LookAhead;

    if (lookAhead != null)
    {
        return lookAhead.IsNext(this);
    }
    return elem.IsToken()
        ? elem.IsMatch(PeekToken(0))
        : IsNext(GetPattern(elem.Id));
}
|
||||
|
||||
/// <summary>
/// Calculates the look-ahead sets of a production pattern and its
/// alternatives. Every alternative starts with a look-ahead of one
/// token. While alternatives still conflict, the length is increased
/// and the conflicting alternatives are recalculated. An alternative
/// that keeps conflicting becomes the default alternative; a second
/// such alternative is reported as an ambiguity.
/// </summary>
/// <param name="pattern">the production pattern to process</param>
private void CalculateLookAhead(ProductionPattern pattern)
{
    var callStack = new CallStack();
    var altSets = new LookAheadSet[pattern.Count];
    var union = new LookAheadSet(1);

    // Calculate simple look-ahead
    callStack.Push(pattern.Name, 1);
    for (int index = 0; index < pattern.Count; index++)
    {
        var alternative = pattern[index];
        altSets[index] = FindLookAhead(alternative, 1, 0, callStack, null);
        alternative.LookAhead = altSets[index];
        union.AddAll(altSets[index]);
    }
    if (pattern.LookAhead == null)
    {
        pattern.LookAhead = union;
    }

    // Resolve conflicts by growing the look-ahead length step by step
    var previousConflicts = new LookAheadSet(0);
    var conflicts = FindConflicts(pattern, 1);
    for (int length = 2; conflicts.Size() > 0; length++)
    {
        callStack.Clear();
        callStack.Push(pattern.Name, length);
        conflicts.AddAll(previousConflicts);

        for (int index = 0; index < pattern.Count; index++)
        {
            var alternative = pattern[index];

            if (altSets[index].Intersects(conflicts))
            {
                altSets[index] = FindLookAhead(alternative,
                                               length,
                                               0,
                                               callStack,
                                               conflicts);
                alternative.LookAhead = altSets[index];
            }

            // Check again: the recalculated set may still conflict
            if (!altSets[index].Intersects(conflicts))
            {
                continue;
            }
            if (pattern.DefaultAlternative == null)
            {
                pattern.DefaultAlternative = alternative;
            }
            else if (pattern.DefaultAlternative != alternative)
            {
                var ambiguous = altSets[index].CreateIntersection(conflicts);
                ThrowAmbiguityException(pattern.Name, null, ambiguous);
            }
        }

        previousConflicts = conflicts;
        conflicts = FindConflicts(pattern, length);
    }

    // Resolve conflicts inside rules
    for (int index = 0; index < pattern.Count; index++)
    {
        CalculateLookAhead(pattern[index], 0);
    }
}
|
||||
|
||||
/// <summary>
/// Calculates the look-ahead needed for the elements of a pattern
/// alternative, starting at the given position and recursing over
/// the following positions.
/// </summary>
/// <param name="alt">The pattern alternative to process.</param>
/// <param name="pos">The index of the element to process.</param>
/// <remarks>
/// Elements whose minimum and maximum counts are equal are skipped.
/// For the others, the element look-ahead and the look-ahead of the
/// rest of the alternative are compared; while they conflict the
/// look-ahead length is increased and the combined set is stored on
/// the element. <c>ThrowAmbiguityException</c> is called when the
/// new set still intersects the conflicts.
/// </remarks>
private void CalculateLookAhead(ProductionPatternAlternative alt,
                                int pos)
{
    LookAheadSet earlierConflicts = new LookAheadSet(0);
    int depth = 1;

    // Nothing to do past the last element
    if (pos >= alt.Count)
    {
        return;
    }

    // Elements with a fixed repeat count need no look-ahead
    var pattern = alt.Pattern;
    var elem = alt[pos];
    if (elem.MinCount == elem.MaxCount)
    {
        CalculateLookAhead(alt, pos + 1);
        return;
    }

    // Calculate simple look-aheads
    var first = FindLookAhead(elem, 1, new CallStack(), null);
    var follow = FindLookAhead(alt, 1, pos + 1, new CallStack(), null);

    // Resolve conflicts
    var location = "at position " + (pos + 1);
    var clashes = FindConflicts(pattern.Name, location, first, follow);
    while (clashes.Size() > 0)
    {
        depth++;
        clashes.AddAll(earlierConflicts);
        first = FindLookAhead(elem, depth, new CallStack(), clashes);
        follow = FindLookAhead(alt,
                               depth,
                               pos + 1,
                               new CallStack(),
                               clashes);
        first = first.CreateCombination(follow);
        elem.LookAhead = first;
        if (first.Intersects(clashes))
        {
            first = first.CreateIntersection(clashes);
            ThrowAmbiguityException(pattern.Name, location, first);
        }
        earlierConflicts = clashes;
        clashes = FindConflicts(pattern.Name, location, first, follow);
    }

    // Check remaining elements
    CalculateLookAhead(alt, pos + 1);
}
|
||||
|
||||
/// <summary>
/// Finds the look-ahead set of a production pattern as the union of
/// the look-ahead sets of all its alternatives.
/// </summary>
/// <param name="pattern">The production pattern.</param>
/// <param name="length">The look-ahead length.</param>
/// <param name="stack">The call stack used to detect loops.</param>
/// <param name="filter">The filter set passed on to the alternatives, or null.</param>
/// <returns>The union of the alternatives' look-ahead sets.</returns>
/// <exception cref="ParserCreationException">
/// Thrown with the INFINITE_LOOP error type when the call stack
/// already holds this pattern name with the same length.
/// </exception>
private LookAheadSet FindLookAhead(ProductionPattern pattern,
                                   int length,
                                   CallStack stack,
                                   LookAheadSet filter)
{
    // Check for infinite loop
    bool alreadyVisiting = stack.Contains(pattern.Name, length);
    if (alreadyVisiting)
    {
        throw new ParserCreationException(
            ParserCreationException.ErrorType.INFINITE_LOOP,
            pattern.Name,
            (string)null);
    }

    // Collect the look-ahead of each alternative while this pattern
    // is registered on the call stack
    stack.Push(pattern.Name, length);
    var union = new LookAheadSet(length);
    for (int n = 0; n < pattern.Count; n++)
    {
        union.AddAll(FindLookAhead(pattern[n], length, 0, stack, filter));
    }
    stack.Pop();

    return union;
}
|
||||
|
||||
/// <summary>
/// Finds the look-ahead set of a pattern alternative, starting at
/// the given element position and continuing with the following
/// elements for as long as look-ahead length remains.
/// </summary>
/// <param name="alt">The pattern alternative.</param>
/// <param name="length">The look-ahead length.</param>
/// <param name="pos">The index of the first element to consider.</param>
/// <param name="stack">The call stack used to detect loops.</param>
/// <param name="filter">The filter set, or null for no filtering.</param>
/// <returns>
/// The look-ahead set found; a new zero-length set when
/// <paramref name="length"/> is not positive or
/// <paramref name="pos"/> is past the last element.
/// </returns>
private LookAheadSet FindLookAhead(ProductionPatternAlternative alt,
                                   int length,
                                   int pos,
                                   CallStack stack,
                                   LookAheadSet filter)
{
    // Check trivial cases
    if (length <= 0 || pos >= alt.Count)
    {
        return new LookAheadSet(0);
    }

    // Find look-ahead for this element; an element that may occur
    // zero times also contributes the empty sequence
    var head = FindLookAhead(alt[pos], length, stack, filter);
    if (alt[pos].MinCount == 0)
    {
        head.AddEmpty();
    }

    // Find remaining look-ahead
    if (filter == null)
    {
        int remaining = length - head.GetMinLength();
        if (remaining > 0)
        {
            LookAheadSet tail = FindLookAhead(alt, remaining, pos + 1, stack, null);
            head = head.CreateCombination(tail);
        }
        return head;
    }
    if (filter.IsOverlap(head))
    {
        var overlaps = head.CreateOverlaps(filter);
        int remaining = length - overlaps.GetMinLength();
        LookAheadSet narrowed = filter.CreateFilter(overlaps);
        LookAheadSet tail = FindLookAhead(alt, remaining, pos + 1, stack, narrowed);
        head.RemoveAll(overlaps);
        head.AddAll(overlaps.CreateCombination(tail));
    }

    return head;
}
|
||||
|
||||
/// <summary>
/// Finds the look-ahead set of a pattern element, including the
/// sets produced by repeating the element when a filter overlaps
/// the initial result.
/// </summary>
/// <param name="elem">The pattern element.</param>
/// <param name="length">The look-ahead length.</param>
/// <param name="stack">The call stack used to detect loops.</param>
/// <param name="filter">The filter set, or null for no filtering.</param>
/// <returns>
/// The look-ahead set found. Repetitions are only considered when
/// <paramref name="filter"/> is non-null and overlaps the look-ahead
/// of a single occurrence.
/// </returns>
private LookAheadSet FindLookAhead(ProductionPatternElement elem,
                                   int length,
                                   CallStack stack,
                                   LookAheadSet filter)
{
    // Find initial element look-ahead
    var current = FindLookAhead(elem, length, 0, stack, filter);
    var collected = new LookAheadSet(length);
    collected.AddAll(current);

    bool needsRepetitions = filter != null && filter.IsOverlap(collected);
    if (!needsRepetitions)
    {
        return collected;
    }

    // Handle element repetitions
    if (elem.MaxCount == int.MaxValue)
    {
        current = current.CreateRepetitive();
    }
    var limit = elem.MaxCount;
    if (length < limit)
    {
        limit = length;
    }
    for (int round = 1; round < limit; round++)
    {
        current = current.CreateOverlaps(filter);
        if (current.Size() <= 0 || current.GetMinLength() >= length)
        {
            break;
        }
        var next = FindLookAhead(elem,
                                 length,
                                 0,
                                 stack,
                                 filter.CreateFilter(current));
        current = current.CreateCombination(next);
        collected.AddAll(current);
    }

    return collected;
}
|
||||
|
||||
/// <summary>
/// Finds the look-ahead set of a single occurrence of a pattern
/// element.
/// </summary>
/// <param name="elem">The pattern element.</param>
/// <param name="length">The look-ahead length.</param>
/// <param name="dummy">Not read by this method.</param>
/// <param name="stack">The call stack used to detect loops.</param>
/// <param name="filter">The filter set, or null for no filtering.</param>
/// <returns>
/// For a token element, a new set holding just the element id. For
/// any other element, the look-ahead of the referenced production
/// pattern, converted with <c>CreateRepetitive()</c> when the pattern
/// name is on the call stack.
/// </returns>
private LookAheadSet FindLookAhead(ProductionPatternElement elem,
                                   int length,
                                   int dummy,
                                   CallStack stack,
                                   LookAheadSet filter)
{
    if (elem.IsToken())
    {
        LookAheadSet tokenSet = new LookAheadSet(length);
        tokenSet.Add(elem.Id);
        return tokenSet;
    }

    var pattern = GetPattern(elem.Id);
    LookAheadSet found = FindLookAhead(pattern, length, stack, filter);
    if (stack.Contains(pattern.Name))
    {
        found = found.CreateRepetitive();
    }

    return found;
}
|
||||
|
||||
/// <summary>
/// Collects the look-ahead conflicts between the alternatives of a
/// production pattern, i.e. the union of the pairwise intersections
/// of the alternatives' look-ahead sets.
/// </summary>
/// <param name="pattern">The production pattern to check.</param>
/// <param name="maxLength">The length used to create the result set.</param>
/// <returns>The set of conflicting look-aheads (possibly empty).</returns>
/// <remarks>
/// Calls <c>ThrowAmbiguityException</c> when the resulting set
/// reports itself as repetitive.
/// </remarks>
private LookAheadSet FindConflicts(ProductionPattern pattern,
                                   int maxLength)
{
    LookAheadSet shared = new LookAheadSet(maxLength);

    // Compare every alternative with each alternative before it
    for (int later = 0; later < pattern.Count; later++)
    {
        var laterSet = pattern[later].LookAhead;
        for (int earlier = 0; earlier < later; earlier++)
        {
            var earlierSet = pattern[earlier].LookAhead;
            shared.AddAll(laterSet.CreateIntersection(earlierSet));
        }
    }
    if (shared.IsRepetitive())
    {
        ThrowAmbiguityException(pattern.Name, null, shared);
    }
    return shared;
}
|
||||
|
||||
/// <summary>
/// Finds the look-ahead conflicts between two look-ahead sets, i.e.
/// their intersection.
/// </summary>
/// <param name="pattern">The pattern name passed on when reporting an ambiguity.</param>
/// <param name="location">The location text passed on when reporting an ambiguity.</param>
/// <param name="set1">The first look-ahead set.</param>
/// <param name="set2">The second look-ahead set.</param>
/// <returns>The intersection of the two sets.</returns>
/// <remarks>
/// Calls <c>ThrowAmbiguityException</c> when the intersection
/// reports itself as repetitive.
/// </remarks>
private LookAheadSet FindConflicts(string pattern,
                                   string location,
                                   LookAheadSet set1,
                                   LookAheadSet set2)
{
    var shared = set1.CreateIntersection(set2);

    if (shared.IsRepetitive())
    {
        ThrowAmbiguityException(pattern, location, shared);
    }
    return shared;
}
|
||||
|
||||
/// <summary>
/// Builds the union of the look-ahead sets of all alternatives of a
/// production pattern.
/// </summary>
/// <param name="pattern">The production pattern.</param>
/// <returns>
/// A new set, created with the largest maximum length found among
/// the alternatives' sets, holding the contents of all of them.
/// </returns>
private LookAheadSet FindUnion(ProductionPattern pattern)
{
    // First pass: find the longest look-ahead among the alternatives
    int longest = 0;
    for (int n = 0; n < pattern.Count; n++)
    {
        int candidate = pattern[n].LookAhead.GetMaxLength();
        if (candidate > longest)
        {
            longest = candidate;
        }
    }

    // Second pass: merge all alternatives into one set of that length
    LookAheadSet union = new LookAheadSet(longest);
    for (int n = 0; n < pattern.Count; n++)
    {
        union.AddAll(pattern[n].LookAhead);
    }

    return union;
}
|
||||
|
||||
|
||||
/// <summary>
/// Throws a parse exception describing an unexpected token. Tokens
/// are consumed with <c>NextToken()</c> for as long as the look-ahead
/// set accepts the next one; the token read after that is reported
/// together with the descriptions of the tokens the remaining set
/// could start with.
/// </summary>
/// <param name="set">The look-ahead set that should have matched.</param>
/// <exception cref="ParseException">
/// Always thrown, with the UNEXPECTED_TOKEN error type.
/// </exception>
private void ThrowParseException(LookAheadSet set)
{
    ArrayList expected = new ArrayList();

    // Read tokens until mismatch
    while (set.IsNext(this, 1))
    {
        var consumed = NextToken();
        set = set.CreateNextSet(consumed.Id);
    }

    // Find next token descriptions
    foreach (var tokenId in set.GetInitialTokens())
    {
        expected.Add(GetTokenDescription(tokenId));
    }

    // Create exception
    var offending = NextToken();
    throw new ParseException(ParseException.ErrorType.UNEXPECTED_TOKEN,
                             offending.ToShortString(),
                             expected,
                             offending.StartLine,
                             offending.StartColumn);
}
|
||||
|
||||
/// <summary>
/// Throws a parser creation exception reporting an inherent
/// ambiguity, listing the descriptions of the tokens the given
/// look-ahead set can start with.
/// </summary>
/// <param name="pattern">The production pattern name.</param>
/// <param name="location">The location text, or null.</param>
/// <param name="set">The set of conflicting look-aheads.</param>
/// <exception cref="ParserCreationException">
/// Always thrown, with the INHERENT_AMBIGUITY error type.
/// </exception>
private void ThrowAmbiguityException(string pattern,
                                     string location,
                                     LookAheadSet set)
{
    ArrayList descriptions = new ArrayList();

    // Find next token descriptions
    foreach (var tokenId in set.GetInitialTokens())
    {
        descriptions.Add(GetTokenDescription(tokenId));
    }

    // Create exception
    throw new ParserCreationException(
        ParserCreationException.ErrorType.INHERENT_AMBIGUITY,
        pattern,
        location,
        descriptions);
}
|
||||
|
||||
|
||||
/// <summary>
/// A stack of (name, value) pairs, kept in two parallel lists that
/// always have the same number of entries.
/// </summary>
private class CallStack
{
    private readonly ArrayList _nameStack = new ArrayList();
    private readonly ArrayList _valueStack = new ArrayList();

    /// <summary>
    /// Checks whether any entry on the stack has the given name.
    /// </summary>
    /// <param name="name">The name to look for.</param>
    /// <returns>True if the name is on the stack.</returns>
    public bool Contains(string name)
    {
        return _nameStack.Contains(name);
    }

    /// <summary>
    /// Checks whether any entry on the stack has both the given
    /// name and the given value.
    /// </summary>
    /// <param name="name">The name to look for.</param>
    /// <param name="value">The value paired with the name.</param>
    /// <returns>True if such an entry is on the stack.</returns>
    public bool Contains(string name, int value)
    {
        for (int depth = 0; depth < _nameStack.Count; depth++)
        {
            if (!_nameStack[depth].Equals(name))
            {
                continue;
            }
            if (_valueStack[depth].Equals(value))
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Removes all entries from the stack.
    /// </summary>
    public void Clear()
    {
        _nameStack.Clear();
        _valueStack.Clear();
    }

    /// <summary>
    /// Adds a new entry on top of the stack.
    /// </summary>
    /// <param name="name">The entry name.</param>
    /// <param name="value">The entry value.</param>
    public void Push(string name, int value)
    {
        _nameStack.Add(name);
        _valueStack.Add(value);
    }

    /// <summary>
    /// Removes the top entry; does nothing when the stack is empty.
    /// </summary>
    public void Pop()
    {
        if (_nameStack.Count <= 0)
        {
            return;
        }
        _nameStack.RemoveAt(_nameStack.Count - 1);
        _valueStack.RemoveAt(_valueStack.Count - 1);
    }
}
|
||||
}
|
||||
}
|
64
Parsing/StringElement.cs
Normal file
64
Parsing/StringElement.cs
Normal file
@@ -0,0 +1,64 @@
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/**
 * A regular expression element that matches one exact string. The
 * string is stored in a read-only field set by the constructor, so
 * the element never changes after creation.
 */
internal class StringElement : Element
{
    private readonly string _value;

    /**
     * Creates a string element matching a single character.
     */
    public StringElement(char c)
        : this(c.ToString())
    {
    }

    /**
     * Creates a string element matching the given string.
     */
    public StringElement(string str)
    {
        _value = str;
    }

    /**
     * Returns the string this element matches.
     */
    public string GetString() => _value;

    /**
     * Returns this same instance; no copy is made.
     */
    public override object Clone() => this;

    /**
     * Tries to match this element against the buffer contents at
     * the given start offset.
     *
     * Returns the length of the string on success and -1 otherwise.
     * Any non-zero skip value yields -1. When the buffer reports a
     * negative value (no character available) the matcher is told
     * that the end of the string was read. With a case-insensitive
     * matcher each input character is lower-cased before it is
     * compared with the stored string; the stored string itself is
     * compared as-is.
     */
    public override int Match(Matcher m,
                              ReaderBuffer buffer,
                              int start,
                              int skip)
    {
        if (skip != 0)
        {
            return -1;
        }

        int offset = 0;
        while (offset < _value.Length)
        {
            int actual = buffer.Peek(start + offset);
            if (actual < 0)
            {
                m.SetReadEndOfString();
                return -1;
            }
            if (m.IsCaseInsensitive())
            {
                actual = (int)Char.ToLower((char)actual);
            }
            if (actual != (int)_value[offset])
            {
                return -1;
            }
            offset++;
        }
        return _value.Length;
    }

    /**
     * Writes the string, wrapped in single quotes and prefixed by
     * the indentation, as one line to the output.
     */
    public override void PrintTo(TextWriter output, string indent)
    {
        output.WriteLine(indent + "'" + _value + "'");
    }
}
|
||||
}
|
168
Parsing/Token.cs
Normal file
168
Parsing/Token.cs
Normal file
@@ -0,0 +1,168 @@
|
||||
using System.Text;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/**
 * A token node, i.e. a run of adjacent characters in a parse tree.
 * Each token stores the pattern it was created with, its image
 * (text), its start and end positions and links to the previous
 * and next tokens.
 */
internal class Token : Node
{
    private readonly TokenPattern _pattern;
    private readonly string _image;
    private readonly int _startLine;
    private readonly int _startColumn;
    private readonly int _endLine;
    private readonly int _endColumn;
    private Token _previous = null;
    private Token _next = null;

    /**
     * Creates a new token.
     *
     * The end position is derived from the image: the end line is
     * the start line plus the number of '\n' characters in the
     * image. The end column is col + length - 1 for an image
     * without newlines, otherwise the number of characters after
     * the last '\n'.
     */
    public Token(TokenPattern pattern, string image, int line, int col)
    {
        _pattern = pattern;
        _image = image;
        _startLine = line;
        _startColumn = col;

        int lastLine = line;
        int lastColumn = col + image.Length - 1;
        int newline = image.IndexOf('\n');
        while (newline >= 0)
        {
            int lineStart = newline + 1;
            lastLine++;
            lastColumn = image.Length - lineStart;
            newline = image.IndexOf('\n', lineStart);
        }
        _endLine = lastLine;
        _endColumn = lastColumn;
    }

    /** The id of the token pattern. */
    public override int Id => _pattern.Id;

    /** The name of the token pattern. */
    public override string Name => _pattern.Name;

    /** The line on which the token starts. */
    public override int StartLine => _startLine;

    /** The column in which the token starts. */
    public override int StartColumn => _startColumn;

    /** The line on which the token ends. */
    public override int EndLine => _endLine;

    /** The column in which the token ends. */
    public override int EndColumn => _endColumn;

    /** The token image (text). */
    public string Image => _image;

    /** Returns the same value as the Image property. */
    public string GetImage()
    {
        return Image;
    }

    /** The token pattern this token was created with. */
    internal TokenPattern Pattern => _pattern;

    /**
     * The previous token. Setting it unlinks the old previous
     * token from this one and points the new previous token's
     * next link at this token.
     */
    public Token Previous
    {
        get => _previous;
        set
        {
            if (_previous != null)
            {
                _previous._next = null;
            }
            _previous = value;
            if (value != null)
            {
                value._next = this;
            }
        }
    }

    /** Returns the same value as the Previous property. */
    public Token GetPreviousToken()
    {
        return Previous;
    }

    /**
     * The next token. Setting it unlinks the old next token from
     * this one and points the new next token's previous link at
     * this token.
     */
    public Token Next
    {
        get => _next;
        set
        {
            if (_next != null)
            {
                _next._previous = null;
            }
            _next = value;
            if (value != null)
            {
                value._previous = this;
            }
        }
    }

    /** Returns the same value as the Next property. */
    public Token GetNextToken()
    {
        return Next;
    }

    /**
     * Returns a description with the pattern name and id, the
     * (possibly shortened) image and the start line and column.
     */
    public override string ToString()
    {
        StringBuilder text = new StringBuilder();

        text.Append(_pattern.Name);
        text.Append("(");
        text.Append(_pattern.Id);
        text.Append("): \"");
        AppendImagePreview(text);
        text.Append("\", line: ");
        text.Append(_startLine);
        text.Append(", col: ");
        text.Append(_startColumn);

        return text.ToString();
    }

    /**
     * Returns the (possibly shortened) image in double quotes,
     * followed by the pattern name in angle brackets when the
     * pattern is a regular expression pattern.
     */
    public string ToShortString()
    {
        StringBuilder text = new StringBuilder();

        text.Append('"');
        AppendImagePreview(text);
        text.Append('"');
        if (_pattern.Type == TokenPattern.PatternType.REGEXP)
        {
            text.Append(" <");
            text.Append(_pattern.Name);
            text.Append(">");
        }

        return text.ToString();
    }

    /**
     * Appends the image to the builder. An image containing a
     * newline is cut before the first '\n' (and before a directly
     * preceding '\r') and followed by "(...)".
     */
    private void AppendImagePreview(StringBuilder target)
    {
        int cut = _image.IndexOf('\n');

        if (cut < 0)
        {
            target.Append(_image);
            return;
        }
        if (cut > 0 && _image[cut - 1] == '\r')
        {
            cut--;
        }
        target.Append(_image.Substring(0, cut));
        target.Append("(...)");
    }
}
|
||||
}
|
31
Parsing/TokenMatch.cs
Normal file
31
Parsing/TokenMatch.cs
Normal file
@@ -0,0 +1,31 @@
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/**
 * The token match status. It remembers the longest match reported
 * so far together with the pattern that produced it.
 */
internal class TokenMatch
{
    /** The length of the longest match seen, zero when none. */
    public int Length { get; private set; }

    /** The pattern of the longest match seen, null when none. */
    public TokenPattern Pattern { get; private set; }

    /**
     * Resets the status to "no match": zero length, no pattern.
     */
    public void Clear()
    {
        Length = 0;
        Pattern = null;
    }

    /**
     * Records a match, but only when it is strictly longer than
     * the one currently stored; shorter or equally long matches
     * are ignored.
     */
    public void Update(int length, TokenPattern pattern)
    {
        if (length <= Length)
        {
            return;
        }
        Length = length;
        Pattern = pattern;
    }
}
|
||||
}
|
825
Parsing/TokenNFA.cs
Normal file
825
Parsing/TokenNFA.cs
Normal file
@@ -0,0 +1,825 @@
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/**
 * A non-deterministic finite state automaton (NFA) for matching
 * tokens, covering both fixed strings and regular expressions.
 * Start states for ASCII first characters can be kept in a
 * 128-entry lookup table; all other matches start from a shared
 * initial state. Matching iterates over a state queue.
 */
internal class TokenNFA
{
    private readonly NFAState[] _initialChar = new NFAState[128];
    private readonly NFAState _initial = new NFAState();
    private readonly NFAStateQueue _queue = new NFAStateQueue();

    /**
     * Adds a fixed string match to this automaton.
     *
     * A case-sensitive string starting with a character below 128
     * is attached to the lookup table entry of that character
     * (created on demand); any other string starts with a
     * transition out of the shared initial state. The final state
     * gets the token pattern as its value. The string is indexed
     * at position 0 without a length check.
     */
    public void AddTextMatch(string str, bool ignoreCase, TokenPattern value)
    {
        char head = str[0];
        NFAState cursor;

        if (ignoreCase || head >= 128)
        {
            cursor = _initial.AddOut(head, ignoreCase, null);
        }
        else
        {
            cursor = _initialChar[head]
                     ?? (_initialChar[head] = new NFAState());
        }
        for (int n = 1; n < str.Length; n++)
        {
            cursor = cursor.AddOut(str[n], ignoreCase, null);
        }
        cursor.Value = value;
    }

    /**
     * Adds a regular expression match to this automaton.
     *
     * The parsed expression is connected in one of three ways: via
     * an epsilon transition from the initial state when its start
     * state has incoming transitions; via the ASCII lookup table
     * when the match is case-sensitive, every outgoing transition
     * of the start state is ASCII-only, no ASCII character matches
     * more than one of them and none of the matched characters
     * already has a table entry; otherwise by merging the start
     * state into the initial state. The chosen strategy is written
     * to the DebugInfo of the token pattern.
     */
    public void AddRegExpMatch(string pattern,
                               bool ignoreCase,
                               TokenPattern value)
    {
        var parser = new TokenRegExpParser(pattern, ignoreCase);
        string debug = "DFA regexp; " + parser.GetDebugInfo();
        var start = parser.Start;

        // Decide whether the ASCII lookup table may be used
        bool asciiOnly = start.IsAsciiOutgoing();
        for (int code = 0; asciiOnly && code < 128; code++)
        {
            bool seen = false;
            for (int t = 0; t < start.Outgoing.Length; t++)
            {
                if (!start.Outgoing[t].Match((char)code))
                {
                    continue;
                }
                if (seen)
                {
                    // Two transitions accept the same character
                    asciiOnly = false;
                    break;
                }
                seen = true;
            }
            if (seen && _initialChar[code] != null)
            {
                // The table slot is already taken
                asciiOnly = false;
            }
        }

        if (start.Incoming.Length > 0)
        {
            _initial.AddOut(new NFAEpsilonTransition(start));
            debug += ", uses initial epsilon";
        }
        else if (asciiOnly && !ignoreCase)
        {
            for (int code = 0; code < 128; code++)
            {
                for (int t = 0; t < start.Outgoing.Length; t++)
                {
                    if (start.Outgoing[t].Match((char)code))
                    {
                        _initialChar[code] = start.Outgoing[t].State;
                    }
                }
            }
            debug += ", uses ASCII lookup";
        }
        else
        {
            start.MergeInto(_initial);
            debug += ", uses initial state";
        }
        parser.End.Value = value;
        value.DebugInfo = debug;
    }

    /**
     * Runs the automaton on the buffer contents and reports every
     * accepting state reached to the token match, together with
     * the number of characters consumed so far.
     *
     * The return value is always zero; the result of the matching
     * is delivered through the match parameter only.
     */
    public int Match(ReaderBuffer buffer, TokenMatch match)
    {
        int consumed = 1;
        NFAState state;

        // First step, handled separately: seed the queue from the
        // ASCII lookup table and from the shared initial state.
        _queue.Clear();
        var lookahead = buffer.Peek(0);
        if (0 <= lookahead && lookahead < 128)
        {
            state = _initialChar[lookahead];
            if (state != null)
            {
                _queue.AddLast(state);
            }
        }
        if (lookahead >= 0)
        {
            _initial.MatchTransitions((char)lookahead, _queue, true);
        }
        _queue.MarkEnd();
        lookahead = buffer.Peek(1);

        // Remaining steps: process queued states; each time the
        // queue reports its mark, move on to the next character.
        while (!_queue.Empty)
        {
            if (_queue.Marked)
            {
                consumed++;
                lookahead = buffer.Peek(consumed);
                _queue.MarkEnd();
            }
            state = _queue.RemoveFirst();
            if (state.Value != null)
            {
                match.Update(consumed, state.Value);
            }
            if (lookahead >= 0)
            {
                state.MatchTransitions((char)lookahead, _queue, false);
            }
        }
        return 0;
    }
}
|
||||
|
||||
|
||||
/**
 * An NFA state. The NFA consists of a series of states, each
 * having zero or more transitions to other states.
 */
internal class NFAState
{
    /** The token pattern accepted in this state, or null. */
    internal TokenPattern Value = null;

    /** The transitions leading into this state. */
    internal NFATransition[] Incoming = new NFATransition[0];

    /** The transitions leading out of this state. */
    internal NFATransition[] Outgoing = new NFATransition[0];

    /** Set once an epsilon transition was added to Outgoing. */
    internal bool EpsilonOut = false;

    /**
     * Checks whether this state has any incoming or outgoing
     * transitions.
     */
    public bool HasTransitions()
    {
        return Incoming.Length > 0 || Outgoing.Length > 0;
    }

    /**
     * Checks whether every outgoing transition reports itself as
     * ASCII-only. A state without outgoing transitions yields true.
     */
    public bool IsAsciiOutgoing()
    {
        for (int i = 0; i < Outgoing.Length; i++)
        {
            if (!Outgoing[i].IsAscii())
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Appends a transition to the incoming array.
     */
    public void AddIn(NFATransition trans)
    {
        Array.Resize(ref Incoming, Incoming.Length + 1);
        Incoming[Incoming.Length - 1] = trans;
    }

    /**
     * Adds an outgoing character transition and returns its
     * target state.
     *
     * With ignoreCase set, two transitions are added to the target
     * (created when null): one for the lower-case and one for the
     * upper-case form of the character. The case conversion uses
     * the invariant culture so that the set of accepted characters
     * does not depend on the culture of the running thread (with
     * culture-sensitive conversion, 'I' under a Turkish culture
     * maps to dotless 'ı' and a keyword containing 'I' would no
     * longer accept 'i').
     *
     * Without ignoreCase and with a null target, an existing
     * unique character transition for the same character is
     * reused when there is one; otherwise a new state is created.
     */
    public NFAState AddOut(char ch, bool ignoreCase, NFAState state)
    {
        if (ignoreCase)
        {
            if (state == null)
            {
                state = new NFAState();
            }
            AddOut(new NFACharTransition(Char.ToLowerInvariant(ch), state));
            AddOut(new NFACharTransition(Char.ToUpperInvariant(ch), state));
            return state;
        }

        if (state == null)
        {
            state = FindUniqueCharTransition(ch);
            if (state != null)
            {
                return state;
            }
            state = new NFAState();
        }
        return AddOut(new NFACharTransition(ch, state));
    }

    /**
     * Appends a transition to the outgoing array, records whether
     * it is an epsilon transition and returns its target state.
     */
    public NFAState AddOut(NFATransition trans)
    {
        Array.Resize(ref Outgoing, Outgoing.Length + 1);
        Outgoing[Outgoing.Length - 1] = trans;
        if (trans is NFAEpsilonTransition)
        {
            EpsilonOut = true;
        }
        return trans.State;
    }

    /**
     * Moves all transitions of this state to another state: the
     * incoming transitions are re-targeted to it and the outgoing
     * transitions are added to it.
     *
     * Afterwards both transition arrays of this state are null, so
     * none of the methods reading them may be called on this
     * state again.
     */
    public void MergeInto(NFAState state)
    {
        for (int i = 0; i < Incoming.Length; i++)
        {
            state.AddIn(Incoming[i]);
            Incoming[i].State = state;
        }
        Incoming = null;
        for (int i = 0; i < Outgoing.Length; i++)
        {
            state.AddOut(Outgoing[i]);
        }
        Outgoing = null;
    }

    /**
     * Finds the target of the single NFACharTransition matching
     * the character. Returns null when there is no such
     * transition, when more than one matches, or when another
     * outgoing transition leads to the same target state.
     */
    private NFAState FindUniqueCharTransition(char ch)
    {
        NFATransition res = null;
        NFATransition trans;

        for (int i = 0; i < Outgoing.Length; i++)
        {
            trans = Outgoing[i];
            if (trans.Match(ch) && trans is NFACharTransition)
            {
                if (res != null)
                {
                    return null;
                }
                res = trans;
            }
        }
        for (int i = 0; res != null && i < Outgoing.Length; i++)
        {
            trans = Outgoing[i];
            if (trans != res && trans.State == res.State)
            {
                return null;
            }
        }
        return res?.State;
    }

    /**
     * Queues the target states of all outgoing transitions that
     * match the character. A queued target with epsilon
     * transitions also gets its epsilon-reachable states queued.
     * When initial is set, epsilon transitions of this state are
     * followed first and the matching is done on their targets.
     */
    public void MatchTransitions(char ch, NFAStateQueue queue, bool initial)
    {
        for (int i = 0; i < Outgoing.Length; i++)
        {
            var trans = Outgoing[i];
            var target = trans.State;
            if (initial && trans is NFAEpsilonTransition)
            {
                target.MatchTransitions(ch, queue, true);
            }
            else if (trans.Match(ch))
            {
                queue.AddLast(target);
                if (target.EpsilonOut)
                {
                    target.MatchEmpty(queue);
                }
            }
        }
    }

    /**
     * Queues the targets of all outgoing epsilon transitions,
     * recursing into targets that have epsilon transitions of
     * their own.
     */
    public void MatchEmpty(NFAStateQueue queue)
    {
        for (int i = 0; i < Outgoing.Length; i++)
        {
            var trans = Outgoing[i];
            if (trans is NFAEpsilonTransition)
            {
                var target = trans.State;
                queue.AddLast(target);
                if (target.EpsilonOut)
                {
                    target.MatchEmpty(queue);
                }
            }
        }
    }
}
|
||||
|
||||
|
||||
/**
 * An NFA state transition. A transition checks a single character
 * of input and determines if it is a match. If a match is
 * encountered, the NFA should move forward to the transition
 * state.
 */
internal abstract class NFATransition
{
    /** The target state of this transition. */
    internal NFAState State;

    /**
     * Creates a transition to the given state and registers the
     * new transition as an incoming transition of that state.
     */
    protected NFATransition(NFAState state)
    {
        State = state;
        state.AddIn(this);
    }

    /**
     * Checks whether this transition only matches ASCII characters.
     */
    public abstract bool IsAscii();

    /**
     * Checks whether this transition matches the character.
     */
    public abstract bool Match(char ch);

    /**
     * Creates a copy of this transition with a new target state.
     */
    public abstract NFATransition Copy(NFAState state);
}
|
||||
|
||||
|
||||
/**
 * The special epsilon transition. It stands for a transition taken
 * without reading any input, so its match method never reports a
 * match; code that follows epsilon transitions checks for this
 * type explicitly.
 */
internal class NFAEpsilonTransition : NFATransition
{
    public NFAEpsilonTransition(NFAState state) : base(state)
    {
    }

    /** Always false for an epsilon transition. */
    public override bool IsAscii() => false;

    /** Always false: no character is consumed by this transition. */
    public override bool Match(char ch) => false;

    /** Creates a new epsilon transition to the given state. */
    public override NFATransition Copy(NFAState state) => new NFAEpsilonTransition(state);
}
|
||||
|
||||
|
||||
/**
 * A transition that matches exactly one specific character.
 */
internal class NFACharTransition : NFATransition
{
    private readonly char _match;

    public NFACharTransition(char match, NFAState state) : base(state)
    {
        _match = match;
    }

    /** True when the matched character has a code below 128. */
    public override bool IsAscii() => _match < 128;

    /** True when the character equals the matched character. */
    public override bool Match(char ch) => ch == _match;

    /** Creates a transition for the same character to the given state. */
    public override NFATransition Copy(NFAState state) => new NFACharTransition(_match, state);
}
|
||||
|
||||
|
||||
/**
 * A character range match transition. Used for user-defined
 * character sets in regular expressions. The set is a list of
 * single characters and character ranges and may be inverted.
 *
 * In case-insensitive mode the set contents and the characters
 * being matched are both converted to lower-case with the
 * invariant culture, so that the result does not depend on the
 * culture of the running thread (with culture-sensitive
 * conversion, 'I' under a Turkish culture maps to dotless 'ı',
 * which falls outside a range such as a-z).
 */
internal class NFACharRangeTransition : NFATransition
{
    /** When set, the transition matches characters NOT in the set. */
    protected bool Inverse;

    /** When set, characters are lower-cased before being stored or compared. */
    protected bool IgnoreCase;

    // Holds boxed char values and Range instances. The array is
    // replaced (never modified in place) when content is added, so
    // it can be shared with copies of this transition.
    private object[] _contents = new object[0];

    public NFACharRangeTransition(bool inverse,
                                  bool ignoreCase,
                                  NFAState state) : base(state)
    {
        this.Inverse = inverse;
        this.IgnoreCase = ignoreCase;
    }

    /**
     * Checks whether this transition only matches ASCII
     * characters: false for an inverted set, otherwise true when
     * every character and range in the set lies below 128.
     */
    public override bool IsAscii()
    {
        if (Inverse)
        {
            return false;
        }
        for (int i = 0; i < _contents.Length; i++)
        {
            var obj = _contents[i];
            if (obj is char)
            {
                if ((char)obj >= 128)
                {
                    return false;
                }
            }
            else if (obj is Range)
            {
                if (!((Range)obj).IsAscii())
                {
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Adds a single character to the set.
     */
    public void AddCharacter(char c)
    {
        if (IgnoreCase)
        {
            c = Char.ToLowerInvariant(c);
        }
        AddContent(c);
    }

    /**
     * Adds a character range (both ends inclusive) to the set.
     */
    public void AddRange(char min, char max)
    {
        if (IgnoreCase)
        {
            min = Char.ToLowerInvariant(min);
            max = Char.ToLowerInvariant(max);
        }
        AddContent(new Range(min, max));
    }

    /**
     * Appends an entry to a resized copy of the contents array.
     */
    private void AddContent(Object obj)
    {
        Array.Resize(ref _contents, _contents.Length + 1);
        _contents[_contents.Length - 1] = obj;
    }

    /**
     * Checks whether the character is matched: for a normal set,
     * whether it is one of the characters or inside one of the
     * ranges; for an inverted set, the opposite.
     */
    public override bool Match(char ch)
    {
        if (IgnoreCase)
        {
            ch = Char.ToLowerInvariant(ch);
        }
        for (int i = 0; i < _contents.Length; i++)
        {
            object obj = _contents[i];
            if (obj is char)
            {
                if ((char)obj == ch)
                {
                    return !Inverse;
                }
            }
            else if (obj is Range)
            {
                if (((Range)obj).Inside(ch))
                {
                    return !Inverse;
                }
            }
        }
        return Inverse;
    }

    /**
     * Creates a transition to the given state with the same flags
     * and the same contents array as this one.
     */
    public override NFATransition Copy(NFAState state)
    {
        var copy = new NFACharRangeTransition(Inverse, IgnoreCase, state) { _contents = _contents };
        return copy;
    }

    /**
     * An inclusive character range.
     */
    private class Range
    {
        private readonly char _min;
        private readonly char _max;

        public Range(char min, char max)
        {
            this._min = min;
            this._max = max;
        }

        /** True when both ends of the range lie below 128. */
        public bool IsAscii()
        {
            return _min < 128 && _max < 128;
        }

        /** True when the character lies between the two ends, inclusive. */
        public bool Inside(char c)
        {
            return _min <= c && c <= _max;
        }
    }
}
|
||||
|
||||
|
||||
/**
 * The dot ('.') character set transition. It matches any single
 * character except the line terminators '\n', '\r', U+0085,
 * U+2028 and U+2029.
 */
internal class NFADotTransition : NFATransition
{
    public NFADotTransition(NFAState state) : base(state)
    {
    }

    /** Always false: the dot also matches non-ASCII characters. */
    public override bool IsAscii() => false;

    /** True for every character that is not a line terminator. */
    public override bool Match(char ch)
    {
        bool isLineTerminator = ch == '\n'
                             || ch == '\r'
                             || ch == '\u0085'
                             || ch == '\u2028'
                             || ch == '\u2029';

        return !isLineTerminator;
    }

    /** Creates a new dot transition to the given state. */
    public override NFATransition Copy(NFAState state) => new NFADotTransition(state);
}
|
||||
|
||||
|
||||
/**
 * The digit character set transition. It matches a single
 * character in the range '0' to '9'.
 */
internal class NFADigitTransition : NFATransition
{
    public NFADigitTransition(NFAState state) : base(state)
    {
    }

    /** Always true: only '0'-'9' are matched. */
    public override bool IsAscii() => true;

    /** True when the character is one of '0' to '9'. */
    public override bool Match(char ch) => ch >= '0' && ch <= '9';

    /** Creates a new digit transition to the given state. */
    public override NFATransition Copy(NFAState state) => new NFADigitTransition(state);
}
|
||||
|
||||
|
||||
/**
 * The non-digit character set transition. It matches a single
 * character outside the range '0' to '9'.
 */
internal class NFANonDigitTransition : NFATransition
{
    public NFANonDigitTransition(NFAState state) : base(state)
    {
    }

    /** Always false: non-ASCII characters are matched as well. */
    public override bool IsAscii() => false;

    /** True when the character is not one of '0' to '9'. */
    public override bool Match(char ch) => !(ch >= '0' && ch <= '9');

    /** Creates a new non-digit transition to the given state. */
    public override NFATransition Copy(NFAState state) => new NFANonDigitTransition(state);
}
|
||||
|
||||
/**
 * The whitespace character set transition. It matches a single
 * space, tab, line feed, vertical tab (code 11), form feed or
 * carriage return.
 */
internal class NFAWhitespaceTransition : NFATransition
{
    public NFAWhitespaceTransition(NFAState state) : base(state)
    {
    }

    /** Always true: all matched characters are ASCII. */
    public override bool IsAscii() => true;

    /** True when the character is one of the six whitespace characters. */
    public override bool Match(char ch)
    {
        // '\t' (9), '\n' (10), code 11, '\f' (12) and '\r' (13)
        // are consecutive character codes.
        return ch == ' ' || ('\t' <= ch && ch <= '\r');
    }

    /** Creates a new whitespace transition to the given state. */
    public override NFATransition Copy(NFAState state) => new NFAWhitespaceTransition(state);
}
|
||||
|
||||
|
||||
/**
|
||||
* The non-whitespace character set transition. This transition
|
||||
* matches a single non-whitespace character.
|
||||
*/
|
||||
internal class NFANonWhitespaceTransition : NFATransition
{
    /** Creates the transition; the state is handed to the base constructor. */
    public NFANonWhitespaceTransition(NFAState state)
        : base(state)
    {
    }

    /** Always returns false for this transition type. */
    public override bool IsAscii() => false;

    /**
     * Returns false for space, tab, line feed, form feed, carriage
     * return and the character with code 11 (vertical tab); true for
     * every other character.
     */
    public override bool Match(char ch)
    {
        bool whitespace =
            ch == ' ' ||
            ch == '\t' ||
            ch == '\n' ||
            ch == '\f' ||
            ch == '\r' ||
            ch == (char)11;

        return !whitespace;
    }

    /** Returns a new NFANonWhitespaceTransition constructed with the specified state. */
    public override NFATransition Copy(NFAState state) => new NFANonWhitespaceTransition(state);
}
|
||||
|
||||
|
||||
/**
|
||||
* The word character set transition. This transition matches a
|
||||
* single word character.
|
||||
*/
|
||||
internal class NFAWordTransition : NFATransition
{
    /** Creates the transition; the state is handed to the base constructor. */
    public NFAWordTransition(NFAState state)
        : base(state)
    {
    }

    /** Always returns true for this transition type. */
    public override bool IsAscii() => true;

    /**
     * Returns true for the letters a-z and A-Z, the digits 0-9 and
     * the underscore character.
     */
    public override bool Match(char ch)
    {
        if (ch >= 'a' && ch <= 'z')
        {
            return true;
        }
        if (ch >= 'A' && ch <= 'Z')
        {
            return true;
        }
        if (ch >= '0' && ch <= '9')
        {
            return true;
        }
        return ch == '_';
    }

    /** Returns a new NFAWordTransition constructed with the specified state. */
    public override NFATransition Copy(NFAState state) => new NFAWordTransition(state);
}
|
||||
|
||||
|
||||
/**
|
||||
* The non-word character set transition. This transition matches
|
||||
* a single non-word character.
|
||||
*/
|
||||
internal class NFANonWordTransition : NFATransition
{
    /** Creates the transition; the state is handed to the base constructor. */
    public NFANonWordTransition(NFAState state)
        : base(state)
    {
    }

    /** Always returns false for this transition type. */
    public override bool IsAscii() => false;

    /**
     * Returns false for the letters a-z and A-Z, the digits 0-9 and
     * the underscore character; true for every other character.
     */
    public override bool Match(char ch)
    {
        if (ch >= 'a' && ch <= 'z')
        {
            return false;
        }
        if (ch >= 'A' && ch <= 'Z')
        {
            return false;
        }
        if (ch >= '0' && ch <= '9')
        {
            return false;
        }
        return ch != '_';
    }

    /** Returns a new NFANonWordTransition constructed with the specified state. */
    public override NFATransition Copy(NFAState state) => new NFANonWordTransition(state);
}
|
||||
|
||||
|
||||
/**
|
||||
* An NFA state queue. This queue is used during processing to
|
||||
* keep track of the current and subsequent NFA states. The
|
||||
* current state is read from the beginning of the queue, and new
|
||||
* states are added at the end. A marker index is used to
|
||||
* separate the current from the subsequent states.<p>
|
||||
*
|
||||
* The queue implementation is optimized for quick removal at the
|
||||
* beginning and addition at the end. It will attempt to use a
|
||||
* fixed-size array to store the whole queue, and moves the data
|
||||
* in this array only when absolutely needed. The array is also
|
||||
* enlarged automatically if too many states are being processed
|
||||
* at a single time.
|
||||
*/
|
||||
internal class NFAStateQueue
{
    /** The backing array; it starts with room for 2048 states. */
    private NFAState[] _queue = new NFAState[2048];

    /** Index of the first (oldest) entry still in the queue. */
    private int _first = 0;

    /** Index just past the most recently added entry. */
    private int _last = 0;

    /** The marker index recorded by MarkEnd(). */
    private int _mark = 0;

    /** True when there are no entries between the first and last index. */
    public bool Empty => _first >= _last;

    /** True when the first index has reached the marker index. */
    public bool Marked => _mark == _first;

    /** Resets the first, last and marker indices to zero. */
    public void Clear()
    {
        _first = _last = _mark = 0;
    }

    /** Moves the marker to the current end of the queue. */
    public void MarkEnd()
    {
        _mark = _last;
    }

    /**
     * Removes and returns the first entry in the queue, or returns
     * null when the queue is empty.
     */
    public NFAState RemoveFirst()
    {
        if (_first >= _last)
        {
            return null;
        }
        NFAState head = _queue[_first];
        _first++;
        return head;
    }

    /**
     * Appends a state at the end of the queue. When the array is
     * full, the live entries are shifted to the front if entries
     * have already been removed; otherwise the array is doubled.
     */
    public void AddLast(NFAState state)
    {
        bool full = _last >= _queue.Length;

        if (full && _first > 0)
        {
            // Reclaim the slots freed by earlier removals.
            int count = _last - _first;
            Array.Copy(_queue, _first, _queue, 0, count);
            _mark -= _first;
            _last = count;
            _first = 0;
        }
        else if (full)
        {
            Array.Resize(ref _queue, _queue.Length * 2);
        }
        _queue[_last] = state;
        _last++;
    }
}
|
||||
}
|
303
Parsing/TokenPattern.cs
Normal file
303
Parsing/TokenPattern.cs
Normal file
@@ -0,0 +1,303 @@
|
||||
using System.Text;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/**
|
||||
* A token pattern. This class contains the definition of a token
|
||||
* (i.e. its pattern), and allows testing a string against this
|
||||
* pattern. A token pattern is uniquely identified by an integer id,
|
||||
* that must be provided upon creation.
|
||||
*
|
||||
|
||||
*/
|
||||
internal class TokenPattern
{
    /** The kinds of pattern a token can be defined by. */
    public enum PatternType
    {
        /**
         * The string pattern type is used for tokens that only
         * match an exact string.
         */
        STRING,

        /**
         * The regular expression pattern type is used for tokens
         * that match a regular expression.
         */
        REGEXP
    }

    private int _id;
    private string _name;
    private PatternType _type;
    private string _pattern;
    private bool _error;
    private string _errorMessage;
    private bool _ignore;
    private string _ignoreMessage;
    private string _debugInfo;

    /**
     * Creates a token pattern from its id, name, pattern type and
     * pattern text. The error and ignore flags start out unset.
     */
    public TokenPattern(int id,
                        string name,
                        PatternType type,
                        string pattern)
    {
        _id = id;
        _name = name;
        _type = type;
        _pattern = pattern;
    }

    /** The token pattern id. */
    public int Id
    {
        get { return _id; }
        set { _id = value; }
    }

    /** Returns the token pattern id (same value as Id). */
    public int GetId()
    {
        return _id;
    }

    /** The token pattern name. */
    public string Name
    {
        get { return _name; }
        set { _name = value; }
    }

    /** Returns the token pattern name (same value as Name). */
    public string GetName()
    {
        return _name;
    }

    /** The pattern type. */
    public PatternType Type
    {
        get { return _type; }
        set { _type = value; }
    }

    /** Returns the pattern type (same value as Type). */
    public PatternType GetPatternType()
    {
        return _type;
    }

    /** The pattern text. */
    public string Pattern
    {
        get { return _pattern; }
        set { _pattern = value; }
    }

    /** Returns the pattern text (same value as Pattern). */
    public string GetPattern()
    {
        return _pattern;
    }

    /**
     * The error flag. Setting it to true while no error message is
     * present installs the default message.
     */
    public bool Error
    {
        get { return _error; }
        set
        {
            _error = value;
            if (_error && _errorMessage == null)
            {
                _errorMessage = "unrecognized token found";
            }
        }
    }

    /** The error message. Assigning it also sets the error flag. */
    public string ErrorMessage
    {
        get { return _errorMessage; }
        set
        {
            _error = true;
            _errorMessage = value;
        }
    }

    /** Returns the error flag (same value as Error). */
    public bool IsError()
    {
        return Error;
    }

    /** Returns the error message (same value as ErrorMessage). */
    public string GetErrorMessage()
    {
        return ErrorMessage;
    }

    /** Sets the error flag, installing the default message if none is set. */
    public void SetError()
    {
        Error = true;
    }

    /** Sets the error flag together with the specified message. */
    public void SetError(string message)
    {
        ErrorMessage = message;
    }

    /** The ignore flag. */
    public bool Ignore
    {
        get { return _ignore; }
        set { _ignore = value; }
    }

    /** The ignore message. Assigning it also sets the ignore flag. */
    public string IgnoreMessage
    {
        get { return _ignoreMessage; }
        set
        {
            _ignore = true;
            _ignoreMessage = value;
        }
    }

    /** Returns the ignore flag (same value as Ignore). */
    public bool IsIgnore()
    {
        return Ignore;
    }

    /** Returns the ignore message (same value as IgnoreMessage). */
    public string GetIgnoreMessage()
    {
        return IgnoreMessage;
    }

    /** Sets the ignore flag. */
    public void SetIgnore()
    {
        Ignore = true;
    }

    /** Sets the ignore flag together with the specified message. */
    public void SetIgnore(string message)
    {
        IgnoreMessage = message;
    }

    /** Free-form debug text; appended by ToString() when not null. */
    public string DebugInfo
    {
        get { return _debugInfo; }
        set { _debugInfo = value; }
    }

    /**
     * Returns a description of the form NAME (id): "pattern" for
     * string patterns or NAME (id): <<pattern>> for regular
     * expressions, followed by the error, ignore and debug
     * information when present.
     */
    public override string ToString()
    {
        StringBuilder buffer = new StringBuilder();

        buffer.Append(_name);
        buffer.Append(" (");
        buffer.Append(_id);
        buffer.Append("): ");
        switch (_type)
        {
            case PatternType.STRING:
                buffer.Append("\"");
                buffer.Append(_pattern);
                buffer.Append("\"");
                break;
            case PatternType.REGEXP:
                buffer.Append("<<");
                buffer.Append(_pattern);
                buffer.Append(">>");
                break;
        }
        if (_error)
        {
            buffer.Append(" ERROR: \"");
            buffer.Append(_errorMessage);
            buffer.Append("\"");
        }
        if (_ignore)
        {
            buffer.Append(" IGNORE");
            if (_ignoreMessage != null)
            {
                buffer.Append(": \"");
                buffer.Append(_ignoreMessage);
                buffer.Append("\"");
            }
        }
        if (_debugInfo != null)
        {
            buffer.Append("\n ");
            buffer.Append(_debugInfo);
        }
        return buffer.ToString();
    }

    /**
     * Returns a short description: the quoted pattern text for string
     * patterns (cut at the first line break and suffixed with "(...)"
     * when the text spans several lines), or the name in angle
     * brackets for every other pattern type.
     */
    public string ToShortString()
    {
        if (_type != PatternType.STRING)
        {
            // The pattern text is not needed here, so a null pattern
            // must not make this path fail.
            return "<" + _name + ">";
        }

        // Render a null pattern as empty text instead of throwing a
        // NullReferenceException.
        string text = _pattern ?? "";
        int newline = text.IndexOf('\n');

        if (newline < 0)
        {
            return "\"" + text + "\"";
        }
        if (newline > 0 && text[newline - 1] == '\r')
        {
            // Drop the carriage return of a CR LF pair as well.
            newline--;
        }
        return "\"" + text.Substring(0, newline) + "(...)" + "\"";
    }

    /** Replaces the id, name, pattern type and pattern text in one call. */
    public void SetData(int id, string name, PatternType type, string pattern)
    {
        Id = id;
        Name = name;
        Type = type;
        Pattern = pattern;
    }
}
|
||||
}
|
545
Parsing/TokenRegExpParser.cs
Normal file
545
Parsing/TokenRegExpParser.cs
Normal file
@@ -0,0 +1,545 @@
|
||||
using System.Collections;
|
||||
using System.Globalization;
|
||||
using System.Text;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/**
|
||||
* A regular expression parser. The parser creates an NFA for the
|
||||
* regular expression having a single start and acceptance states.
|
||||
*/
|
||||
internal class TokenRegExpParser
{
    /** The regular expression text being parsed. */
    private readonly string _pattern;

    /** Case-insensitivity flag forwarded to the character transitions created here. */
    private readonly bool _ignoreCase;

    /** Index of the next unread character in the pattern. */
    private int _pos;

    /** The NFA start state. */
    internal NFAState Start = new NFAState();

    /** The NFA end state, assigned by the constructor after parsing. */
    internal NFAState End;

    /** Statistics filled in lazily by GetDebugInfo(). */
    private int _stateCount;
    private int _transitionCount;
    private int _epsilonCount;

    /** Parses the pattern with case-sensitive matching. */
    public TokenRegExpParser(string pattern) : this(pattern, false)
    {
    }

    /**
     * Parses the pattern into an NFA reachable from Start and ending
     * in End. Throws RegExpException when the pattern is malformed or
     * when characters remain after the top-level expression.
     */
    public TokenRegExpParser(string pattern, bool ignoreCase)
    {
        _pattern = pattern;
        _ignoreCase = ignoreCase;
        _pos = 0;
        End = ParseExpr(Start);
        if (_pos < pattern.Length)
        {
            throw new RegExpException(
                RegExpException.ErrorType.UNEXPECTED_CHARACTER,
                _pos,
                pattern);
        }
    }

    /**
     * Returns a summary of the number of states, transitions and
     * epsilon transitions reachable from the start state. The counts
     * are computed on the first call.
     */
    public string GetDebugInfo()
    {
        if (_stateCount == 0)
        {
            UpdateStats(Start, new Hashtable());
        }
        return _stateCount + " states, " +
               _transitionCount + " transitions, " +
               _epsilonCount + " epsilons";
    }

    /** Recursively counts the states and transitions reachable from a state. */
    private void UpdateStats(NFAState state, Hashtable visited)
    {
        if (visited.ContainsKey(state))
        {
            return;
        }
        visited.Add(state, state);
        _stateCount++;
        for (int i = 0; i < state.Outgoing.Length; i++)
        {
            _transitionCount++;
            if (state.Outgoing[i] is NFAEpsilonTransition)
            {
                _epsilonCount++;
            }
            UpdateStats(state.Outgoing[i].State, visited);
        }
    }

    /**
     * Parses a sequence of '|'-separated terms starting in the given
     * state and returns the state in which the alternatives end.
     */
    private NFAState ParseExpr(NFAState start)
    {
        NFAState end = new NFAState();
        do
        {
            if (PeekChar(0) == '|')
            {
                ReadChar('|');
            }
            var subStart = new NFAState();
            var subEnd = ParseTerm(subStart);
            if (subStart.Incoming.Length == 0)
            {
                subStart.MergeInto(start);
            }
            else
            {
                start.AddOut(new NFAEpsilonTransition(subStart));
            }
            if (subEnd.Outgoing.Length == 0 ||
                (!end.HasTransitions() && PeekChar(0) != '|'))
            {
                subEnd.MergeInto(end);
            }
            else
            {
                subEnd.AddOut(new NFAEpsilonTransition(end));
            }
        } while (PeekChar(0) == '|');
        return end;
    }

    /** Parses one or more consecutive facts and returns the last end state. */
    private NFAState ParseTerm(NFAState start)
    {
        var end = ParseFact(start);
        while (true)
        {
            switch (PeekChar(0))
            {
                case -1:
                case ')':
                case ']':
                case '{':
                case '}':
                case '?':
                case '+':
                case '|':
                    return end;
                default:
                    end = ParseFact(end);
                    break;
            }
        }
    }

    /**
     * Parses an atom and its optional modifier on a placeholder state,
     * then connects the placeholder to the given start state.
     */
    private NFAState ParseFact(NFAState start)
    {
        NFAState placeholder = new NFAState();

        var end = ParseAtom(placeholder);
        switch (PeekChar(0))
        {
            case '?':
            case '*':
            case '+':
            case '{':
                end = ParseAtomModifier(placeholder, end);
                break;
        }
        if (placeholder.Incoming.Length > 0 && start.Outgoing.Length > 0)
        {
            start.AddOut(new NFAEpsilonTransition(placeholder));
            return end;
        }
        else
        {
            placeholder.MergeInto(start);
            return (end == placeholder) ? start : end;
        }
    }

    /**
     * Parses a single atom: '.', a parenthesized expression, a
     * character set or a single character. Throws RegExpException
     * when the next character cannot start an atom.
     */
    private NFAState ParseAtom(NFAState start)
    {
        NFAState end;

        switch (PeekChar(0))
        {
            case '.':
                ReadChar('.');
                return start.AddOut(new NFADotTransition(new NFAState()));
            case '(':
                ReadChar('(');
                end = ParseExpr(start);
                ReadChar(')');
                return end;
            case '[':
                ReadChar('[');
                end = ParseCharSet(start);
                ReadChar(']');
                return end;
            case -1:
            case ')':
            case ']':
            case '{':
            case '}':
            case '?':
            case '*':
            case '+':
            case '|':
                throw new RegExpException(
                    RegExpException.ErrorType.UNEXPECTED_CHARACTER,
                    _pos,
                    _pattern);
            default:
                return ParseChar(start);
        }
    }

    /**
     * Parses a repeat modifier ('?', '*', '+' or '{min,max}') and
     * wires it between the atom's start and end states. Only the
     * repeat counts (0,1), (0,unbounded) and (1,unbounded) are
     * supported; any other count throws RegExpException.
     */
    private NFAState ParseAtomModifier(NFAState start, NFAState end)
    {
        int min = 0;
        int max = -1;   // -1 stands for "no upper bound"
        int firstPos = _pos;

        // Read min and max
        switch (ReadChar())
        {
            case '?':
                min = 0;
                max = 1;
                break;
            case '*':
                min = 0;
                max = -1;
                break;
            case '+':
                min = 1;
                max = -1;
                break;
            case '{':
                min = ReadNumber();
                max = min;
                if (PeekChar(0) == ',')
                {
                    ReadChar(',');
                    max = -1;
                    if (PeekChar(0) != '}')
                    {
                        max = ReadNumber();
                    }
                }
                ReadChar('}');
                if (max == 0 || (max > 0 && min > max))
                {
                    throw new RegExpException(
                        RegExpException.ErrorType.INVALID_REPEAT_COUNT,
                        firstPos,
                        _pattern);
                }
                break;
            default:
                throw new RegExpException(
                    RegExpException.ErrorType.UNEXPECTED_CHARACTER,
                    _pos - 1,
                    _pattern);
        }

        // Possessive ('+') and reluctant ('?') modifiers are not supported
        int next = PeekChar(0);
        if (next == '?' || next == '+')
        {
            throw new RegExpException(
                RegExpException.ErrorType.UNSUPPORTED_SPECIAL_CHARACTER,
                _pos,
                _pattern);
        }

        // Handle supported repeaters
        if (min == 0 && max == 1)
        {
            return start.AddOut(new NFAEpsilonTransition(end));
        }
        else if (min == 0 && max == -1)
        {
            if (end.Outgoing.Length == 0)
            {
                end.MergeInto(start);
            }
            else
            {
                end.AddOut(new NFAEpsilonTransition(start));
            }
            return start;
        }
        else if (min == 1 && max == -1)
        {
            if (start.Outgoing.Length == 1 &&
                end.Outgoing.Length == 0 &&
                end.Incoming.Length == 1 &&
                start.Outgoing[0] == end.Incoming[0])
            {
                // A single transition joins start and end, so a copy of
                // it looping on the end state is added instead of an
                // epsilon transition back to start.
                end.AddOut(start.Outgoing[0].Copy(end));
            }
            else
            {
                end.AddOut(new NFAEpsilonTransition(start));
            }
            return end;
        }
        else
        {
            throw new RegExpException(
                RegExpException.ErrorType.INVALID_REPEAT_COUNT,
                firstPos,
                _pattern);
        }
    }

    /**
     * Parses the contents of a character set (after the opening '[')
     * into a single range transition. Stops in front of the closing
     * ']' without consuming it.
     */
    private NFAState ParseCharSet(NFAState start)
    {
        NFAState end = new NFAState();
        NFACharRangeTransition range;

        if (PeekChar(0) == '^')
        {
            ReadChar('^');
            range = new NFACharRangeTransition(true, _ignoreCase, end);
        }
        else
        {
            range = new NFACharRangeTransition(false, _ignoreCase, end);
        }
        start.AddOut(range);
        while (PeekChar(0) > 0)
        {
            var min = (char)PeekChar(0);
            switch (min)
            {
                case ']':
                    return end;
                case '\\':
                    range.AddCharacter(ReadEscapeChar());
                    break;
                default:
                    ReadChar(min);
                    if (PeekChar(0) == '-' &&
                        PeekChar(1) > 0 &&
                        PeekChar(1) != ']')
                    {
                        ReadChar('-');
                        var max = ReadChar();
                        range.AddRange(min, max);
                    }
                    else
                    {
                        range.AddCharacter(min);
                    }
                    break;
            }
        }
        return end;
    }

    /**
     * Parses a single character atom. The anchors '^' and '$' are
     * rejected with a RegExpException.
     */
    private NFAState ParseChar(NFAState start)
    {
        switch (PeekChar(0))
        {
            case '\\':
                return ParseEscapeChar(start);
            case '^':
            case '$':
                throw new RegExpException(
                    RegExpException.ErrorType.UNSUPPORTED_SPECIAL_CHARACTER,
                    _pos,
                    _pattern);
            default:
                return start.AddOut(ReadChar(), _ignoreCase, new NFAState());
        }
    }

    /**
     * Parses an escape sequence. The class escapes \d, \D, \s, \S,
     * \w and \W become dedicated transitions; every other escape is
     * resolved to a single character by ReadEscapeChar().
     */
    private NFAState ParseEscapeChar(NFAState start)
    {
        NFAState end = new NFAState();

        if (PeekChar(0) == '\\' && PeekChar(1) > 0)
        {
            switch ((char)PeekChar(1))
            {
                case 'd':
                    ReadChar();
                    ReadChar();
                    return start.AddOut(new NFADigitTransition(end));
                case 'D':
                    ReadChar();
                    ReadChar();
                    return start.AddOut(new NFANonDigitTransition(end));
                case 's':
                    ReadChar();
                    ReadChar();
                    return start.AddOut(new NFAWhitespaceTransition(end));
                case 'S':
                    ReadChar();
                    ReadChar();
                    return start.AddOut(new NFANonWhitespaceTransition(end));
                case 'w':
                    ReadChar();
                    ReadChar();
                    return start.AddOut(new NFAWordTransition(end));
                case 'W':
                    ReadChar();
                    ReadChar();
                    return start.AddOut(new NFANonWordTransition(end));
            }
        }
        return start.AddOut(ReadEscapeChar(), _ignoreCase, end);
    }

    /**
     * Reads a backslash escape and returns the character it denotes.
     * Supports octal (\0n, \0nn, \0nnn), hexadecimal (\xhh), Unicode
     * (\uhhhh), the control escapes \t \n \r \f \a \e, and escaped
     * non-letter characters. Other escaped letters throw RegExpException.
     */
    private char ReadEscapeChar()
    {
        int value;

        ReadChar('\\');
        var c = ReadChar();
        switch (c)
        {
            case '0':
                c = ReadChar();
                if (c < '0' || c > '3')
                {
                    throw new RegExpException(
                        RegExpException.ErrorType.UNSUPPORTED_ESCAPE_CHARACTER,
                        _pos - 3,
                        _pattern);
                }
                value = c - '0';
                c = (char)PeekChar(0);
                if ('0' <= c && c <= '7')
                {
                    value *= 8;
                    value += ReadChar() - '0';
                    c = (char)PeekChar(0);
                    if ('0' <= c && c <= '7')
                    {
                        value *= 8;
                        value += ReadChar() - '0';
                    }
                }
                return (char)value;
            case 'x':
                return ReadHexChar(2);
            case 'u':
                return ReadHexChar(4);
            case 't':
                return '\t';
            case 'n':
                return '\n';
            case 'r':
                return '\r';
            case 'f':
                return '\f';
            case 'a':
                return '\u0007';
            case 'e':
                return '\u001B';
            default:
                if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'))
                {
                    throw new RegExpException(
                        RegExpException.ErrorType.UNSUPPORTED_ESCAPE_CHARACTER,
                        _pos - 2,
                        _pattern);
                }
                return c;
        }
    }

    /**
     * Reads the given number of characters and interprets them as a
     * hexadecimal character code. Throws RegExpException when the
     * characters are not valid hexadecimal digits.
     */
    private char ReadHexChar(int digits)
    {
        StringBuilder hex = new StringBuilder(digits);

        for (int i = 0; i < digits; i++)
        {
            hex.Append(ReadChar());
        }
        string str = hex.ToString();
        try
        {
            int value = Int32.Parse(str,
                                    NumberStyles.AllowHexSpecifier,
                                    CultureInfo.InvariantCulture);
            return (char)value;
        }
        catch (FormatException)
        {
            // Point at the backslash that started the escape sequence.
            throw new RegExpException(
                RegExpException.ErrorType.UNSUPPORTED_ESCAPE_CHARACTER,
                _pos - str.Length - 2,
                _pattern);
        }
    }

    /**
     * Reads a run of decimal digits and returns its value. Throws
     * RegExpException when no digit is present, or when the number
     * does not fit in an Int32.
     */
    private int ReadNumber()
    {
        StringBuilder buf = new StringBuilder();
        int startPos = _pos;
        int c;

        c = PeekChar(0);
        while ('0' <= c && c <= '9')
        {
            buf.Append(ReadChar());
            c = PeekChar(0);
        }
        if (buf.Length <= 0)
        {
            throw new RegExpException(
                RegExpException.ErrorType.UNEXPECTED_CHARACTER,
                _pos,
                _pattern);
        }

        // The buffer holds only the digits '0'..'9', so the only way
        // parsing can fail is an overflow; report it as an invalid
        // repeat count rather than leaking an OverflowException.
        int number;
        if (!Int32.TryParse(buf.ToString(),
                            NumberStyles.None,
                            CultureInfo.InvariantCulture,
                            out number))
        {
            throw new RegExpException(
                RegExpException.ErrorType.INVALID_REPEAT_COUNT,
                startPos,
                _pattern);
        }
        return number;
    }

    /**
     * Consumes and returns the next pattern character. Throws
     * RegExpException when the end of the pattern has been reached.
     */
    private char ReadChar()
    {
        int c = PeekChar(0);

        if (c < 0)
        {
            throw new RegExpException(
                RegExpException.ErrorType.UNTERMINATED_PATTERN,
                _pos,
                _pattern);
        }
        else
        {
            _pos++;
            return (char)c;
        }
    }

    /**
     * Consumes the next pattern character and checks that it equals
     * the expected one; throws RegExpException otherwise.
     */
    private char ReadChar(char c)
    {
        if (c != ReadChar())
        {
            throw new RegExpException(
                RegExpException.ErrorType.UNEXPECTED_CHARACTER,
                _pos - 1,
                _pattern);
        }
        return c;
    }

    /**
     * Returns the character count positions ahead of the current
     * position without consuming it, or -1 past the end of the pattern.
     */
    private int PeekChar(int count)
    {
        if (_pos + count < _pattern.Length)
        {
            return _pattern[_pos + count];
        }
        else
        {
            return -1;
        }
    }
}
|
||||
}
|
213
Parsing/TokenStringDFA.cs
Normal file
213
Parsing/TokenStringDFA.cs
Normal file
@@ -0,0 +1,213 @@
|
||||
using System.Text;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/**
|
||||
* A deterministic finite state automaton for matching exact strings.
|
||||
* It uses a sorted binary tree representation of the state
|
||||
* transitions in order to enable quick matches with a minimal memory
|
||||
* footprint. It only supports a single character transition between
|
||||
* states, but may be run in an all case-insensitive mode.
|
||||
*/
|
||||
internal class TokenStringDFA
{
    /** Start states indexed by a first character below 128. */
    private readonly DFAState[] _ascii = new DFAState[128];

    /** Shared start state used when the first character is 128 or above. */
    private readonly DFAState _nonAscii = new DFAState();

    /** Creates an automaton without any registered strings. */
    public TokenStringDFA()
    {
    }

    /**
     * Registers a string and the token pattern to report when it is
     * matched. With caseInsensitive set, characters are lower-cased
     * before they are stored.
     */
    public void AddMatch(string str, bool caseInsensitive, TokenPattern value)
    {
        char first = caseInsensitive ? Char.ToLower(str[0]) : str[0];
        DFAState current;
        int index;

        if (first >= 128)
        {
            // The shared state does not consume the first character,
            // so it is added to the tree like all the others.
            current = _nonAscii;
            index = 0;
        }
        else
        {
            current = _ascii[first];
            if (current == null)
            {
                current = new DFAState();
                _ascii[first] = current;
            }
            index = 1;
        }
        for (; index < str.Length; index++)
        {
            char ch = str[index];
            DFAState target = current.Tree.Find(ch, caseInsensitive);
            if (target == null)
            {
                target = new DFAState();
                current.Tree.Add(ch, caseInsensitive, target);
            }
            current = target;
        }
        current.Value = value;
    }

    /**
     * Peeks into the buffer (without consuming it) and returns the
     * token pattern of the longest registered string found at the
     * current position, or null when none matches.
     */
    public TokenPattern Match(ReaderBuffer buffer, bool caseInsensitive)
    {
        var ch = buffer.Peek(0);
        if (ch < 0)
        {
            return null;
        }
        if (caseInsensitive)
        {
            ch = Char.ToLower((char)ch);
        }

        TokenPattern best = null;
        DFAState current;
        int offset = 0;

        if (ch >= 128)
        {
            current = _nonAscii;
        }
        else
        {
            current = _ascii[ch];
            if (current == null)
            {
                return null;
            }
            if (current.Value != null)
            {
                best = current.Value;
            }
            offset = 1;
        }
        for (ch = buffer.Peek(offset); ch >= 0; ch = buffer.Peek(offset))
        {
            current = current.Tree.Find((char)ch, caseInsensitive);
            if (current == null)
            {
                break;
            }
            if (current.Value != null)
            {
                // Remember the longest match seen so far.
                best = current.Value;
            }
            offset++;
        }
        return best;
    }

    /** Returns a textual dump of the automaton's states and transitions. */
    public override string ToString()
    {
        StringBuilder text = new StringBuilder();

        for (int code = 0; code < _ascii.Length; code++)
        {
            DFAState entry = _ascii[code];
            if (entry == null)
            {
                continue;
            }
            text.Append((char)code);
            if (entry.Value != null)
            {
                text.Append(": ");
                text.Append(entry.Value);
                text.Append("\n");
            }
            entry.Tree.PrintTo(text, " ");
        }
        _nonAscii.Tree.PrintTo(text, "");
        return text.ToString();
    }
}
|
||||
|
||||
internal class DFAState
{
    /** The transitions leaving this state; created empty with the state. */
    internal TransitionTree Tree = new TransitionTree();

    /** The token pattern attached to this state; null until one is assigned. */
    internal TokenPattern Value;
}
|
||||
|
||||
|
||||
internal class TransitionTree
{
    /** The character stored in this node; '\0' marks an empty node. */
    private char _value = '\0';

    /** The state reached through the stored character. */
    private DFAState _state;

    /** Subtree searched for characters smaller than the stored one. */
    private TransitionTree _left;

    /** Subtree searched for characters larger than the stored one. */
    private TransitionTree _right;

    /** Creates an empty tree node. */
    public TransitionTree()
    {
    }

    /**
     * Looks up the state stored for a character, lower-casing the
     * character first when lowerCase is set. Returns null when the
     * search ends in an empty node.
     */
    public DFAState Find(char c, bool lowerCase)
    {
        if (lowerCase)
        {
            c = Char.ToLower(c);
        }

        TransitionTree node = this;
        while (node._value != '\0' && node._value != c)
        {
            node = (node._value > c) ? node._left : node._right;
        }
        return node._state;
    }

    /**
     * Stores a state for a character in the first empty node found
     * along the search path, lower-casing the character first when
     * lowerCase is set.
     */
    public void Add(char c, bool lowerCase, DFAState state)
    {
        if (lowerCase)
        {
            c = Char.ToLower(c);
        }

        TransitionTree node = this;
        while (node._value != '\0')
        {
            node = (node._value > c) ? node._left : node._right;
        }
        node._value = c;
        node._state = state;
        node._left = new TransitionTree();
        node._right = new TransitionTree();
    }

    /**
     * Appends a dump of this tree to the buffer: left subtree, then
     * this node and the tree of its state, then the right subtree.
     */
    public void PrintTo(StringBuilder buffer, String indent)
    {
        if (_left != null)
        {
            _left.PrintTo(buffer, indent);
        }
        if (_value != '\0')
        {
            bool atLineStart = buffer.Length > 0 &&
                               buffer[buffer.Length - 1] == '\n';
            if (atLineStart)
            {
                buffer.Append(indent);
            }
            buffer.Append(_value);
            if (_state.Value != null)
            {
                buffer.Append(": ");
                buffer.Append(_state.Value);
                buffer.Append("\n");
            }
            _state.Tree.PrintTo(buffer, indent + " ");
        }
        if (_right != null)
        {
            _right.PrintTo(buffer, indent);
        }
    }
}
|
||||
}
|
444
Parsing/Tokenizer.cs
Normal file
444
Parsing/Tokenizer.cs
Normal file
@@ -0,0 +1,444 @@
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace Flee.Parsing
|
||||
{
|
||||
/**
|
||||
* A character stream tokenizer. This class groups the characters read
|
||||
* from the stream together into tokens ("words"). The grouping is
|
||||
* controlled by token patterns that contain either a fixed string to
|
||||
* search for, or a regular expression. If the stream of characters
|
||||
* don't match any of the token patterns, a parse exception is thrown.
|
||||
*/
|
||||
internal class Tokenizer
|
||||
{
|
||||
private bool _useTokenList = false;
|
||||
private readonly StringDFAMatcher _stringDfaMatcher;
|
||||
private readonly NFAMatcher _nfaMatcher;
|
||||
private readonly RegExpMatcher _regExpMatcher;
|
||||
private ReaderBuffer _buffer = null;
|
||||
private readonly TokenMatch _lastMatch = new TokenMatch();
|
||||
private Token _previousToken = null;
|
||||
|
||||
/**
 * Creates a tokenizer for the given input by delegating to the
 * two-argument constructor with ignoreCase set to false.
 */
public Tokenizer(TextReader input) : this(input, false) { }
|
||||
|
||||
/**
 * Creates a tokenizer for the given input. The ignoreCase flag is
 * passed on to the string, NFA and regular expression matchers.
 */
public Tokenizer(TextReader input, bool ignoreCase)
{
    _stringDfaMatcher = new StringDFAMatcher(ignoreCase);
    _nfaMatcher = new NFAMatcher(ignoreCase);
    _regExpMatcher = new RegExpMatcher(ignoreCase);
    _buffer = new ReaderBuffer(input);
}
|
||||
|
||||
/**
 * The token list flag. While it is set, Next() assigns each token's
 * Previous property to the token returned before it.
 */
public bool UseTokenList
{
    get => _useTokenList;
    set => _useTokenList = value;
}
|
||||
|
||||
/** Returns the token list flag (same value as UseTokenList). */
public bool GetUseTokenList() => _useTokenList;
|
||||
|
||||
/** Sets the token list flag (same effect as assigning UseTokenList). */
public void SetUseTokenList(bool useTokenList)
{
    _useTokenList = useTokenList;
}
|
||||
|
||||
/**
 * Returns the short description of the token pattern with the given
 * id. The string, NFA and regular expression matchers are asked in
 * that order; null is returned when none of them has the pattern.
 */
public string GetPatternDescription(int id)
{
    var pattern = _stringDfaMatcher.GetPattern(id)
                  ?? _nfaMatcher.GetPattern(id)
                  ?? _regExpMatcher.GetPattern(id);

    return pattern?.ToShortString();
}
|
||||
|
||||
/** Returns the line number currently reported by the input buffer. */
public int GetCurrentLine() => _buffer.LineNumber;
|
||||
|
||||
/** Returns the column number currently reported by the input buffer. */
public int GetCurrentColumn() => _buffer.ColumnNumber;
|
||||
|
||||
/**
|
||||
* nfa - true to attempt as an nfa pattern for regexp. This handles most things except the complex repeats, e.g. {1,4}
|
||||
*/
|
||||
/**
 * Registers a token pattern with the matcher for its type. String
 * patterns go to the string matcher. Regular expressions are first
 * offered to the NFA matcher when nfa is true, and are added to the
 * regular expression matcher when nfa is false or the NFA matcher
 * throws. Failures are reported as ParserCreationException.
 */
public void AddPattern(TokenPattern pattern, bool nfa=true)
{
    TokenPattern.PatternType kind = pattern.Type;

    if (kind == TokenPattern.PatternType.STRING)
    {
        try
        {
            _stringDfaMatcher.AddPattern(pattern);
        }
        catch (Exception e)
        {
            throw new ParserCreationException(
                ParserCreationException.ErrorType.INVALID_TOKEN,
                pattern.Name,
                "error adding string token: " +
                e.Message);
        }
        return;
    }

    if (kind != TokenPattern.PatternType.REGEXP)
    {
        throw new ParserCreationException(
            ParserCreationException.ErrorType.INVALID_TOKEN,
            pattern.Name,
            "pattern type " + kind +
            " is undefined");
    }

    bool useRegExpMatcher = !nfa;
    if (nfa)
    {
        try
        {
            _nfaMatcher.AddPattern(pattern);
        }
        catch (Exception)
        {
            // The NFA matcher rejected the pattern; fall back to the
            // regular expression matcher below.
            useRegExpMatcher = true;
        }
    }
    if (useRegExpMatcher)
    {
        try
        {
            _regExpMatcher.AddPattern(pattern);
        }
        catch (Exception e)
        {
            throw new ParserCreationException(
                ParserCreationException.ErrorType.INVALID_TOKEN,
                pattern.Name,
                "regular expression contains error(s): " +
                e.Message);
        }
    }
}
|
||||
|
||||
/**
 * Points the tokenizer at a new input: a fresh buffer is created
 * for it, and the previous token and the last match are cleared.
 */
public void Reset(TextReader input)
{
    _buffer = new ReaderBuffer(input);
    _previousToken = null;
    _lastMatch.Clear();
}
|
||||
|
||||
/// <summary>
/// Reads tokens until one is found whose pattern is not flagged as
/// ignored, and returns it.
/// </summary>
/// <returns>
/// The next non-ignored token, or null when <c>NextToken</c> returns null.
/// </returns>
/// <exception cref="ParseException">
/// Thrown when the token read belongs to a pattern flagged as an error.
/// </exception>
public Token Next()
{
    while (true)
    {
        Token candidate = NextToken();

        if (candidate == null)
        {
            _previousToken = null;
            return null;
        }

        // Ignored tokens are linked into the list too when it is enabled.
        if (_useTokenList)
        {
            candidate.Previous = _previousToken;
            _previousToken = candidate;
        }

        if (candidate.Pattern.Ignore)
        {
            continue;
        }

        if (candidate.Pattern.Error)
        {
            throw new ParseException(
                ParseException.ErrorType.INVALID_TOKEN,
                candidate.Pattern.ErrorMessage,
                candidate.StartLine,
                candidate.StartColumn);
        }

        return candidate;
    }
}
|
||||
|
||||
/// <summary>
/// Runs the string, NFA and regexp matchers against the buffer, in that
/// order, and turns the recorded match into a token.
/// </summary>
/// <returns>
/// A token for the recorded match, or null when nothing matched and
/// <c>Peek(0)</c> on the buffer returns a negative value.
/// </returns>
/// <exception cref="ParseException">
/// Thrown with UNEXPECTED_CHAR when nothing matched but a character is
/// available, or with IO when an <see cref="IOException"/> occurs.
/// </exception>
private Token NextToken()
{
    try
    {
        _lastMatch.Clear();
        _stringDfaMatcher.Match(_buffer, _lastMatch);
        _nfaMatcher.Match(_buffer, _lastMatch);
        _regExpMatcher.Match(_buffer, _lastMatch);

        if (_lastMatch.Length > 0)
        {
            // Capture the position before reading consumes the text.
            int startLine = _buffer.LineNumber;
            int startColumn = _buffer.ColumnNumber;
            var image = _buffer.Read(_lastMatch.Length);
            return NewToken(_lastMatch.Pattern, image, startLine, startColumn);
        }

        if (_buffer.Peek(0) < 0)
        {
            return null;
        }

        int errorLine = _buffer.LineNumber;
        int errorColumn = _buffer.ColumnNumber;
        var unexpected = _buffer.Read(1);
        throw new ParseException(
            ParseException.ErrorType.UNEXPECTED_CHAR,
            unexpected,
            errorLine,
            errorColumn);
    }
    catch (IOException e)
    {
        throw new ParseException(ParseException.ErrorType.IO,
                                 e.Message,
                                 -1,
                                 -1);
    }
}
|
||||
|
||||
/// <summary>
/// Factory method for tokens; virtual so that subclasses can supply
/// their own token instances.
/// </summary>
/// <param name="pattern">The pattern that produced the match.</param>
/// <param name="image">The matched text.</param>
/// <param name="line">The line number passed to the token.</param>
/// <param name="column">The column number passed to the token.</param>
/// <returns>A new <c>Token</c> built from the four arguments.</returns>
protected virtual Token NewToken(TokenPattern pattern,
                                 string image,
                                 int line,
                                 int column)
    => new Token(pattern, image, line, column);
|
||||
|
||||
/// <summary>
/// Concatenates the string forms of the string DFA matcher, the NFA
/// matcher and the regexp matcher, in that order.
/// </summary>
/// <returns>The combined text of the three matchers.</returns>
public override string ToString()
{
    return string.Concat(_stringDfaMatcher, _nfaMatcher, _regExpMatcher);
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Base class for the token matchers. It keeps the registered patterns
/// in an array and leaves the matching itself to subclasses.
/// </summary>
internal abstract class TokenMatcher
{
    /// <summary>The patterns registered so far, in insertion order.</summary>
    protected TokenPattern[] Patterns = new TokenPattern[0];

    /// <summary>The ignore-case flag given to the constructor.</summary>
    protected bool IgnoreCase = false;

    /// <summary>
    /// Creates a matcher and stores the ignore-case flag.
    /// </summary>
    /// <param name="ignoreCase">The ignore-case flag to store.</param>
    protected TokenMatcher(bool ignoreCase)
    {
        IgnoreCase = ignoreCase;
    }

    /// <summary>
    /// Matches the buffer contents and reports the outcome through
    /// <paramref name="match"/>.
    /// </summary>
    /// <param name="buffer">The input buffer to match against.</param>
    /// <param name="match">The match object to update.</param>
    public abstract void Match(ReaderBuffer buffer, TokenMatch match);

    /// <summary>
    /// Looks up a registered pattern by its identifier.
    /// </summary>
    /// <param name="id">The pattern identifier to look for.</param>
    /// <returns>The first pattern with that id, or null if none has it.</returns>
    public TokenPattern GetPattern(int id)
    {
        foreach (TokenPattern candidate in Patterns)
        {
            if (candidate.Id == id)
            {
                return candidate;
            }
        }
        return null;
    }

    /// <summary>
    /// Appends a pattern to the end of the pattern array.
    /// </summary>
    /// <param name="pattern">The pattern to append.</param>
    public virtual void AddPattern(TokenPattern pattern)
    {
        int slot = Patterns.Length;

        Array.Resize(ref Patterns, slot + 1);
        Patterns[slot] = pattern;
    }

    /// <summary>
    /// Builds a listing of every registered pattern, each followed by
    /// two newline characters.
    /// </summary>
    /// <returns>The listing text.</returns>
    public override string ToString()
    {
        var text = new StringBuilder();

        foreach (TokenPattern entry in Patterns)
        {
            text.Append(entry);
            text.Append("\n\n");
        }
        return text.ToString();
    }
}
|
||||
|
||||
/// <summary>
/// Token matcher for string patterns, backed by a
/// <c>TokenStringDFA</c> automaton.
/// </summary>
internal class StringDFAMatcher : TokenMatcher
{
    private readonly TokenStringDFA _automaton = new TokenStringDFA();

    /// <summary>
    /// Creates a string matcher.
    /// </summary>
    /// <param name="ignoreCase">The ignore-case flag passed to the base class.</param>
    public StringDFAMatcher(bool ignoreCase) : base(ignoreCase)
    {
    }

    /// <summary>
    /// Adds the pattern text to the automaton, then records the pattern
    /// in the base class array.
    /// </summary>
    /// <param name="pattern">The pattern to add.</param>
    public override void AddPattern(TokenPattern pattern)
    {
        _automaton.AddMatch(pattern.Pattern, IgnoreCase, pattern);
        base.AddPattern(pattern);
    }

    /// <summary>
    /// Asks the automaton for a matching pattern and, if one is found,
    /// updates <paramref name="match"/> with the length of that
    /// pattern's text.
    /// </summary>
    /// <param name="buffer">The input buffer to match against.</param>
    /// <param name="match">The match object to update.</param>
    public override void Match(ReaderBuffer buffer, TokenMatch match)
    {
        TokenPattern hit = _automaton.Match(buffer, IgnoreCase);

        if (hit == null)
        {
            return;
        }
        match.Update(hit.Pattern.Length, hit);
    }
}
|
||||
|
||||
/// <summary>
/// Token matcher backed by a <c>TokenNFA</c> automaton. It accepts both
/// string and regular expression patterns.
/// </summary>
internal class NFAMatcher : TokenMatcher
{
    private readonly TokenNFA _automaton = new TokenNFA();

    /// <summary>
    /// Creates an NFA matcher.
    /// </summary>
    /// <param name="ignoreCase">The ignore-case flag passed to the base class.</param>
    public NFAMatcher(bool ignoreCase) : base(ignoreCase)
    {
    }

    /// <summary>
    /// Adds the pattern to the automaton, as a text match for string
    /// patterns and as a regexp match otherwise, then records it in the
    /// base class array.
    /// </summary>
    /// <param name="pattern">The pattern to add.</param>
    public override void AddPattern(TokenPattern pattern)
    {
        bool isLiteral = pattern.Type == TokenPattern.PatternType.STRING;

        if (!isLiteral)
        {
            _automaton.AddRegExpMatch(pattern.Pattern, IgnoreCase, pattern);
        }
        else
        {
            _automaton.AddTextMatch(pattern.Pattern, IgnoreCase, pattern);
        }
        base.AddPattern(pattern);
    }

    /// <summary>
    /// Delegates matching to the automaton.
    /// </summary>
    /// <param name="buffer">The input buffer to match against.</param>
    /// <param name="match">The match object the automaton updates.</param>
    public override void Match(ReaderBuffer buffer, TokenMatch match)
        => _automaton.Match(buffer, match);
}
|
||||
|
||||
/// <summary>
/// Token matcher that tries each registered regular expression in turn.
/// The handler array runs parallel to the base class pattern array.
/// </summary>
internal class RegExpMatcher : TokenMatcher
{
    private REHandler[] _regExps = new REHandler[0];

    /// <summary>
    /// Creates a regexp matcher.
    /// </summary>
    /// <param name="ignoreCase">The ignore-case flag passed to the base class.</param>
    public RegExpMatcher(bool ignoreCase) : base(ignoreCase)
    {
    }

    /// <summary>
    /// Builds a handler for the pattern, appends it to the handler
    /// array, and records the pattern in the base class array.
    /// </summary>
    /// <param name="pattern">The pattern to add.</param>
    public override void AddPattern(TokenPattern pattern)
    {
        REHandler handler = CreateHandler(pattern);
        int slot = _regExps.Length;

        Array.Resize(ref _regExps, slot + 1);
        _regExps[slot] = handler;
        base.AddPattern(pattern);
    }

    /// <summary>
    /// Runs every handler against the buffer and updates
    /// <paramref name="match"/> for each one that reports a positive length.
    /// </summary>
    /// <param name="buffer">The input buffer to match against.</param>
    /// <param name="match">The match object to update.</param>
    public override void Match(ReaderBuffer buffer, TokenMatch match)
    {
        for (int index = 0; index < _regExps.Length; index++)
        {
            int matched = _regExps[index].Match(buffer);

            if (matched <= 0)
            {
                continue;
            }
            match.Update(matched, Patterns[index]);
        }
    }

    // Prefers the Grammatica engine; any exception while building it
    // (or its debug text) switches to the native .NET engine instead.
    private REHandler CreateHandler(TokenPattern pattern)
    {
        try
        {
            REHandler grammatica = new GrammaticaRE(pattern.Pattern, IgnoreCase);
            pattern.DebugInfo = "Grammatica regexp\n" + grammatica;
            return grammatica;
        }
        catch (Exception)
        {
            REHandler native = new SystemRE(pattern.Pattern, IgnoreCase);
            pattern.DebugInfo = "native .NET regexp";
            return native;
        }
    }
}
|
||||
|
||||
|
||||
/// <summary>
/// Common interface for the regular expression engines used by
/// <c>RegExpMatcher</c>.
/// </summary>
internal abstract class REHandler
{
    /// <summary>
    /// Attempts a match against the buffer.
    /// </summary>
    /// <param name="buffer">The input buffer to match against.</param>
    /// <returns>
    /// The match length; the implementations in this file return 0
    /// when there is no match.
    /// </returns>
    public abstract int Match(ReaderBuffer buffer);
}
|
||||
|
||||
/// <summary>
/// Regular expression handler built on the Grammatica <c>RegExp</c>
/// class. The matcher is created on first use and reset on later calls.
/// </summary>
internal class GrammaticaRE : REHandler
{
    private readonly RegExp _regExp;
    private Matcher _matcher;

    /// <summary>
    /// Compiles the regular expression.
    /// </summary>
    /// <param name="regex">The regular expression text.</param>
    /// <param name="ignoreCase">The ignore-case flag passed to <c>RegExp</c>.</param>
    public GrammaticaRE(string regex, bool ignoreCase)
    {
        _regExp = new RegExp(regex, ignoreCase);
    }

    /// <summary>
    /// Matches the expression from the beginning of the matcher's input.
    /// </summary>
    /// <param name="buffer">The input buffer to match against.</param>
    /// <returns>The matcher's reported length, or 0 when there is no match.</returns>
    public override int Match(ReaderBuffer buffer)
    {
        if (_matcher != null)
        {
            _matcher.Reset(buffer);
        }
        else
        {
            _matcher = _regExp.Matcher(buffer);
        }

        if (!_matcher.MatchFromBeginning())
        {
            return 0;
        }
        return _matcher.Length();
    }
}
|
||||
|
||||
/// <summary>
/// Regular expression handler built on the .NET
/// <see cref="Regex"/> class.
/// </summary>
internal class SystemRE : REHandler
{
    private readonly Regex _reg;

    /// <summary>
    /// Compiles the regular expression, with
    /// <see cref="RegexOptions.IgnoreCase"/> when requested.
    /// </summary>
    /// <param name="regex">The regular expression text.</param>
    /// <param name="ignoreCase">True to compile with the ignore-case option.</param>
    public SystemRE(string regex, bool ignoreCase)
    {
        RegexOptions options = ignoreCase
            ? RegexOptions.IgnoreCase
            : RegexOptions.None;

        _reg = new Regex(regex, options);
    }

    /// <summary>
    /// Searches the buffer text starting at the buffer position and
    /// accepts the result only if it begins exactly at that position.
    /// </summary>
    /// <param name="buffer">The input buffer to match against.</param>
    /// <returns>The match length, or 0 when there is no match at the position.</returns>
    public override int Match(ReaderBuffer buffer)
    {
        // Ugly hack: .NET has no flag telling us the end of the input
        // string was reached, so peek far ahead before matching.
        buffer.Peek(1024 * 16);

        // There is also no API to restrict the search to the given
        // position, so the match index is double-checked afterwards.
        string text = buffer.ToString();
        Match m = _reg.Match(text, buffer.Position);
        bool anchoredAtPosition = m.Success && m.Index == buffer.Position;

        return anchoredAtPosition ? m.Length : 0;
    }
}
|
||||
}
|
Reference in New Issue
Block a user