This commit is contained in:
2025-10-08 09:49:37 +08:00
commit 284e764345
99 changed files with 21742 additions and 0 deletions

View File

@@ -0,0 +1,60 @@
namespace Flee.Parsing
{
/// <summary>
/// A regular expression alternative element. It holds two
/// sub-elements and reports the longer of their two matches.
/// </summary>
internal class AlternativeElement : Element
{
    private readonly Element _elem1;
    private readonly Element _elem2;

    /// <summary>
    /// Creates an alternative over the two given elements.
    /// </summary>
    /// <param name="first">The first alternative.</param>
    /// <param name="second">The second alternative.</param>
    public AlternativeElement(Element first, Element second)
    {
        _elem1 = first;
        _elem2 = second;
    }

    /// <summary>
    /// Returns a new alternative element that refers to the same two
    /// sub-element instances (the sub-elements are not cloned).
    /// </summary>
    public override object Clone() => new AlternativeElement(_elem1, _elem2);

    /// <summary>
    /// Matches both alternatives at <paramref name="start"/> and returns the
    /// longer result. Every loop round discards the currently longer
    /// candidate by bumping that alternative's own skip counter, until
    /// more than <paramref name="skip"/> candidates have been consumed or a
    /// round yields a negative length.
    /// </summary>
    /// <returns>The selected match length, or a negative value for no match.</returns>
    public override int Match(Matcher m, ReaderBuffer buffer, int start, int skip)
    {
        int best = 0;
        int firstSkip = 0;
        int secondSkip = 0;

        while (best >= 0 && firstSkip + secondSkip <= skip)
        {
            int firstLength = _elem1.Match(m, buffer, start, firstSkip);
            int secondLength = _elem2.Match(m, buffer, start, secondSkip);

            // Ties go to the first alternative.
            if (secondLength > firstLength)
            {
                best = secondLength;
                secondSkip++;
            }
            else
            {
                best = firstLength;
                firstSkip++;
            }
        }
        return best;
    }

    /// <summary>
    /// Writes both alternatives to <paramref name="output"/>, each under its
    /// own heading and indented one step further than the heading.
    /// </summary>
    public override void PrintTo(TextWriter output, string indent)
    {
        string nested = indent + " ";

        output.WriteLine(indent + "Alternative 1");
        _elem1.PrintTo(output, nested);
        output.WriteLine(indent + "Alternative 2");
        _elem2.PrintTo(output, nested);
    }
}
}

240
Parsing/Analyzer.cs Normal file
View File

@@ -0,0 +1,240 @@
using System.Collections;
namespace Flee.Parsing
{
/// <summary>
/// A parse tree analyzer. It walks a parse tree and invokes the
/// <see cref="Enter"/>, <see cref="Child"/> and <see cref="Exit"/> callbacks
/// for every node; subclasses override those callbacks to attach values to
/// nodes or to rebuild the tree. The class also offers helper accessors that
/// raise a <c>ParseException</c> when the requested child or value is absent.
/// </summary>
internal class Analyzer
{
    /// <summary>
    /// Creates a new parse tree analyzer.
    /// </summary>
    public Analyzer()
    {
    }

    /// <summary>
    /// Resets this analyzer when the parser is reset for another
    /// input stream. The default implementation of this method does
    /// nothing.
    /// </summary>
    public virtual void Reset()
    {
        // Default implementation does nothing
    }

    /// <summary>
    /// Analyzes a parse tree node and all of its children.
    /// </summary>
    /// <param name="node">The root of the tree to analyze.</param>
    /// <returns>The node returned by the outermost <see cref="Exit"/> call.</returns>
    /// <exception cref="ParserLogException">
    /// Thrown when at least one parse error was collected during the walk.
    /// </exception>
    public Node Analyze(Node node)
    {
        ParserLogException log = new ParserLogException();
        node = Analyze(node, log);
        if (log.Count > 0)
        {
            throw log;
        }
        return node;
    }

    /// <summary>
    /// Recursive worker for <see cref="Analyze(Node)"/>. Productions are
    /// rebuilt through <see cref="NewProduction"/> and repopulated child by
    /// child; any other node has its values cleared and is re-entered.
    /// Parse errors are collected in <paramref name="log"/> instead of
    /// aborting the walk.
    /// </summary>
    /// <returns>The result of <see cref="Exit"/>, or null if it failed.</returns>
    private Node Analyze(Node node, ParserLogException log)
    {
        int errorCount = log.Count;

        if (node is Production)
        {
            Production prod = NewProduction(((Production)node).Pattern);
            try
            {
                Enter(prod);
            }
            catch (ParseException e)
            {
                log.AddError(e);
            }
            for (int i = 0; i < node.Count; i++)
            {
                try
                {
                    Child(prod, Analyze(node[i], log));
                }
                catch (ParseException e)
                {
                    log.AddError(e);
                }
            }
            try
            {
                return Exit(prod);
            }
            catch (ParseException e)
            {
                // Only log an exit failure when nothing else was logged for
                // this subtree since the walk entered it.
                if (errorCount == log.Count)
                {
                    log.AddError(e);
                }
            }
        }
        else
        {
            node.Values.Clear();
            try
            {
                Enter(node);
            }
            catch (ParseException e)
            {
                log.AddError(e);
            }
            try
            {
                return Exit(node);
            }
            catch (ParseException e)
            {
                if (errorCount == log.Count)
                {
                    log.AddError(e);
                }
            }
        }
        return null;
    }

    /// <summary>
    /// Factory for the production nodes created during analysis.
    /// </summary>
    /// <param name="pattern">The pattern of the production being rebuilt.</param>
    public virtual Production NewProduction(ProductionPattern pattern)
    {
        return new Production(pattern);
    }

    /// <summary>
    /// Called when a node is entered. The default implementation does nothing.
    /// </summary>
    public virtual void Enter(Node node)
    {
    }

    /// <summary>
    /// Called when a node is left. The default implementation returns the
    /// node unchanged.
    /// </summary>
    public virtual Node Exit(Node node)
    {
        return node;
    }

    /// <summary>
    /// Called when a child has been analyzed. The default implementation
    /// adds the child to the production.
    /// </summary>
    public virtual void Child(Production node, Node child)
    {
        node.AddChild(child);
    }

    /// <summary>
    /// Returns the child of <paramref name="node"/> at <paramref name="pos"/>.
    /// </summary>
    /// <exception cref="ParseException">
    /// Thrown when the node is null or has no child at that position.
    /// </exception>
    protected Node GetChildAt(Node node, int pos)
    {
        EnsureNodeNotNull(node);
        Node child = node[pos];
        if (child == null)
        {
            throw new ParseException(
                ParseException.ErrorType.INTERNAL,
                "node '" + node.Name + "' has no child at " +
                "position " + pos,
                node.StartLine,
                node.StartColumn);
        }
        return child;
    }

    /// <summary>
    /// Returns the first child of <paramref name="node"/> whose id equals
    /// <paramref name="id"/>.
    /// </summary>
    /// <exception cref="ParseException">
    /// Thrown when the node is null or has no such child.
    /// </exception>
    protected Node GetChildWithId(Node node, int id)
    {
        EnsureNodeNotNull(node);
        for (int i = 0; i < node.Count; i++)
        {
            Node child = node[i];
            if (child != null && child.Id == id)
            {
                return child;
            }
        }
        throw new ParseException(
            ParseException.ErrorType.INTERNAL,
            "node '" + node.Name + "' has no child with id " + id,
            node.StartLine,
            node.StartColumn);
    }

    /// <summary>
    /// Returns the value stored on <paramref name="node"/> at
    /// <paramref name="pos"/>.
    /// </summary>
    /// <exception cref="ParseException">
    /// Thrown when the node is null, the position is outside the value
    /// list, or the stored value is null.
    /// </exception>
    protected object GetValue(Node node, int pos)
    {
        EnsureNodeNotNull(node);

        // Guard the index so that a missing value surfaces as the
        // ParseException below rather than as an out-of-range failure
        // from the underlying list.
        var values = node.Values;
        object value = null;
        if (values != null && pos >= 0 && pos < values.Count)
        {
            value = values[pos];
        }
        if (value == null)
        {
            throw new ParseException(
                ParseException.ErrorType.INTERNAL,
                "node '" + node.Name + "' has no value at " +
                "position " + pos,
                node.StartLine,
                node.StartColumn);
        }
        return value;
    }

    /// <summary>
    /// Returns the value at <paramref name="pos"/> as an <c>int</c>.
    /// </summary>
    /// <exception cref="ParseException">
    /// Thrown when the value is missing or is not an <c>int</c>.
    /// </exception>
    protected int GetIntValue(Node node, int pos)
    {
        object value = GetValue(node, pos);
        if (value is int)
        {
            return (int)value;
        }
        throw new ParseException(
            ParseException.ErrorType.INTERNAL,
            "node '" + node.Name + "' has no integer value " +
            "at position " + pos,
            node.StartLine,
            node.StartColumn);
    }

    /// <summary>
    /// Returns the value at <paramref name="pos"/> as a <c>string</c>.
    /// </summary>
    /// <exception cref="ParseException">
    /// Thrown when the value is missing or is not a <c>string</c>.
    /// </exception>
    protected string GetStringValue(Node node, int pos)
    {
        object value = GetValue(node, pos);
        if (value is string)
        {
            return (string)value;
        }
        throw new ParseException(
            ParseException.ErrorType.INTERNAL,
            "node '" + node.Name + "' has no string value " +
            "at position " + pos,
            node.StartLine,
            node.StartColumn);
    }

    /// <summary>
    /// Collects the values of every child of <paramref name="node"/>, in
    /// child order, into one flat list. Null children and children
    /// without a value list contribute nothing.
    /// </summary>
    protected ArrayList GetChildValues(Node node)
    {
        ArrayList result = new ArrayList();
        for (int i = 0; i < node.Count; i++)
        {
            Node child = node[i];
            var values = child?.Values;
            if (values != null)
            {
                result.AddRange(values);
            }
        }
        return result;
    }

    /// <summary>
    /// Throws the shared "null node" internal error used by the accessors.
    /// </summary>
    private static void EnsureNodeNotNull(Node node)
    {
        if (node == null)
        {
            throw new ParseException(
                ParseException.ErrorType.INTERNAL,
                "attempt to read 'null' parse tree node",
                -1,
                -1);
        }
    }
}
}

111
Parsing/Automaton.cs Normal file
View File

@@ -0,0 +1,111 @@
namespace Flee.Parsing
{
/// <summary>
/// A state in a character-by-character string matching automaton. Each
/// state optionally carries a value (set when a registered string ends at
/// this state) and a transition tree keyed by the next character.
/// </summary>
internal class Automaton
{
    private object _value;
    private readonly AutomatonTree _tree = new AutomatonTree();

    /// <summary>
    /// Creates an empty automaton state.
    /// </summary>
    public Automaton()
    {
    }

    /// <summary>
    /// Registers <paramref name="str"/> so that matching it yields
    /// <paramref name="value"/>. One state is used per character; an
    /// empty string stores the value on this state.
    /// </summary>
    /// <param name="str">The remaining text to register.</param>
    /// <param name="caseInsensitive">Forwarded to the transition tree lookups.</param>
    /// <param name="value">The value to associate with the full string.</param>
    public void AddMatch(string str, bool caseInsensitive, object value)
    {
        if (str.Length == 0)
        {
            _value = value;
            return;
        }

        char head = str[0];
        Automaton next = _tree.Find(head, caseInsensitive);
        if (next != null)
        {
            next.AddMatch(str.Substring(1), caseInsensitive, value);
            return;
        }

        // No transition yet: build the tail first, then link it in.
        next = new Automaton();
        next.AddMatch(str.Substring(1), caseInsensitive, value);
        _tree.Add(head, caseInsensitive, next);
    }

    /// <summary>
    /// Follows transitions for the characters peeked from
    /// <paramref name="input"/> starting at offset <paramref name="pos"/>
    /// and returns the value of the deepest state that has one, falling
    /// back to this state's own value (possibly null).
    /// </summary>
    public object MatchFrom(LookAheadReader input, int pos, bool caseInsensitive)
    {
        int peeked = input.Peek(pos);
        if (_tree == null || peeked < 0)
        {
            return _value;
        }

        Automaton next = _tree.Find(Convert.ToChar(peeked), caseInsensitive);
        if (next == null)
        {
            return _value;
        }

        // Prefer a longer match; fall back to whatever ends here.
        return next.MatchFrom(input, pos + 1, caseInsensitive) ?? _value;
    }
}
/// <summary>
/// An automaton state transition tree. This is a binary search tree of the
/// transitions leading from one automaton state to the next, each keyed by
/// a single character. A node whose key is <c>(char)0</c> is an empty
/// placeholder; a filled node always has two (possibly empty) subtrees.
/// </summary>
internal class AutomatonTree
{
    private char _value;
    private Automaton _state;
    private AutomatonTree _left;
    private AutomatonTree _right;

    /// <summary>
    /// Creates an empty tree node.
    /// </summary>
    public AutomatonTree()
    {
    }

    /// <summary>
    /// Looks up the state reached through character <paramref name="c"/>.
    /// </summary>
    /// <param name="c">The transition character.</param>
    /// <param name="lowerCase">When true, <paramref name="c"/> is lower-cased before the search.</param>
    /// <returns>The target state, or null when no transition exists.</returns>
    public Automaton Find(char c, bool lowerCase)
    {
        if (lowerCase)
        {
            c = Char.ToLower(c);
        }

        // Walk down until the key is found or an empty placeholder is hit
        // (whose state is null).
        AutomatonTree node = this;
        while (node._value != (char)0 && node._value != c)
        {
            node = node._value > c ? node._left : node._right;
        }
        return node._state;
    }

    /// <summary>
    /// Inserts a transition on character <paramref name="c"/> to
    /// <paramref name="state"/>.
    /// </summary>
    /// <param name="c">The transition character.</param>
    /// <param name="lowerCase">When true, <paramref name="c"/> is lower-cased before insertion.</param>
    /// <param name="state">The state the transition leads to.</param>
    public void Add(char c, bool lowerCase, Automaton state)
    {
        if (lowerCase)
        {
            c = Char.ToLower(c);
        }

        // Smaller keys go left; equal or larger keys go right.
        AutomatonTree node = this;
        while (node._value != (char)0)
        {
            node = node._value > c ? node._left : node._right;
        }

        node._value = c;
        node._state = state;
        node._left = new AutomatonTree();
        node._right = new AutomatonTree();
    }
}
}

View File

@@ -0,0 +1,267 @@
using System.Collections;
using System.Text;
namespace Flee.Parsing
{
/// <summary>
/// A regular expression character set element. This element
/// matches a single character inside (or outside) a character set.
/// The character set is user defined and may contain single characters,
/// ranges of characters and nested sets. The set may also be inverted,
/// meaning that only characters not inside the set will match.
/// </summary>
/// <remarks>
/// The predefined sets (<see cref="Dot"/>, <see cref="Digit"/>, ...) carry
/// no contents; they are recognised by reference identity.
/// </remarks>
internal class CharacterSetElement : Element
{
    public static CharacterSetElement Dot = new CharacterSetElement(false);
    public static CharacterSetElement Digit = new CharacterSetElement(false);
    public static CharacterSetElement NonDigit = new CharacterSetElement(true);
    public static CharacterSetElement Whitespace = new CharacterSetElement(false);
    public static CharacterSetElement NonWhitespace = new CharacterSetElement(true);
    public static CharacterSetElement Word = new CharacterSetElement(false);
    public static CharacterSetElement NonWord = new CharacterSetElement(true);

    private readonly bool _inverted;

    // Holds boxed chars, Range instances and nested CharacterSetElements.
    private readonly ArrayList _contents = new ArrayList();

    /// <summary>
    /// Creates an empty character set.
    /// </summary>
    /// <param name="inverted">True to match characters outside the set.</param>
    public CharacterSetElement(bool inverted)
    {
        _inverted = inverted;
    }

    /// <summary>Adds a single character to the set.</summary>
    public void AddCharacter(char c)
    {
        _contents.Add(c);
    }

    /// <summary>Adds every character of <paramref name="str"/> to the set.</summary>
    public void AddCharacters(string str)
    {
        for (int i = 0; i < str.Length; i++)
        {
            AddCharacter(str[i]);
        }
    }

    /// <summary>Adds every character of the string element to the set.</summary>
    public void AddCharacters(StringElement elem)
    {
        AddCharacters(elem.GetString());
    }

    /// <summary>Adds the inclusive range <paramref name="min"/>..<paramref name="max"/>.</summary>
    public void AddRange(char min, char max)
    {
        _contents.Add(new Range(min, max));
    }

    /// <summary>Adds a nested character set to this set.</summary>
    public void AddCharacterSet(CharacterSetElement elem)
    {
        _contents.Add(elem);
    }

    /// <summary>
    /// Returns this same instance; no copy is made.
    /// </summary>
    public override object Clone()
    {
        return this;
    }

    /// <summary>
    /// Matches the single character at <paramref name="start"/>.
    /// </summary>
    /// <returns>
    /// 1 when the character is accepted; -1 when it is rejected, when
    /// <paramref name="skip"/> is non-zero, or when the buffer has no
    /// character at that position (the matcher is then told that the end
    /// of the input was read).
    /// </returns>
    public override int Match(Matcher m,
                              ReaderBuffer buffer,
                              int start,
                              int skip)
    {
        if (skip != 0)
        {
            return -1;
        }
        int c = buffer.Peek(start);
        if (c < 0)
        {
            m.SetReadEndOfString();
            return -1;
        }
        if (m.IsCaseInsensitive())
        {
            c = (int)Char.ToLower((char)c);
        }
        return InSet((char)c) ? 1 : -1;
    }

    /// <summary>
    /// Tests a character against this set, dispatching to the predefined
    /// set logic when this instance is one of the static sets.
    /// </summary>
    private bool InSet(char c)
    {
        if (this == Dot)
        {
            return InDotSet(c);
        }
        else if (this == Digit || this == NonDigit)
        {
            return InDigitSet(c) != _inverted;
        }
        else if (this == Whitespace || this == NonWhitespace)
        {
            return InWhitespaceSet(c) != _inverted;
        }
        else if (this == Word || this == NonWord)
        {
            return InWordSet(c) != _inverted;
        }
        else
        {
            return InUserSet(c) != _inverted;
        }
    }

    /// <summary>True for every character except the line terminators listed below.</summary>
    private bool InDotSet(char c)
    {
        switch (c)
        {
            case '\n':
            case '\r':
            case '\u0085':
            case '\u2028':
            case '\u2029':
                return false;
            default:
                return true;
        }
    }

    /// <summary>True for the ASCII digits '0'..'9'.</summary>
    private bool InDigitSet(char c)
    {
        return '0' <= c && c <= '9';
    }

    /// <summary>True for space, tab, line feed, form feed, carriage return and vertical tab.</summary>
    private bool InWhitespaceSet(char c)
    {
        switch (c)
        {
            case ' ':
            case '\t':
            case '\n':
            case '\f':
            case '\r':
            case (char)11:
                return true;
            default:
                return false;
        }
    }

    /// <summary>True for ASCII letters, ASCII digits and underscore.</summary>
    private bool InWordSet(char c)
    {
        return ('a' <= c && c <= 'z')
            || ('A' <= c && c <= 'Z')
            || ('0' <= c && c <= '9')
            || c == '_';
    }

    /// <summary>
    /// Tests a character against the user-defined contents, ignoring
    /// this set's own inversion flag (the caller applies it).
    /// </summary>
    private bool InUserSet(char value)
    {
        for (int i = 0; i < _contents.Count; i++)
        {
            object item = _contents[i];
            if (item is char)
            {
                if ((char)item == value)
                {
                    return true;
                }
            }
            else if (item is Range)
            {
                if (((Range)item).Inside(value))
                {
                    return true;
                }
            }
            else if (item is CharacterSetElement)
            {
                if (((CharacterSetElement)item).InSet(value))
                {
                    return true;
                }
            }
        }
        return false;
    }

    /// <summary>
    /// Writes the textual form of this set on a single line.
    /// </summary>
    public override void PrintTo(TextWriter output, string indent)
    {
        output.WriteLine(indent + ToString());
    }

    /// <summary>
    /// Returns the regular expression notation for this set: the escape
    /// shorthand for predefined sets, otherwise a bracket expression
    /// (<c>[...]</c>, or <c>[^...]</c> when inverted).
    /// </summary>
    public override string ToString()
    {
        // Handle predefined character sets
        if (this == Dot)
        {
            return ".";
        }
        else if (this == Digit)
        {
            return "\\d";
        }
        else if (this == NonDigit)
        {
            return "\\D";
        }
        else if (this == Whitespace)
        {
            return "\\s";
        }
        else if (this == NonWhitespace)
        {
            return "\\S";
        }
        else if (this == Word)
        {
            return "\\w";
        }
        else if (this == NonWord)
        {
            return "\\W";
        }

        // Handle user-defined character sets. The negation marker
        // belongs inside the bracket.
        var buffer = new StringBuilder();
        buffer.Append(_inverted ? "[^" : "[");
        for (int i = 0; i < _contents.Count; i++)
        {
            buffer.Append(_contents[i]);
        }
        buffer.Append("]");
        return buffer.ToString();
    }

    /// <summary>
    /// An inclusive character range.
    /// </summary>
    private class Range
    {
        private readonly char _min;
        private readonly char _max;

        public Range(char min, char max)
        {
            _min = min;
            _max = max;
        }

        /// <summary>True when <paramref name="c"/> lies within the range, bounds included.</summary>
        public bool Inside(char c)
        {
            return _min <= c && c <= _max;
        }

        public override string ToString()
        {
            return _min + "-" + _max;
        }
    }
}
}

58
Parsing/CombineElement.cs Normal file
View File

@@ -0,0 +1,58 @@
namespace Flee.Parsing
{
/// <summary>
/// A regular expression element that matches two sub-elements one after
/// the other: the second element is matched where the first one ended.
/// </summary>
internal class CombineElement : Element
{
    private readonly Element _elem1;
    private readonly Element _elem2;

    /// <summary>
    /// Creates a sequence of the two given elements.
    /// </summary>
    public CombineElement(Element first, Element second)
    {
        _elem1 = first;
        _elem2 = second;
    }

    /// <summary>
    /// Returns a new combine element that refers to the same two
    /// sub-element instances (the sub-elements are not cloned).
    /// </summary>
    public override object Clone() => new CombineElement(_elem1, _elem2);

    /// <summary>
    /// Matches the first element and then the second one right behind it.
    /// When the second element fails, the next candidate of the first
    /// element is tried; each successful combination consumes one unit of
    /// <paramref name="skip"/>.
    /// </summary>
    /// <returns>
    /// The combined length of the selected match, or -1 when the first
    /// element runs out of candidates.
    /// </returns>
    public override int Match(Matcher m, ReaderBuffer buffer, int start, int skip)
    {
        int firstLength = -1;
        int secondLength = 0;
        int firstSkip = 0;
        int secondSkip = 0;

        for (int remaining = skip; remaining >= 0;)
        {
            firstLength = _elem1.Match(m, buffer, start, firstSkip);
            if (firstLength < 0)
            {
                return -1;
            }

            secondLength = _elem2.Match(m, buffer, start + firstLength, secondSkip);
            if (secondLength >= 0)
            {
                // A full combination was found; count it.
                secondSkip++;
                remaining--;
            }
            else
            {
                // Second element exhausted for this prefix; move on to
                // the next candidate of the first element.
                firstSkip++;
                secondSkip = 0;
            }
        }
        return firstLength + secondLength;
    }

    /// <summary>
    /// Prints both sub-elements at the same indentation level.
    /// </summary>
    public override void PrintTo(TextWriter output, string indent)
    {
        _elem1.PrintTo(output, indent);
        _elem2.PrintTo(output, indent);
    }
}
}

View File

@@ -0,0 +1,596 @@
using System.Collections;
using System.Diagnostics;
using System.Globalization;
using System.Text.RegularExpressions;
using Flee.ExpressionElements;
using Flee.ExpressionElements.Base;
using Flee.ExpressionElements.Base.Literals;
using Flee.ExpressionElements.Literals;
using Flee.ExpressionElements.Literals.Integral;
using Flee.ExpressionElements.LogicalBitwise;
using Flee.ExpressionElements.MemberElements;
using Flee.InternalTypes;
using Flee.PublicTypes;
namespace Flee.Parsing
{
/// <summary>
/// Analyzer that turns the expression parse tree into Flee expression
/// elements. Token callbacks attach a literal element, an operation
/// constant or a plain string to the token; production callbacks combine
/// the values of their children into a single value on the production.
/// </summary>
internal class FleeExpressionAnalyzer : ExpressionAnalyzer
{
    private IServiceProvider _myServices;

    // One pattern for both escape forms ("\uXXXX" and "\x") so that a
    // literal is unescaped in a single left-to-right pass. Two separate
    // passes would re-interpret the output of the first pass.
    private readonly Regex _myEscapeRegex;

    // True while the most recently added child was the "-" of a negate
    // expression; passed to the integral literal factory.
    private bool _myInUnaryNegate;

    internal FleeExpressionAnalyzer()
    {
        _myEscapeRegex = new Regex("\\\\u[0-9a-f]{4}|\\\\[\\\\\"'trn]", RegexOptions.IgnoreCase);
    }

    /// <summary>Sets the service provider handed to the created elements.</summary>
    public void SetServices(IServiceProvider services)
    {
        _myServices = services;
    }

    /// <summary>
    /// Drops the service provider and clears the unary-negate flag so
    /// that no state from a previous (possibly aborted) parse leaks
    /// into the next one.
    /// </summary>
    public override void Reset()
    {
        _myServices = null;
        _myInUnaryNegate = false;
    }

    public override Node ExitExpression(Production node)
    {
        AddFirstChildValue(node);
        return node;
    }

    public override Node ExitExpressionGroup(Production node)
    {
        node.AddValues(GetChildValues(node));
        return node;
    }

    public override Node ExitXorExpression(Production node)
    {
        AddBinaryOp(node, typeof(XorElement));
        return node;
    }

    public override Node ExitOrExpression(Production node)
    {
        AddBinaryOp(node, typeof(AndOrElement));
        return node;
    }

    public override Node ExitAndExpression(Production node)
    {
        AddBinaryOp(node, typeof(AndOrElement));
        return node;
    }

    public override Node ExitNotExpression(Production node)
    {
        AddUnaryOp(node, typeof(NotElement));
        return node;
    }

    public override Node ExitCompareExpression(Production node)
    {
        AddBinaryOp(node, typeof(CompareElement));
        return node;
    }

    public override Node ExitShiftExpression(Production node)
    {
        AddBinaryOp(node, typeof(ShiftElement));
        return node;
    }

    public override Node ExitAdditiveExpression(Production node)
    {
        AddBinaryOp(node, typeof(ArithmeticElement));
        return node;
    }

    public override Node ExitMultiplicativeExpression(Production node)
    {
        AddBinaryOp(node, typeof(ArithmeticElement));
        return node;
    }

    public override Node ExitPowerExpression(Production node)
    {
        AddBinaryOp(node, typeof(ArithmeticElement));
        return node;
    }

    /// <summary>
    /// Folds a negated integer constant into the literal itself. This is
    /// needed so that parsing int32.MinValue works. Anything else
    /// becomes a regular negate element.
    /// </summary>
    public override Node ExitNegateExpression(Production node)
    {
        IList childValues = GetChildValues(node);

        // Get last child
        ExpressionElement childElement = (ExpressionElement)childValues[childValues.Count - 1];
        bool hasMinus = childValues.Count == 2;

        // Is it a signed integer constant? (exact type match only)
        if (hasMinus && childElement.GetType() == typeof(Int32LiteralElement))
        {
            ((Int32LiteralElement)childElement).Negate();
            // Add it directly instead of the negate element since it will already be negated
            node.AddValue(childElement);
        }
        else if (hasMinus && childElement.GetType() == typeof(Int64LiteralElement))
        {
            ((Int64LiteralElement)childElement).Negate();
            // Add it directly instead of the negate element since it will already be negated
            node.AddValue(childElement);
        }
        else
        {
            // No, so just add a regular negate
            AddUnaryOp(node, typeof(NegateElement));
        }
        return node;
    }

    /// <summary>
    /// Passes a lone non-member value through unchanged; otherwise wraps
    /// all child values in an invocation list.
    /// </summary>
    public override Node ExitMemberExpression(Production node)
    {
        IList childValues = GetChildValues(node);
        object first = childValues[0];
        if (childValues.Count == 1 && !(first is MemberElement))
        {
            node.AddValue(first);
        }
        else
        {
            InvocationListElement list = new InvocationListElement(childValues, _myServices);
            node.AddValue(list);
        }
        return node;
    }

    public override Node ExitIndexExpression(Production node)
    {
        IList childValues = GetChildValues(node);
        ArgumentList args = new ArgumentList(childValues);
        IndexerElement e = new IndexerElement(args);
        node.AddValue(e);
        return node;
    }

    /// <summary>Takes the value of the second child (the member after the dot).</summary>
    public override Node ExitMemberAccessExpression(Production node)
    {
        node.AddValue(node.GetChildAt(1).GetValue(0));
        return node;
    }

    public override Node ExitSpecialFunctionExpression(Production node)
    {
        AddFirstChildValue(node);
        return node;
    }

    /// <summary>Builds a conditional from the first three child values.</summary>
    public override Node ExitIfExpression(Production node)
    {
        IList childValues = GetChildValues(node);
        ConditionalElement op = new ConditionalElement(
            (ExpressionElement)childValues[0],
            (ExpressionElement)childValues[1],
            (ExpressionElement)childValues[2]);
        node.AddValue(op);
        return node;
    }

    /// <summary>
    /// With a single child value the expression is passed through. Otherwise
    /// the first value is the operand and the rest is the target: either
    /// a list of arguments or something wrapped in an invocation list.
    /// </summary>
    public override Node ExitInExpression(Production node)
    {
        IList childValues = GetChildValues(node);
        if (childValues.Count == 1)
        {
            AddFirstChildValue(node);
            return node;
        }

        ExpressionElement operand = (ExpressionElement)childValues[0];
        childValues.RemoveAt(0);
        object second = childValues[0];

        InElement op;
        if (second is IList)
        {
            op = new InElement(operand, (IList)second);
        }
        else
        {
            InvocationListElement il = new InvocationListElement(childValues, _myServices);
            op = new InElement(operand, il);
        }
        node.AddValue(op);
        return node;
    }

    public override Node ExitInTargetExpression(Production node)
    {
        AddFirstChildValue(node);
        return node;
    }

    /// <summary>Stores the child values as one list value (not flattened).</summary>
    public override Node ExitInListTargetExpression(Production node)
    {
        IList childValues = GetChildValues(node);
        node.AddValue(childValues);
        return node;
    }

    /// <summary>
    /// Expects the child values to be: the expression to cast, the
    /// destination type name parts and the is-array flag (the latter two
    /// are produced by <see cref="ExitCastTypeExpression"/>).
    /// </summary>
    public override Node ExitCastExpression(Production node)
    {
        IList childValues = GetChildValues(node);
        string[] destTypeParts = (string[])childValues[1];
        bool isArray = (bool)childValues[2];
        CastElement op = new CastElement((ExpressionElement)childValues[0], destTypeParts, isArray, _myServices);
        node.AddValue(op);
        return node;
    }

    /// <summary>
    /// Adds two values: the type name parts as a string array and a flag
    /// telling whether a trailing "[]" was present.
    /// </summary>
    public override Node ExitCastTypeExpression(Production node)
    {
        IList childValues = GetChildValues(node);
        List<string> parts = new List<string>();
        foreach (string part in childValues)
        {
            parts.Add(part);
        }

        bool isArray = false;
        if (parts[parts.Count - 1] == "[]")
        {
            isArray = true;
            parts.RemoveAt(parts.Count - 1);
        }
        node.AddValue(parts.ToArray());
        node.AddValue(isArray);
        return node;
    }

    public override Node ExitMemberFunctionExpression(Production node)
    {
        AddFirstChildValue(node);
        return node;
    }

    public override Node ExitFieldPropertyExpression(Production node)
    {
        string name = node.GetChildAt(0).GetValue(0).ToString();
        IdentifierElement elem = new IdentifierElement(name);
        node.AddValue(elem);
        return node;
    }

    /// <summary>The first child value is the function name; the rest are arguments.</summary>
    public override Node ExitFunctionCallExpression(Production node)
    {
        IList childValues = GetChildValues(node);
        string name = (string)childValues[0];
        childValues.RemoveAt(0);
        ArgumentList args = new ArgumentList(childValues);
        FunctionCallElement funcCall = new FunctionCallElement(name, args);
        node.AddValue(funcCall);
        return node;
    }

    public override Node ExitArgumentList(Production node)
    {
        IList childValues = GetChildValues(node);
        node.AddValues((ArrayList)childValues);
        return node;
    }

    public override Node ExitBasicExpression(Production node)
    {
        AddFirstChildValue(node);
        return node;
    }

    public override Node ExitLiteralExpression(Production node)
    {
        AddFirstChildValue(node);
        return node;
    }

    /// <summary>Copies the first value of the first child onto the node.</summary>
    private void AddFirstChildValue(Production node)
    {
        node.AddValue(GetChildAt(node, 0).Values[0]);
    }

    /// <summary>
    /// With two child values (operator marker and operand) creates an
    /// instance of <paramref name="elementType"/> around the operand;
    /// otherwise passes the first child value through.
    /// </summary>
    private void AddUnaryOp(Production node, Type elementType)
    {
        IList childValues = GetChildValues(node);
        if (childValues.Count == 2)
        {
            UnaryElement element = (UnaryElement)Activator.CreateInstance(elementType);
            element.SetChild((ExpressionElement)childValues[1]);
            node.AddValue(element);
        }
        else
        {
            node.AddValue(childValues[0]);
        }
    }

    /// <summary>
    /// With more than one child value builds a binary expression element
    /// of <paramref name="elementType"/>; a single value is passed through.
    /// </summary>
    private void AddBinaryOp(Production node, Type elementType)
    {
        IList childValues = GetChildValues(node);
        if (childValues.Count > 1)
        {
            BinaryExpressionElement e = BinaryExpressionElement.CreateElement(childValues, elementType);
            node.AddValue(e);
        }
        else if (childValues.Count == 1)
        {
            node.AddValue(childValues[0]);
        }
        else
        {
            Debug.Assert(false, "wrong number of chilren");
        }
    }

    public override Node ExitReal(Token node)
    {
        string image = node.Image;
        LiteralElement element = RealLiteralElement.Create(image, _myServices);
        node.AddValue(element);
        return node;
    }

    public override Node ExitInteger(Token node)
    {
        LiteralElement element = IntegralLiteralElement.Create(node.Image, false, _myInUnaryNegate, _myServices);
        node.AddValue(element);
        return node;
    }

    public override Node ExitHexliteral(Token node)
    {
        LiteralElement element = IntegralLiteralElement.Create(node.Image, true, _myInUnaryNegate, _myServices);
        node.AddValue(element);
        return node;
    }

    public override Node ExitBooleanLiteralExpression(Production node)
    {
        AddFirstChildValue(node);
        return node;
    }

    public override Node ExitTrue(Token node)
    {
        node.AddValue(new BooleanLiteralElement(true));
        return node;
    }

    public override Node ExitFalse(Token node)
    {
        node.AddValue(new BooleanLiteralElement(false));
        return node;
    }

    public override Node ExitStringLiteral(Token node)
    {
        string s = DoEscapes(node.Image);
        StringLiteralElement element = new StringLiteralElement(s);
        node.AddValue(element);
        return node;
    }

    public override Node ExitCharLiteral(Token node)
    {
        string s = DoEscapes(node.Image);
        node.AddValue(new CharLiteralElement(s[0]));
        return node;
    }

    /// <summary>Strips the surrounding '#' delimiters from the token image.</summary>
    public override Node ExitDatetime(Token node)
    {
        ExpressionContext context = (ExpressionContext)_myServices.GetService(typeof(ExpressionContext));
        string image = node.Image.Substring(1, node.Image.Length - 2);
        DateTimeLiteralElement element = new DateTimeLiteralElement(image, context);
        node.AddValue(element);
        return node;
    }

    /// <summary>Strips the leading "##" and the trailing '#' from the token image.</summary>
    public override Node ExitTimespan(Token node)
    {
        string image = node.Image.Substring(2, node.Image.Length - 3);
        TimeSpanLiteralElement element = new TimeSpanLiteralElement(image);
        node.AddValue(element);
        return node;
    }

    /// <summary>
    /// Removes the outer quotes of a string or char literal image and
    /// replaces its escape sequences. All escapes are resolved in one
    /// left-to-right pass, so an escaped backslash followed by "u0041"
    /// stays a backslash plus "u0041", and the character produced by a
    /// unicode escape is never treated as the start of another escape.
    /// </summary>
    private string DoEscapes(string image)
    {
        // Remove outer quotes
        string inner = image.Substring(1, image.Length - 2);
        return _myEscapeRegex.Replace(inner, EscapeMatcher);
    }

    /// <summary>Dispatches a matched escape to the right translator.</summary>
    private string EscapeMatcher(Match m)
    {
        // A unicode escape is six characters long; every other escape is two.
        return m.Length > 2 ? UnicodeEscapeMatcher(m) : RegularEscapeMatcher(m);
    }

    /// <summary>Translates a two-character escape such as \n or \".</summary>
    private string RegularEscapeMatcher(Match m)
    {
        string s = m.Value;
        // Remove leading \
        s = s.Remove(0, 1);
        switch (s)
        {
            case "\\":
            case "\"":
            case "'":
                return s;
            case "t":
            case "T":
                return Convert.ToChar(9).ToString();
            case "n":
            case "N":
                return Convert.ToChar(10).ToString();
            case "r":
            case "R":
                return Convert.ToChar(13).ToString();
            default:
                Debug.Assert(false, "Unrecognized escape sequence");
                return null;
        }
    }

    /// <summary>Translates a \uXXXX escape into the character with that code.</summary>
    private string UnicodeEscapeMatcher(Match m)
    {
        string s = m.Value;
        // Remove \u
        s = s.Remove(0, 2);
        int code = int.Parse(s, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture);
        char c = Convert.ToChar(code);
        return c.ToString();
    }

    public override Node ExitIdentifier(Token node)
    {
        node.AddValue(node.Image);
        return node;
    }

    public override Node ExitNullLiteral(Token node)
    {
        node.AddValue(new NullLiteralElement());
        return node;
    }

    public override Node ExitArrayBraces(Token node)
    {
        node.AddValue("[]");
        return node;
    }

    public override Node ExitAdd(Token node)
    {
        node.AddValue(BinaryArithmeticOperation.Add);
        return node;
    }

    public override Node ExitSub(Token node)
    {
        node.AddValue(BinaryArithmeticOperation.Subtract);
        return node;
    }

    public override Node ExitMul(Token node)
    {
        node.AddValue(BinaryArithmeticOperation.Multiply);
        return node;
    }

    public override Node ExitDiv(Token node)
    {
        node.AddValue(BinaryArithmeticOperation.Divide);
        return node;
    }

    public override Node ExitMod(Token node)
    {
        node.AddValue(BinaryArithmeticOperation.Mod);
        return node;
    }

    public override Node ExitPower(Token node)
    {
        node.AddValue(BinaryArithmeticOperation.Power);
        return node;
    }

    public override Node ExitEq(Token node)
    {
        node.AddValue(LogicalCompareOperation.Equal);
        return node;
    }

    public override Node ExitNe(Token node)
    {
        node.AddValue(LogicalCompareOperation.NotEqual);
        return node;
    }

    public override Node ExitLt(Token node)
    {
        node.AddValue(LogicalCompareOperation.LessThan);
        return node;
    }

    public override Node ExitGt(Token node)
    {
        node.AddValue(LogicalCompareOperation.GreaterThan);
        return node;
    }

    public override Node ExitLte(Token node)
    {
        node.AddValue(LogicalCompareOperation.LessThanOrEqual);
        return node;
    }

    public override Node ExitGte(Token node)
    {
        node.AddValue(LogicalCompareOperation.GreaterThanOrEqual);
        return node;
    }

    public override Node ExitAnd(Token node)
    {
        node.AddValue(AndOrOperation.And);
        return node;
    }

    public override Node ExitOr(Token node)
    {
        node.AddValue(AndOrOperation.Or);
        return node;
    }

    // The XOR and NOT tokens only need to contribute *a* value so that
    // the parent production sees an operator entry among its child values.
    public override Node ExitXor(Token node)
    {
        node.AddValue("Xor");
        return node;
    }

    public override Node ExitNot(Token node)
    {
        node.AddValue(string.Empty);
        return node;
    }

    public override Node ExitLeftShift(Token node)
    {
        node.AddValue(ShiftOperation.LeftShift);
        return node;
    }

    public override Node ExitRightShift(Token node)
    {
        node.AddValue(ShiftOperation.RightShift);
        return node;
    }

    /// <summary>
    /// Adds the child and records whether it was the "-" token of a
    /// negate expression, so that the integer literal that follows can
    /// be created with that knowledge. A null child never sets the flag.
    /// </summary>
    public override void Child(Production node, Node child)
    {
        base.Child(node, child);
        _myInUnaryNegate = child != null
            && node.Id == (int)ExpressionConstants.NEGATE_EXPRESSION
            && child.Id == (int)ExpressionConstants.SUB;
    }
}
}

View File

@@ -0,0 +1,49 @@
using Flee.PublicTypes;
namespace Flee.Parsing
{
/// <summary>
/// Base class for token patterns whose final data depends on an
/// <c>ExpressionContext</c> and is therefore computed after construction.
/// </summary>
internal abstract class CustomTokenPattern : TokenPattern
{
    protected CustomTokenPattern(int id, string name, PatternType type, string pattern)
        : base(id, name, type, pattern)
    {
    }

    /// <summary>
    /// Computes the token data for the given context by delegating to
    /// <see cref="ComputeToken"/>.
    /// </summary>
    public void Initialize(int id, string name, PatternType type, string pattern, ExpressionContext context)
        => ComputeToken(id, name, type, pattern, context);

    /// <summary>
    /// Implemented by subclasses to derive the pattern data from the context.
    /// </summary>
    protected abstract void ComputeToken(int id, string name, PatternType type, string pattern, ExpressionContext context);
}
/// <summary>
/// Token pattern for real literals. The incoming pattern is a format
/// string: placeholder 0 receives the quantifier for the digits in front
/// of the decimal point and placeholder 1 the configured decimal separator.
/// </summary>
internal class RealPattern : CustomTokenPattern
{
    public RealPattern(int id, string name, PatternType type, string pattern)
        : base(id, name, type, pattern)
    {
    }

    protected override void ComputeToken(int id, string name, PatternType type, string pattern, ExpressionContext context)
    {
        ExpressionParserOptions parserOptions = context.ParserOptions;

        // '+' demands at least one leading digit, '*' allows none.
        char leadingDigits = parserOptions.RequireDigitsBeforeDecimalPoint ? '+' : '*';
        string resolved = string.Format(pattern, leadingDigits, parserOptions.DecimalSeparator);

        SetData(id, name, type, resolved);
    }
}
/// <summary>
/// Token pattern for the function argument separator. The pattern text
/// passed in is ignored; the separator configured in the parser options
/// is used instead.
/// </summary>
internal class ArgumentSeparatorPattern : CustomTokenPattern
{
    public ArgumentSeparatorPattern(int id, string name, PatternType type, string pattern)
        : base(id, name, type, pattern)
    {
    }

    protected override void ComputeToken(int id, string name, PatternType type, string pattern, ExpressionContext context)
    {
        string separator = context.ParserOptions.FunctionArgumentSeparator.ToString();
        SetData(id, name, type, separator);
    }
}
}

19
Parsing/Element.cs Normal file
View File

@@ -0,0 +1,19 @@
namespace Flee.Parsing
{
/// <summary>
/// A regular expression element. This is the common base class for
/// all regular expression elements, i.e. the parts of the regular
/// expression.
/// </summary>
internal abstract class Element : ICloneable
{
/// <summary>
/// Creates a copy of this element. Implementations decide how deep the
/// copy is.
/// </summary>
public abstract object Clone();
/// <summary>
/// Attempts to match this element against the buffer, beginning at
/// position <paramref name="start"/>.
/// </summary>
/// <param name="m">The matcher that drives the match.</param>
/// <param name="buffer">The input buffer to read characters from.</param>
/// <param name="start">The buffer position at which to start matching.</param>
/// <param name="skip">
/// NOTE(review): appears to be the number of earlier candidate matches
/// to pass over (0 asks for the first one) - inferred from how the
/// composite elements increment it; confirm.
/// </param>
/// <returns>
/// The length of the match, or a negative value when there is no match
/// (the implementations in this namespace return -1).
/// </returns>
public abstract int Match(Matcher m,
ReaderBuffer buffer,
int start,
int skip);
/// <summary>
/// Prints a description of this element to <paramref name="output"/>,
/// prefixing the printed lines with <paramref name="indent"/>.
/// </summary>
public abstract void PrintTo(TextWriter output, string indent);
}
}

133
Parsing/Expression.grammar Normal file
View File

@@ -0,0 +1,133 @@
%header%
DESCRIPTION = "A general expression grammar"
AUTHOR = "Eugene Ciloci"
VERSION = "1.0"
DATE = "May 2007"
GRAMMARTYPE = "LL"
CASESENSITIVE = "False"
LICENSE = "This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public License
as published by the Free Software Foundation; either version 2.1
of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
MA 02111-1307, USA.
"
COPYRIGHT = "Copyright (c) 2007 Eugene Ciloci"
%tokens%
// Arithmetic operators
ADD = "+"
SUB = "-"
MUL = "*"
DIV = "/"
POWER = "^"
MOD = "%"
// Grouping and indexing
LEFT_PAREN = "("
RIGHT_PAREN = ")"
LEFT_BRACE = "["
RIGHT_BRACE = "]"
// Comparison operators
EQ = "="
LT = "<"
GT = ">"
LTE = "<="
GTE = ">="
NE = "<>"
// Logical and membership keywords (the header sets CASESENSITIVE to "False")
AND = "AND"
OR = "OR"
XOR = "XOR"
NOT = "NOT"
IN = "in"
// Member access, argument lists, array type suffix and shifts
DOT = "."
ARGUMENT_SEPARATOR = ","
ARRAY_BRACES = "[]"
LEFT_SHIFT = "<<"
RIGHT_SHIFT = ">>"
// Whitespace is marked %ignore% and is not referenced by any production
WHITESPACE = <<\s+>> %ignore%
// Primitives
// INTEGER and HEX_LITERAL accept an optional u / l / ul / lu suffix;
// REAL accepts an optional signed exponent and an optional f suffix.
INTEGER = <<\d+(u|l|ul|lu)?>>
REAL = <<\d*\.\d+([e][+-]\d{1,3})?f?>>
// String and char literals allow \uXXXX and the escapes \\ \" \' \t \r \n
STRING_LITERAL = <<"([^"\r\n\\]|\\u[0-9a-f]{4}|\\[\\"'trn])*">>
CHAR_LITERAL = <<'([^'\r\n\\]|\\u[0-9a-f]{4}|\\[\\"'trn])'>>
TRUE = "True"
FALSE = "False"
IDENTIFIER = <<[a-z_]\w*>>
HEX_LITERAL = <<0x[0-9a-f]+(u|l|ul|lu)?>>
NULL_LITERAL = "null"
// A timespan opens with "##" and closes with "#"; a datetime is any text between single "#"
TIMESPAN = <<##(\d+\.)?\d{2}:\d{2}(:\d{2}(\.\d{1,7})?)?#>>
DATETIME = <<#[^#]+#>>
// Special Functions
IF = "if"
CAST = "cast"
%productions%
// Operator levels, loosest-binding first. Each level uses the next level
// as its operand, so productions further down bind tighter:
// Xor, Or, And, Not, In, Compare, Shift, Additive, Multiplicative,
// Power, Negate, Member.
// NOTE(review): {...} is read here as "zero or more" and [...] / ? as
// "optional" - confirm against the grammar tool's reference.
Expression = XorExpression;
XorExpression = OrExpression {XOR OrExpression};
OrExpression = AndExpression {OR AndExpression};
AndExpression = NotExpression {AND NotExpression};
NotExpression = NOT? InExpression;
InExpression = CompareExpression [IN InTargetExpression];
// The target of "in" is either a field/property or a parenthesised list
InTargetExpression = FieldPropertyExpression | InListTargetExpression;
InListTargetExpression = "(" ArgumentList ")";
CompareExpression = ShiftExpression {("=" | ">" | "<" | ">=" | "<=" | "<>") ShiftExpression};
ShiftExpression = AdditiveExpression {("<<" | ">>") AdditiveExpression};
AdditiveExpression = MultiplicativeExpression {("+" | "-") MultiplicativeExpression};
MultiplicativeExpression = PowerExpression {("*" | "/" | "%") PowerExpression};
PowerExpression = NegateExpression {"^" NegateExpression};
NegateExpression = "-"? MemberExpression;
// A basic expression followed by any number of ".member" accesses or "[...]" indexers
MemberExpression = BasicExpression {MemberAccessExpression | IndexExpression};
MemberAccessExpression = "." MemberFunctionExpression;
BasicExpression = LiteralExpression | ExpressionGroup | MemberFunctionExpression | SpecialFunctionExpression;
MemberFunctionExpression = FieldPropertyExpression | FunctionCallExpression;
FieldPropertyExpression = IDENTIFIER;
// Built-in functions with their own syntax: if(cond, a, b) and cast(expr, type)
SpecialFunctionExpression = IfExpression | CastExpression;
IfExpression = IF "(" Expression "," Expression "," Expression ")";
CastExpression = CAST "(" Expression "," CastTypeExpression ")";
// A dotted type name with an optional trailing "[]"
CastTypeExpression = IDENTIFIER {"." IDENTIFIER} ARRAY_BRACES?;
IndexExpression = "[" ArgumentList "]";
FunctionCallExpression = IDENTIFIER "(" ArgumentList? ")";
ArgumentList = Expression {"," Expression};
LiteralExpression = INTEGER | REAL | STRING_LITERAL | BooleanLiteralExpression | HEX_LITERAL | CHAR_LITERAL | NULL_LITERAL | DATETIME | TIMESPAN;
BooleanLiteralExpression = TRUE | FALSE;
ExpressionGroup = "(" Expression ")";

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,78 @@
namespace Flee.Parsing
{
/// <summary>
/// An enumeration with token and production node constants. Values in
/// the 1001-1040 range carry the names of the tokens declared in
/// Expression.grammar; values in the 2001-2029 range carry the names of
/// its productions (upper-cased, with underscores).
/// </summary>
/// <remarks>
/// NOTE(review): NULL_LITERAL, HEX_LITERAL and IDENTIFIER are numbered in a
/// different order than they are declared in Expression.grammar - confirm
/// this is intentional before regenerating either file.
/// </remarks>
internal enum ExpressionConstants
{
// Token constants
ADD = 1001,
SUB = 1002,
MUL = 1003,
DIV = 1004,
POWER = 1005,
MOD = 1006,
LEFT_PAREN = 1007,
RIGHT_PAREN = 1008,
LEFT_BRACE = 1009,
RIGHT_BRACE = 1010,
EQ = 1011,
LT = 1012,
GT = 1013,
LTE = 1014,
GTE = 1015,
NE = 1016,
AND = 1017,
OR = 1018,
XOR = 1019,
NOT = 1020,
IN = 1021,
DOT = 1022,
ARGUMENT_SEPARATOR = 1023,
ARRAY_BRACES = 1024,
LEFT_SHIFT = 1025,
RIGHT_SHIFT = 1026,
WHITESPACE = 1027,
INTEGER = 1028,
REAL = 1029,
STRING_LITERAL = 1030,
CHAR_LITERAL = 1031,
TRUE = 1032,
FALSE = 1033,
NULL_LITERAL = 1034,
HEX_LITERAL = 1035,
IDENTIFIER = 1036,
TIMESPAN = 1037,
DATETIME = 1038,
IF = 1039,
CAST = 1040,
// Production constants
EXPRESSION = 2001,
XOR_EXPRESSION = 2002,
OR_EXPRESSION = 2003,
AND_EXPRESSION = 2004,
NOT_EXPRESSION = 2005,
IN_EXPRESSION = 2006,
IN_TARGET_EXPRESSION = 2007,
IN_LIST_TARGET_EXPRESSION = 2008,
COMPARE_EXPRESSION = 2009,
SHIFT_EXPRESSION = 2010,
ADDITIVE_EXPRESSION = 2011,
MULTIPLICATIVE_EXPRESSION = 2012,
POWER_EXPRESSION = 2013,
NEGATE_EXPRESSION = 2014,
MEMBER_EXPRESSION = 2015,
MEMBER_ACCESS_EXPRESSION = 2016,
BASIC_EXPRESSION = 2017,
MEMBER_FUNCTION_EXPRESSION = 2018,
FIELD_PROPERTY_EXPRESSION = 2019,
SPECIAL_FUNCTION_EXPRESSION = 2020,
IF_EXPRESSION = 2021,
CAST_EXPRESSION = 2022,
CAST_TYPE_EXPRESSION = 2023,
INDEX_EXPRESSION = 2024,
FUNCTION_CALL_EXPRESSION = 2025,
ARGUMENT_LIST = 2026,
LITERAL_EXPRESSION = 2027,
BOOLEAN_LITERAL_EXPRESSION = 2028,
EXPRESSION_GROUP = 2029
}
}

460
Parsing/ExpressionParser.cs Normal file
View File

@@ -0,0 +1,460 @@
using Flee.PublicTypes;
namespace Flee.Parsing
{
/// <summary>
/// A token stream parser for the expression grammar. Every constructor
/// wraps the input in an <c>ExpressionTokenizer</c> and then registers the
/// grammar's production patterns through <c>CreatePatterns</c>.
/// </summary>
internal class ExpressionParser : StackParser
{
    /// <summary>
    /// Identifiers of the helper productions that are flagged as synthetic
    /// when registered (repetitions and operator choices of the grammar).
    /// </summary>
    private enum SynteticPatterns
    {
        SUBPRODUCTION_1 = 3001,
        SUBPRODUCTION_2 = 3002,
        SUBPRODUCTION_3 = 3003,
        SUBPRODUCTION_4 = 3004,
        SUBPRODUCTION_5 = 3005,
        SUBPRODUCTION_6 = 3006,
        SUBPRODUCTION_7 = 3007,
        SUBPRODUCTION_8 = 3008,
        SUBPRODUCTION_9 = 3009,
        SUBPRODUCTION_10 = 3010,
        SUBPRODUCTION_11 = 3011,
        SUBPRODUCTION_12 = 3012,
        SUBPRODUCTION_13 = 3013,
        SUBPRODUCTION_14 = 3014,
        SUBPRODUCTION_15 = 3015,
        SUBPRODUCTION_16 = 3016
    }

    /// <summary>
    /// One element of an alternative: either a token or a production
    /// reference, together with the min/max counts passed on to
    /// <c>AddToken</c> / <c>AddProduction</c>.
    /// </summary>
    private readonly struct Symbol
    {
        public Symbol(bool isToken, int id, int min, int max)
        {
            IsToken = isToken;
            Id = id;
            Min = min;
            Max = max;
        }

        public bool IsToken { get; }
        public int Id { get; }
        public int Min { get; }
        public int Max { get; }
    }

    /// <summary>
    /// Creates a parser whose tokenizer is built with an expression context.
    /// </summary>
    /// <param name="input">The character source handed to the tokenizer.</param>
    /// <param name="analyzer">The analyzer handed to the base parser.</param>
    /// <param name="context">The expression context handed to the tokenizer.</param>
    public ExpressionParser(TextReader input, Analyzer analyzer, ExpressionContext context)
        : base(new ExpressionTokenizer(input, context), analyzer)
    {
        CreatePatterns();
    }

    /// <summary>
    /// Creates a parser without an analyzer or expression context.
    /// </summary>
    /// <param name="input">The character source handed to the tokenizer.</param>
    public ExpressionParser(TextReader input)
        : base(new ExpressionTokenizer(input))
    {
        CreatePatterns();
    }

    /// <summary>
    /// Creates a parser with an analyzer but without an expression context.
    /// </summary>
    /// <param name="input">The character source handed to the tokenizer.</param>
    /// <param name="analyzer">The analyzer handed to the base parser.</param>
    public ExpressionParser(TextReader input, Analyzer analyzer)
        : base(new ExpressionTokenizer(input), analyzer)
    {
        CreatePatterns();
    }

    /// <summary>
    /// Registers every production pattern of the expression grammar. The
    /// registration order, the order of the alternatives and the order of
    /// the elements inside each alternative are significant and fixed.
    /// </summary>
    private void CreatePatterns()
    {
        // ----- Named productions -----
        Define(ExpressionConstants.EXPRESSION, "Expression",
            Seq(Prod(ExpressionConstants.XOR_EXPRESSION)));

        Define(ExpressionConstants.XOR_EXPRESSION, "XorExpression",
            Seq(Prod(ExpressionConstants.OR_EXPRESSION), Prod(SynteticPatterns.SUBPRODUCTION_1, 0, -1)));

        Define(ExpressionConstants.OR_EXPRESSION, "OrExpression",
            Seq(Prod(ExpressionConstants.AND_EXPRESSION), Prod(SynteticPatterns.SUBPRODUCTION_2, 0, -1)));

        Define(ExpressionConstants.AND_EXPRESSION, "AndExpression",
            Seq(Prod(ExpressionConstants.NOT_EXPRESSION), Prod(SynteticPatterns.SUBPRODUCTION_3, 0, -1)));

        Define(ExpressionConstants.NOT_EXPRESSION, "NotExpression",
            Seq(Tok(ExpressionConstants.NOT, 0, 1), Prod(ExpressionConstants.IN_EXPRESSION)));

        Define(ExpressionConstants.IN_EXPRESSION, "InExpression",
            Seq(Prod(ExpressionConstants.COMPARE_EXPRESSION), Prod(SynteticPatterns.SUBPRODUCTION_4, 0, 1)));

        Define(ExpressionConstants.IN_TARGET_EXPRESSION, "InTargetExpression",
            Seq(Prod(ExpressionConstants.FIELD_PROPERTY_EXPRESSION)),
            Seq(Prod(ExpressionConstants.IN_LIST_TARGET_EXPRESSION)));

        Define(ExpressionConstants.IN_LIST_TARGET_EXPRESSION, "InListTargetExpression",
            Seq(Tok(ExpressionConstants.LEFT_PAREN),
                Prod(ExpressionConstants.ARGUMENT_LIST),
                Tok(ExpressionConstants.RIGHT_PAREN)));

        Define(ExpressionConstants.COMPARE_EXPRESSION, "CompareExpression",
            Seq(Prod(ExpressionConstants.SHIFT_EXPRESSION), Prod(SynteticPatterns.SUBPRODUCTION_6, 0, -1)));

        Define(ExpressionConstants.SHIFT_EXPRESSION, "ShiftExpression",
            Seq(Prod(ExpressionConstants.ADDITIVE_EXPRESSION), Prod(SynteticPatterns.SUBPRODUCTION_8, 0, -1)));

        Define(ExpressionConstants.ADDITIVE_EXPRESSION, "AdditiveExpression",
            Seq(Prod(ExpressionConstants.MULTIPLICATIVE_EXPRESSION), Prod(SynteticPatterns.SUBPRODUCTION_10, 0, -1)));

        Define(ExpressionConstants.MULTIPLICATIVE_EXPRESSION, "MultiplicativeExpression",
            Seq(Prod(ExpressionConstants.POWER_EXPRESSION), Prod(SynteticPatterns.SUBPRODUCTION_12, 0, -1)));

        Define(ExpressionConstants.POWER_EXPRESSION, "PowerExpression",
            Seq(Prod(ExpressionConstants.NEGATE_EXPRESSION), Prod(SynteticPatterns.SUBPRODUCTION_13, 0, -1)));

        Define(ExpressionConstants.NEGATE_EXPRESSION, "NegateExpression",
            Seq(Tok(ExpressionConstants.SUB, 0, 1), Prod(ExpressionConstants.MEMBER_EXPRESSION)));

        Define(ExpressionConstants.MEMBER_EXPRESSION, "MemberExpression",
            Seq(Prod(ExpressionConstants.BASIC_EXPRESSION), Prod(SynteticPatterns.SUBPRODUCTION_14, 0, -1)));

        Define(ExpressionConstants.MEMBER_ACCESS_EXPRESSION, "MemberAccessExpression",
            Seq(Tok(ExpressionConstants.DOT), Prod(ExpressionConstants.MEMBER_FUNCTION_EXPRESSION)));

        Define(ExpressionConstants.BASIC_EXPRESSION, "BasicExpression",
            Seq(Prod(ExpressionConstants.LITERAL_EXPRESSION)),
            Seq(Prod(ExpressionConstants.EXPRESSION_GROUP)),
            Seq(Prod(ExpressionConstants.MEMBER_FUNCTION_EXPRESSION)),
            Seq(Prod(ExpressionConstants.SPECIAL_FUNCTION_EXPRESSION)));

        Define(ExpressionConstants.MEMBER_FUNCTION_EXPRESSION, "MemberFunctionExpression",
            Seq(Prod(ExpressionConstants.FIELD_PROPERTY_EXPRESSION)),
            Seq(Prod(ExpressionConstants.FUNCTION_CALL_EXPRESSION)));

        Define(ExpressionConstants.FIELD_PROPERTY_EXPRESSION, "FieldPropertyExpression",
            Seq(Tok(ExpressionConstants.IDENTIFIER)));

        Define(ExpressionConstants.SPECIAL_FUNCTION_EXPRESSION, "SpecialFunctionExpression",
            Seq(Prod(ExpressionConstants.IF_EXPRESSION)),
            Seq(Prod(ExpressionConstants.CAST_EXPRESSION)));

        Define(ExpressionConstants.IF_EXPRESSION, "IfExpression",
            Seq(Tok(ExpressionConstants.IF),
                Tok(ExpressionConstants.LEFT_PAREN),
                Prod(ExpressionConstants.EXPRESSION),
                Tok(ExpressionConstants.ARGUMENT_SEPARATOR),
                Prod(ExpressionConstants.EXPRESSION),
                Tok(ExpressionConstants.ARGUMENT_SEPARATOR),
                Prod(ExpressionConstants.EXPRESSION),
                Tok(ExpressionConstants.RIGHT_PAREN)));

        Define(ExpressionConstants.CAST_EXPRESSION, "CastExpression",
            Seq(Tok(ExpressionConstants.CAST),
                Tok(ExpressionConstants.LEFT_PAREN),
                Prod(ExpressionConstants.EXPRESSION),
                Tok(ExpressionConstants.ARGUMENT_SEPARATOR),
                Prod(ExpressionConstants.CAST_TYPE_EXPRESSION),
                Tok(ExpressionConstants.RIGHT_PAREN)));

        Define(ExpressionConstants.CAST_TYPE_EXPRESSION, "CastTypeExpression",
            Seq(Tok(ExpressionConstants.IDENTIFIER),
                Prod(SynteticPatterns.SUBPRODUCTION_15, 0, -1),
                Tok(ExpressionConstants.ARRAY_BRACES, 0, 1)));

        Define(ExpressionConstants.INDEX_EXPRESSION, "IndexExpression",
            Seq(Tok(ExpressionConstants.LEFT_BRACE),
                Prod(ExpressionConstants.ARGUMENT_LIST),
                Tok(ExpressionConstants.RIGHT_BRACE)));

        Define(ExpressionConstants.FUNCTION_CALL_EXPRESSION, "FunctionCallExpression",
            Seq(Tok(ExpressionConstants.IDENTIFIER),
                Tok(ExpressionConstants.LEFT_PAREN),
                Prod(ExpressionConstants.ARGUMENT_LIST, 0, 1),
                Tok(ExpressionConstants.RIGHT_PAREN)));

        Define(ExpressionConstants.ARGUMENT_LIST, "ArgumentList",
            Seq(Prod(ExpressionConstants.EXPRESSION), Prod(SynteticPatterns.SUBPRODUCTION_16, 0, -1)));

        Define(ExpressionConstants.LITERAL_EXPRESSION, "LiteralExpression",
            Seq(Tok(ExpressionConstants.INTEGER)),
            Seq(Tok(ExpressionConstants.REAL)),
            Seq(Tok(ExpressionConstants.STRING_LITERAL)),
            Seq(Prod(ExpressionConstants.BOOLEAN_LITERAL_EXPRESSION)),
            Seq(Tok(ExpressionConstants.HEX_LITERAL)),
            Seq(Tok(ExpressionConstants.CHAR_LITERAL)),
            Seq(Tok(ExpressionConstants.NULL_LITERAL)),
            Seq(Tok(ExpressionConstants.DATETIME)),
            Seq(Tok(ExpressionConstants.TIMESPAN)));

        Define(ExpressionConstants.BOOLEAN_LITERAL_EXPRESSION, "BooleanLiteralExpression",
            Seq(Tok(ExpressionConstants.TRUE)),
            Seq(Tok(ExpressionConstants.FALSE)));

        Define(ExpressionConstants.EXPRESSION_GROUP, "ExpressionGroup",
            Seq(Tok(ExpressionConstants.LEFT_PAREN),
                Prod(ExpressionConstants.EXPRESSION),
                Tok(ExpressionConstants.RIGHT_PAREN)));

        // ----- Synthetic helper productions -----
        DefineSynthetic(SynteticPatterns.SUBPRODUCTION_1, "Subproduction1",
            Seq(Tok(ExpressionConstants.XOR), Prod(ExpressionConstants.OR_EXPRESSION)));

        DefineSynthetic(SynteticPatterns.SUBPRODUCTION_2, "Subproduction2",
            Seq(Tok(ExpressionConstants.OR), Prod(ExpressionConstants.AND_EXPRESSION)));

        DefineSynthetic(SynteticPatterns.SUBPRODUCTION_3, "Subproduction3",
            Seq(Tok(ExpressionConstants.AND), Prod(ExpressionConstants.NOT_EXPRESSION)));

        DefineSynthetic(SynteticPatterns.SUBPRODUCTION_4, "Subproduction4",
            Seq(Tok(ExpressionConstants.IN), Prod(ExpressionConstants.IN_TARGET_EXPRESSION)));

        DefineSynthetic(SynteticPatterns.SUBPRODUCTION_5, "Subproduction5",
            Seq(Tok(ExpressionConstants.EQ)),
            Seq(Tok(ExpressionConstants.GT)),
            Seq(Tok(ExpressionConstants.LT)),
            Seq(Tok(ExpressionConstants.GTE)),
            Seq(Tok(ExpressionConstants.LTE)),
            Seq(Tok(ExpressionConstants.NE)));

        DefineSynthetic(SynteticPatterns.SUBPRODUCTION_6, "Subproduction6",
            Seq(Prod(SynteticPatterns.SUBPRODUCTION_5), Prod(ExpressionConstants.SHIFT_EXPRESSION)));

        DefineSynthetic(SynteticPatterns.SUBPRODUCTION_7, "Subproduction7",
            Seq(Tok(ExpressionConstants.LEFT_SHIFT)),
            Seq(Tok(ExpressionConstants.RIGHT_SHIFT)));

        DefineSynthetic(SynteticPatterns.SUBPRODUCTION_8, "Subproduction8",
            Seq(Prod(SynteticPatterns.SUBPRODUCTION_7), Prod(ExpressionConstants.ADDITIVE_EXPRESSION)));

        DefineSynthetic(SynteticPatterns.SUBPRODUCTION_9, "Subproduction9",
            Seq(Tok(ExpressionConstants.ADD)),
            Seq(Tok(ExpressionConstants.SUB)));

        DefineSynthetic(SynteticPatterns.SUBPRODUCTION_10, "Subproduction10",
            Seq(Prod(SynteticPatterns.SUBPRODUCTION_9), Prod(ExpressionConstants.MULTIPLICATIVE_EXPRESSION)));

        DefineSynthetic(SynteticPatterns.SUBPRODUCTION_11, "Subproduction11",
            Seq(Tok(ExpressionConstants.MUL)),
            Seq(Tok(ExpressionConstants.DIV)),
            Seq(Tok(ExpressionConstants.MOD)));

        DefineSynthetic(SynteticPatterns.SUBPRODUCTION_12, "Subproduction12",
            Seq(Prod(SynteticPatterns.SUBPRODUCTION_11), Prod(ExpressionConstants.POWER_EXPRESSION)));

        DefineSynthetic(SynteticPatterns.SUBPRODUCTION_13, "Subproduction13",
            Seq(Tok(ExpressionConstants.POWER), Prod(ExpressionConstants.NEGATE_EXPRESSION)));

        DefineSynthetic(SynteticPatterns.SUBPRODUCTION_14, "Subproduction14",
            Seq(Prod(ExpressionConstants.MEMBER_ACCESS_EXPRESSION)),
            Seq(Prod(ExpressionConstants.INDEX_EXPRESSION)));

        DefineSynthetic(SynteticPatterns.SUBPRODUCTION_15, "Subproduction15",
            Seq(Tok(ExpressionConstants.DOT), Tok(ExpressionConstants.IDENTIFIER)));

        DefineSynthetic(SynteticPatterns.SUBPRODUCTION_16, "Subproduction16",
            Seq(Tok(ExpressionConstants.ARGUMENT_SEPARATOR), Prod(ExpressionConstants.EXPRESSION)));
    }

    /// <summary>Describes a token element (defaults to exactly one occurrence).</summary>
    private static Symbol Tok(ExpressionConstants id, int min = 1, int max = 1)
    {
        return new Symbol(true, (int)id, min, max);
    }

    /// <summary>Describes a reference to a named production (defaults to exactly one occurrence).</summary>
    private static Symbol Prod(ExpressionConstants id, int min = 1, int max = 1)
    {
        return new Symbol(false, (int)id, min, max);
    }

    /// <summary>Describes a reference to a synthetic production (defaults to exactly one occurrence).</summary>
    private static Symbol Prod(SynteticPatterns id, int min = 1, int max = 1)
    {
        return new Symbol(false, (int)id, min, max);
    }

    /// <summary>Groups the elements of one alternative, in order.</summary>
    private static Symbol[] Seq(params Symbol[] symbols)
    {
        return symbols;
    }

    /// <summary>Registers a named (non-synthetic) production pattern.</summary>
    private void Define(ExpressionConstants id, string name, params Symbol[][] alternatives)
    {
        RegisterPattern((int)id, name, false, alternatives);
    }

    /// <summary>Registers a production pattern flagged as synthetic.</summary>
    private void DefineSynthetic(SynteticPatterns id, string name, params Symbol[][] alternatives)
    {
        RegisterPattern((int)id, name, true, alternatives);
    }

    /// <summary>
    /// Builds one production pattern and adds it to the parser. The calls
    /// are issued in a fixed order: create the pattern, optionally set the
    /// synthetic flag, then for each alternative create it, add its
    /// elements and attach it, and finally add the pattern itself.
    /// </summary>
    private void RegisterPattern(int id, string name, bool synthetic, Symbol[][] alternatives)
    {
        var pattern = new ProductionPattern(id, name);
        if (synthetic)
        {
            pattern.Synthetic = true;
        }

        foreach (Symbol[] sequence in alternatives)
        {
            var alt = new ProductionPatternAlternative();
            foreach (Symbol symbol in sequence)
            {
                if (symbol.IsToken)
                {
                    alt.AddToken(symbol.Id, symbol.Min, symbol.Max);
                }
                else
                {
                    alt.AddProduction(symbol.Id, symbol.Min, symbol.Max);
                }
            }
            pattern.AddAlternative(alt);
        }

        AddPattern(pattern);
    }
}
}

View File

@@ -0,0 +1,153 @@
using Flee.PublicTypes;
namespace Flee.Parsing
{
/// <summary>
/// A character stream tokenizer for the expression grammar. Every
/// constructor registers the full set of token patterns through
/// <c>CreatePatterns</c>.
/// </summary>
internal class ExpressionTokenizer : Tokenizer
{
    // Passed to the custom patterns' Initialize calls; stays null when the
    // single-argument constructor is used.
    private readonly ExpressionContext _myContext;

    /// <summary>
    /// Creates a tokenizer bound to an expression context.
    /// </summary>
    /// <param name="input">The character source handed to the base tokenizer.</param>
    /// <param name="context">The context forwarded to the custom token patterns.</param>
    public ExpressionTokenizer(TextReader input, ExpressionContext context)
        : base(input, true)
    {
        _myContext = context;
        CreatePatterns();
    }

    /// <summary>
    /// Creates a tokenizer without an expression context.
    /// </summary>
    /// <param name="input">The character source handed to the base tokenizer.</param>
    public ExpressionTokenizer(TextReader input)
        : base(input, true)
    {
        CreatePatterns();
    }

    /// <summary>
    /// Registers every token pattern of the expression grammar, in a fixed order.
    /// </summary>
    private void CreatePatterns()
    {
        // The REAL pattern text is handed to both the constructor and Initialize.
        const string realFormat = "\\d{0}\\{1}\\d+([e][+-]\\d{{1,3}})?(d|f|m)?";

        // Operators and punctuation.
        AddStringToken(ExpressionConstants.ADD, "ADD", "+");
        AddStringToken(ExpressionConstants.SUB, "SUB", "-");
        AddStringToken(ExpressionConstants.MUL, "MUL", "*");
        AddStringToken(ExpressionConstants.DIV, "DIV", "/");
        AddStringToken(ExpressionConstants.POWER, "POWER", "^");
        AddStringToken(ExpressionConstants.MOD, "MOD", "%");
        AddStringToken(ExpressionConstants.LEFT_PAREN, "LEFT_PAREN", "(");
        AddStringToken(ExpressionConstants.RIGHT_PAREN, "RIGHT_PAREN", ")");
        AddStringToken(ExpressionConstants.LEFT_BRACE, "LEFT_BRACE", "[");
        AddStringToken(ExpressionConstants.RIGHT_BRACE, "RIGHT_BRACE", "]");
        AddStringToken(ExpressionConstants.EQ, "EQ", "=");
        AddStringToken(ExpressionConstants.LT, "LT", "<");
        AddStringToken(ExpressionConstants.GT, "GT", ">");
        AddStringToken(ExpressionConstants.LTE, "LTE", "<=");
        AddStringToken(ExpressionConstants.GTE, "GTE", ">=");
        AddStringToken(ExpressionConstants.NE, "NE", "<>");
        AddStringToken(ExpressionConstants.AND, "AND", "AND");
        AddStringToken(ExpressionConstants.OR, "OR", "OR");
        AddStringToken(ExpressionConstants.XOR, "XOR", "XOR");
        AddStringToken(ExpressionConstants.NOT, "NOT", "NOT");
        AddStringToken(ExpressionConstants.IN, "IN", "in");
        AddStringToken(ExpressionConstants.DOT, "DOT", ".");

        // The argument separator is a custom pattern that also receives the context.
        CustomTokenPattern separator = new ArgumentSeparatorPattern(
            (int)ExpressionConstants.ARGUMENT_SEPARATOR, "ARGUMENT_SEPARATOR", TokenPattern.PatternType.STRING, ",");
        separator.Initialize(
            (int)ExpressionConstants.ARGUMENT_SEPARATOR, "ARGUMENT_SEPARATOR", TokenPattern.PatternType.STRING, ",", _myContext);
        AddPattern(separator);

        AddStringToken(ExpressionConstants.ARRAY_BRACES, "ARRAY_BRACES", "[]");
        AddStringToken(ExpressionConstants.LEFT_SHIFT, "LEFT_SHIFT", "<<");
        AddStringToken(ExpressionConstants.RIGHT_SHIFT, "RIGHT_SHIFT", ">>");

        // Whitespace is registered but flagged to be ignored.
        TokenPattern whitespace = new TokenPattern(
            (int)ExpressionConstants.WHITESPACE, "WHITESPACE", TokenPattern.PatternType.REGEXP, "\\s+");
        whitespace.Ignore = true;
        AddPattern(whitespace);

        // Literals.
        AddRegexpToken(ExpressionConstants.INTEGER, "INTEGER", "\\d+(u|l|ul|lu|f|m)?");

        CustomTokenPattern real = new RealPattern(
            (int)ExpressionConstants.REAL, "REAL", TokenPattern.PatternType.REGEXP, realFormat);
        real.Initialize(
            (int)ExpressionConstants.REAL, "REAL", TokenPattern.PatternType.REGEXP, realFormat, _myContext);
        AddPattern(real, false);

        AddPattern(
            new TokenPattern((int)ExpressionConstants.STRING_LITERAL, "STRING_LITERAL", TokenPattern.PatternType.REGEXP, "\"([^\"\\r\\n\\\\]|\\\\u[0-9a-f]{4}|\\\\[\\\\\"'trn])*\""),
            false);
        AddPattern(
            new TokenPattern((int)ExpressionConstants.CHAR_LITERAL, "CHAR_LITERAL", TokenPattern.PatternType.REGEXP, "'([^'\\r\\n\\\\]|\\\\u[0-9a-f]{4}|\\\\[\\\\\"'trn])'"),
            false);

        AddStringToken(ExpressionConstants.TRUE, "TRUE", "True");
        AddStringToken(ExpressionConstants.FALSE, "FALSE", "False");
        AddRegexpToken(ExpressionConstants.IDENTIFIER, "IDENTIFIER", "[a-z_]\\w*");
        AddRegexpToken(ExpressionConstants.HEX_LITERAL, "HEX_LITERAL", "0x[0-9a-f]+(u|l|ul|lu)?");
        AddStringToken(ExpressionConstants.NULL_LITERAL, "NULL_LITERAL", "null");

        AddPattern(
            new TokenPattern((int)ExpressionConstants.TIMESPAN, "TIMESPAN", TokenPattern.PatternType.REGEXP, "##(\\d+\\.)?\\d{2}:\\d{2}(:\\d{2}(\\.\\d{1,7})?)?#"),
            false);
        AddRegexpToken(ExpressionConstants.DATETIME, "DATETIME", "#[^#]+#");

        // Special function keywords.
        AddStringToken(ExpressionConstants.IF, "IF", "if");
        AddStringToken(ExpressionConstants.CAST, "CAST", "cast");
    }

    /// <summary>Registers a STRING-type token pattern through the single-argument AddPattern.</summary>
    private void AddStringToken(ExpressionConstants id, string name, string image)
    {
        TokenPattern pattern = new TokenPattern((int)id, name, TokenPattern.PatternType.STRING, image);
        AddPattern(pattern);
    }

    /// <summary>Registers a REGEXP-type token pattern through the single-argument AddPattern.</summary>
    private void AddRegexpToken(ExpressionConstants id, string name, string regexp)
    {
        TokenPattern pattern = new TokenPattern((int)id, name, TokenPattern.PatternType.REGEXP, regexp);
        AddPattern(pattern);
    }
}
}

226
Parsing/LookAheadReader.cs Normal file
View File

@@ -0,0 +1,226 @@
namespace Flee.Parsing
{
/// <summary>
/// A look-ahead character stream reader. It buffers the wrapped reader,
/// tracks the current line and column, and allows peeking any number of
/// characters ahead. Peeking further ahead enlarges the internal buffer so
/// that it holds every character from the current position onwards, so
/// very long look-aheads cost a proportional amount of memory.
/// </summary>
internal class LookAheadReader : TextReader
{
    // Number of characters requested from the underlying reader per read
    // (requests are rounded up to a multiple of this).
    private const int StreamBlockSize = 4096;

    // Granularity of buffer growth, and the consumed-prefix threshold
    // above which already-read characters are discarded.
    private const int BufferBlockSize = 1024;

    private char[] _buffer = new char[StreamBlockSize];
    private int _pos;     // index of the next unread character in _buffer
    private int _length;  // number of valid characters in _buffer
    private TextReader _input; // null once closed or exhausted
    private int _line = 1;
    private int _column = 1;

    /// <summary>
    /// Creates a look-ahead reader on top of another reader.
    /// </summary>
    /// <param name="input">The reader to pull characters from.</param>
    public LookAheadReader(TextReader input) : base()
    {
        _input = input;
    }

    /// <summary>The line number of the next character to read (starts at 1).</summary>
    public int LineNumber => _line;

    /// <summary>The column number of the next character to read (starts at 1).</summary>
    public int ColumnNumber => _column;

    /// <summary>
    /// Reads and consumes a single character.
    /// </summary>
    /// <returns>The character read, or -1 at the end of the stream.</returns>
    public override int Read()
    {
        ReadAhead(1);
        if (_pos >= _length)
        {
            return -1;
        }

        UpdateLineColumnNumbers(1);
        // Return the character at the current position, then advance.
        return _buffer[_pos++];
    }

    /// <summary>
    /// Reads and consumes up to <paramref name="len"/> characters.
    /// </summary>
    /// <param name="cbuf">The destination array.</param>
    /// <param name="off">The index in <paramref name="cbuf"/> to start writing at.</param>
    /// <param name="len">The maximum number of characters to read.</param>
    /// <returns>
    /// The number of characters copied, or -1 at the end of the stream.
    /// NOTE(review): the .NET TextReader convention is to return 0 at the
    /// end of the stream; -1 is kept because existing callers may test for it.
    /// </returns>
    public override int Read(char[] cbuf, int off, int len)
    {
        ReadAhead(len);
        if (_pos >= _length)
        {
            return -1;
        }

        int count = Math.Min(_length - _pos, len);
        UpdateLineColumnNumbers(count);
        Array.Copy(_buffer, _pos, cbuf, off, count);
        _pos += count;
        return count;
    }

    /// <summary>
    /// Reads and consumes up to <paramref name="len"/> characters as a string.
    /// </summary>
    /// <param name="len">The maximum number of characters to read.</param>
    /// <returns>The characters read, or null at the end of the stream.</returns>
    public string ReadString(int len)
    {
        ReadAhead(len);
        if (_pos >= _length)
        {
            return null;
        }

        int count = Math.Min(_length - _pos, len);
        UpdateLineColumnNumbers(count);
        string result = new string(_buffer, _pos, count);
        _pos += count;
        return result;
    }

    /// <summary>
    /// Returns the next character without consuming it.
    /// </summary>
    /// <returns>The next character, or -1 at the end of the stream.</returns>
    public override int Peek()
    {
        return Peek(0);
    }

    /// <summary>
    /// Returns the character <paramref name="off"/> positions ahead of the
    /// current position without consuming anything.
    /// </summary>
    /// <param name="off">The look-ahead offset; 0 is the next character.</param>
    /// <returns>The character, or -1 if the offset is past the end of the stream.</returns>
    public int Peek(int off)
    {
        ReadAhead(off + 1);
        if (_pos + off >= _length)
        {
            return -1;
        }

        return _buffer[_pos + off];
    }

    /// <summary>
    /// Returns up to <paramref name="len"/> characters starting
    /// <paramref name="off"/> positions ahead, without consuming anything.
    /// </summary>
    /// <param name="off">The look-ahead offset; 0 is the next character.</param>
    /// <param name="len">The maximum number of characters to return.</param>
    /// <returns>The characters, or null if the offset is past the end of the stream.</returns>
    public string PeekString(int off, int len)
    {
        ReadAhead(off + len + 1);
        if (_pos + off >= _length)
        {
            return null;
        }

        int count = Math.Min(_length - (_pos + off), len);
        return new string(_buffer, _pos + off, count);
    }

    /// <summary>
    /// Releases the buffer and closes the underlying reader if it is still open.
    /// </summary>
    public override void Close()
    {
        _buffer = null;
        _pos = 0;
        _length = 0;
        if (_input != null)
        {
            _input.Close();
            _input = null;
        }
    }

    /// <summary>
    /// Makes sure the buffer holds the characters up to
    /// <paramref name="offset"/> positions past the current one, reading
    /// from the underlying reader in whole blocks when needed.
    /// </summary>
    /// <param name="offset">The number of characters needed from the current position.</param>
    /// <exception cref="IOException">Rethrown from the underlying reader; the reader is dropped first.</exception>
    private void ReadAhead(int offset)
    {
        // Nothing to do at end of stream or when the characters are already buffered.
        if (_input == null || _pos + offset < _length)
        {
            return;
        }

        // Discard already-consumed characters once enough have accumulated.
        if (_pos > BufferBlockSize)
        {
            Array.Copy(_buffer, _pos, _buffer, 0, _length - _pos);
            _length -= _pos;
            _pos = 0;
        }

        // Round the number of characters to request up to whole stream blocks.
        int size = _pos + offset - _length + 1;
        if (size % StreamBlockSize != 0)
        {
            size = (size / StreamBlockSize) * StreamBlockSize;
            size += StreamBlockSize;
        }
        EnsureBufferCapacity(_length + size);

        int readSize;
        try
        {
            readSize = _input.Read(_buffer, _length, size);
        }
        catch (IOException)
        {
            // Stop using the reader after a failure, but let the caller see the error.
            _input = null;
            throw;
        }

        if (readSize > 0)
        {
            _length += readSize;
        }

        // A short read is treated as the end of the stream.
        if (readSize < size)
        {
            try
            {
                _input.Close();
            }
            finally
            {
                _input = null;
            }
        }
    }

    /// <summary>
    /// Grows the buffer, in multiples of <c>BufferBlockSize</c>, so that it
    /// can hold at least <paramref name="size"/> characters.
    /// </summary>
    /// <param name="size">The minimum required capacity.</param>
    private void EnsureBufferCapacity(int size)
    {
        if (_buffer.Length >= size)
        {
            return;
        }

        if (size % BufferBlockSize != 0)
        {
            size = (size / BufferBlockSize) * BufferBlockSize;
            size += BufferBlockSize;
        }

        char[] grown = new char[size];
        Array.Copy(_buffer, 0, grown, 0, _length);
        _buffer = grown;
    }

    /// <summary>
    /// Advances the line and column counters over the next
    /// <paramref name="offset"/> characters. Must be called before
    /// <c>_pos</c> is moved past them.
    /// </summary>
    /// <param name="offset">The number of characters about to be consumed.</param>
    private void UpdateLineColumnNumbers(int offset)
    {
        for (int i = 0; i < offset; i++)
        {
            // Only a line feed starts a new line; everything else moves the column.
            if (_buffer[_pos + i] == '\n')
            {
                _line += 1;
                _column = 1;
            }
            else
            {
                _column += 1;
            }
        }
    }
}
}

589
Parsing/LookAheadSet.cs Normal file
View File

@@ -0,0 +1,589 @@
using System.Collections;
using System.Text;
namespace Flee.Parsing
{
/// <summary>
/// A token look-ahead set. The set holds token id sequences, none of
/// which is longer than a fixed maximum length, and it filters out
/// duplicates. Each sequence also carries a repeat flag, so the set
/// can record that a sequence may repeat indefinitely. That matters
/// when two look-ahead sets conflict: such a conflict cannot be
/// resolved if the conflicting sequences can be repeated (it would
/// cause an infinite loop).
/// </summary>
internal class LookAheadSet
{
    // Sequence instances; Add(Sequence) keeps the list free of duplicates.
    private readonly ArrayList _elements = new ArrayList();

    // Longest sequence stored in this set; longer ones are truncated when added.
    private readonly int _maxLength;

    /// <summary>Creates an empty look-ahead set.</summary>
    /// <param name="maxLength">Maximum length of any sequence stored in the set.</param>
    public LookAheadSet(int maxLength)
    {
        _maxLength = maxLength;
    }

    /// <summary>
    /// Creates a look-ahead set holding the sequences of another set,
    /// truncated to <paramref name="maxLength"/> where necessary.
    /// </summary>
    /// <param name="maxLength">Maximum length of any sequence stored in the set.</param>
    /// <param name="set">The set whose sequences are copied in.</param>
    public LookAheadSet(int maxLength, LookAheadSet set)
        : this(maxLength)
    {
        AddAll(set);
    }

    /// <summary>Returns the number of sequences in the set.</summary>
    public int Size()
    {
        return _elements.Count;
    }

    /// <summary>Returns the length of the shortest sequence, or 0 for an empty set.</summary>
    public int GetMinLength()
    {
        int min = -1;
        foreach (Sequence seq in _elements)
        {
            int length = seq.Length();
            if (min < 0 || length < min)
            {
                min = length;
            }
        }
        return min < 0 ? 0 : min;
    }

    /// <summary>Returns the length of the longest sequence, or 0 for an empty set.</summary>
    public int GetMaxLength()
    {
        int max = 0;
        foreach (Sequence seq in _elements)
        {
            int length = seq.Length();
            if (length > max)
            {
                max = length;
            }
        }
        return max;
    }

    /// <summary>
    /// Returns the distinct first token ids of all non-empty
    /// sequences, in order of first appearance.
    /// </summary>
    public int[] GetInitialTokens()
    {
        var initial = new ArrayList();
        foreach (Sequence seq in _elements)
        {
            // GetToken yields a boxed id, or null for an empty sequence.
            object token = seq.GetToken(0);
            if (token != null && !initial.Contains(token))
            {
                initial.Add(token);
            }
        }

        var result = new int[initial.Count];
        for (int i = 0; i < result.Length; i++)
        {
            result[i] = (int)initial[i];
        }
        return result;
    }

    /// <summary>Returns true if at least one sequence has its repeat flag set.</summary>
    public bool IsRepetitive()
    {
        foreach (Sequence seq in _elements)
        {
            if (seq.IsRepetitive())
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Returns true if the upcoming tokens of the parser match some
    /// sequence in this set in full.
    /// </summary>
    /// <param name="parser">The parser whose upcoming tokens are peeked.</param>
    public bool IsNext(Parser parser)
    {
        foreach (Sequence seq in _elements)
        {
            if (seq.IsNext(parser))
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Returns true if the upcoming tokens of the parser match the
    /// first <paramref name="length"/> tokens of some sequence.
    /// </summary>
    /// <param name="parser">The parser whose upcoming tokens are peeked.</param>
    /// <param name="length">Maximum number of tokens to compare per sequence.</param>
    public bool IsNext(Parser parser, int length)
    {
        foreach (Sequence seq in _elements)
        {
            if (seq.IsNext(parser, length))
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Returns true if some sequence in this set is a prefix of, or
    /// has as a prefix, some sequence in the other set.
    /// </summary>
    public bool IsOverlap(LookAheadSet set)
    {
        foreach (Sequence seq in _elements)
        {
            if (set.IsOverlap(seq))
            {
                return true;
            }
        }
        return false;
    }

    // True if seq and some element of this set start with one another.
    private bool IsOverlap(Sequence seq)
    {
        foreach (Sequence elem in _elements)
        {
            if (seq.StartsWith(elem) || elem.StartsWith(seq))
            {
                return true;
            }
        }
        return false;
    }

    // True if an equal sequence (same tokens; the repeat flag is not compared) is stored.
    private bool Contains(Sequence elem)
    {
        return FindSequence(elem) != null;
    }

    /// <summary>Returns true if the two sets have at least one sequence in common.</summary>
    public bool Intersects(LookAheadSet set)
    {
        foreach (Sequence seq in _elements)
        {
            if (set.Contains(seq))
            {
                return true;
            }
        }
        return false;
    }

    // Returns the stored sequence equal to elem, or null when there is none.
    private Sequence FindSequence(Sequence elem)
    {
        foreach (object candidate in _elements)
        {
            if (candidate.Equals(elem))
            {
                return (Sequence)candidate;
            }
        }
        return null;
    }

    // Adds a sequence, truncated to the maximum length, unless an equal one exists.
    private void Add(Sequence seq)
    {
        if (seq.Length() > _maxLength)
        {
            seq = new Sequence(_maxLength, seq);
        }
        if (!Contains(seq))
        {
            _elements.Add(seq);
        }
    }

    /// <summary>Adds a single-token, non-repeating sequence to the set.</summary>
    /// <param name="token">The token id.</param>
    public void Add(int token)
    {
        Add(new Sequence(false, token));
    }

    /// <summary>Adds every sequence of another set to this one.</summary>
    public void AddAll(LookAheadSet set)
    {
        // Indexed loop on purpose: stays valid even if set is this instance.
        for (int i = 0; i < set._elements.Count; i++)
        {
            Add((Sequence)set._elements[i]);
        }
    }

    /// <summary>Adds the empty sequence to the set.</summary>
    public void AddEmpty()
    {
        Add(new Sequence());
    }

    // Removes the first stored sequence equal to seq, if any.
    private void Remove(Sequence seq)
    {
        _elements.Remove(seq);
    }

    /// <summary>Removes every sequence of another set from this one.</summary>
    public void RemoveAll(LookAheadSet set)
    {
        // Indexed loop on purpose: the list being walked may be the one modified.
        for (int i = 0; i < set._elements.Count; i++)
        {
            Remove((Sequence)set._elements[i]);
        }
    }

    /// <summary>
    /// Creates the set of sequences that can follow the given initial
    /// token: each sequence starting with it, minus that first token.
    /// The new set has a maximum length one less than this set.
    /// </summary>
    /// <param name="token">The initial token id.</param>
    public LookAheadSet CreateNextSet(int token)
    {
        var result = new LookAheadSet(_maxLength - 1);
        foreach (Sequence seq in _elements)
        {
            object first = seq.GetToken(0);
            if (first != null && token == (int)first)
            {
                result.Add(seq.Subsequence(1));
            }
        }
        return result;
    }

    /// <summary>
    /// Creates a set holding the sequences present in both sets. When
    /// this set's copy is repetitive, the other set's copy is the one
    /// stored; otherwise this set's copy is stored.
    /// </summary>
    public LookAheadSet CreateIntersection(LookAheadSet set)
    {
        var result = new LookAheadSet(_maxLength);
        foreach (Sequence own in _elements)
        {
            Sequence other = set.FindSequence(own);
            if (other != null)
            {
                result.Add(own.IsRepetitive() ? other : own);
            }
        }
        return result;
    }

    /// <summary>
    /// Creates a set with every sequence of this set followed by every
    /// sequence of the other set, truncated to the maximum length.
    /// If either set is empty, the other set itself is returned.
    /// </summary>
    public LookAheadSet CreateCombination(LookAheadSet set)
    {
        // Special cases: nothing to combine with.
        if (Size() <= 0)
        {
            return set;
        }
        if (set.Size() <= 0)
        {
            return this;
        }

        var result = new LookAheadSet(_maxLength);
        foreach (Sequence first in _elements)
        {
            if (first.Length() >= _maxLength)
            {
                // Already at full length; appending would change nothing.
                result.Add(first);
            }
            else if (first.Length() <= 0)
            {
                result.AddAll(set);
            }
            else
            {
                foreach (Sequence second in set._elements)
                {
                    result.Add(first.Concat(_maxLength, second));
                }
            }
        }
        return result;
    }

    /// <summary>
    /// Creates a set with the sequences of this set that overlap
    /// (share a prefix relation with) some sequence in the other set.
    /// </summary>
    public LookAheadSet CreateOverlaps(LookAheadSet set)
    {
        var result = new LookAheadSet(_maxLength);
        foreach (Sequence seq in _elements)
        {
            if (set.IsOverlap(seq))
            {
                result.Add(seq);
            }
        }
        return result;
    }

    /// <summary>
    /// Creates a set with the remainders of the sequences in this set
    /// that start with a sequence from the other set (that prefix is
    /// removed). If either set is empty, this set itself is returned.
    /// </summary>
    public LookAheadSet CreateFilter(LookAheadSet set)
    {
        if (Size() <= 0 || set.Size() <= 0)
        {
            return this;
        }

        var result = new LookAheadSet(_maxLength);
        foreach (Sequence first in _elements)
        {
            foreach (Sequence second in set._elements)
            {
                if (first.StartsWith(second))
                {
                    result.Add(first.Subsequence(second.Length()));
                }
            }
        }
        return result;
    }

    /// <summary>
    /// Creates a copy of this set in which every sequence has its
    /// repeat flag set.
    /// </summary>
    public LookAheadSet CreateRepetitive()
    {
        var result = new LookAheadSet(_maxLength);
        foreach (Sequence seq in _elements)
        {
            result.Add(seq.IsRepetitive() ? seq : new Sequence(true, seq));
        }
        return result;
    }

    /// <summary>Returns a string listing the sequences by token id.</summary>
    public override string ToString()
    {
        return ToString(null);
    }

    /// <summary>
    /// Returns a string listing the sequences, one per line inside
    /// braces.
    /// </summary>
    /// <param name="tokenizer">
    /// Tokenizer used to describe the token ids, or null to print the
    /// numeric ids.
    /// </param>
    public string ToString(Tokenizer tokenizer)
    {
        var buffer = new StringBuilder();
        buffer.Append("{");
        foreach (Sequence seq in _elements)
        {
            buffer.Append("\n ");
            buffer.Append(seq.ToString(tokenizer));
        }
        buffer.Append("\n}");
        return buffer.ToString();
    }

    /// <summary>
    /// A token id sequence with a repeat flag. Equality and hashing
    /// consider the tokens only, not the repeat flag.
    /// </summary>
    private class Sequence
    {
        // Set when the sequence may repeat indefinitely.
        private bool _repeat;

        // Boxed int token ids, in order.
        private readonly ArrayList _tokens;

        /// <summary>Creates an empty, non-repeating sequence.</summary>
        public Sequence()
        {
            _repeat = false;
            _tokens = new ArrayList(0);
        }

        /// <summary>Creates a sequence holding a single token.</summary>
        /// <param name="repeat">The repeat flag of the new sequence.</param>
        /// <param name="token">The token id.</param>
        public Sequence(bool repeat, int token)
        {
            // Honour the caller's flag instead of always storing false.
            _repeat = repeat;
            _tokens = new ArrayList(1);
            _tokens.Add(token);
        }

        /// <summary>
        /// Creates a copy of a sequence, keeping at most
        /// <paramref name="length"/> tokens and the same repeat flag.
        /// </summary>
        public Sequence(int length, Sequence seq)
        {
            _repeat = seq._repeat;
            _tokens = new ArrayList(length);
            if (seq.Length() < length)
            {
                length = seq.Length();
            }
            for (int i = 0; i < length; i++)
            {
                _tokens.Add(seq._tokens[i]);
            }
        }

        /// <summary>
        /// Creates a sequence with the given repeat flag that shares
        /// (does not copy) the token list of another sequence.
        /// </summary>
        public Sequence(bool repeat, Sequence seq)
        {
            _repeat = repeat;
            _tokens = seq._tokens;
        }

        /// <summary>Returns the number of tokens in the sequence.</summary>
        public int Length()
        {
            return _tokens.Count;
        }

        /// <summary>
        /// Returns the boxed token id at a position, or null when the
        /// position is outside the sequence.
        /// </summary>
        public object GetToken(int pos)
        {
            bool inRange = pos >= 0 && pos < _tokens.Count;
            return inRange ? _tokens[pos] : null;
        }

        /// <summary>Compares by tokens; false for anything that is not a Sequence.</summary>
        public override bool Equals(object obj)
        {
            var other = obj as Sequence;
            return other != null && Equals(other);
        }

        /// <summary>Returns true if both sequences hold the same tokens in the same order.</summary>
        public bool Equals(Sequence seq)
        {
            if (_tokens.Count != seq._tokens.Count)
            {
                return false;
            }
            for (int i = 0; i < _tokens.Count; i++)
            {
                if (!_tokens[i].Equals(seq._tokens[i]))
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>Hashes on the token count only, consistent with Equals.</summary>
        public override int GetHashCode()
        {
            return _tokens.Count.GetHashCode();
        }

        /// <summary>Returns true if this sequence begins with all tokens of another.</summary>
        public bool StartsWith(Sequence seq)
        {
            if (Length() < seq.Length())
            {
                return false;
            }
            for (int i = 0; i < seq._tokens.Count; i++)
            {
                if (!_tokens[i].Equals(seq._tokens[i]))
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>Returns the repeat flag.</summary>
        public bool IsRepetitive()
        {
            return _repeat;
        }

        /// <summary>
        /// Returns true if the parser's upcoming tokens match this
        /// whole sequence.
        /// </summary>
        public bool IsNext(Parser parser)
        {
            return IsNext(parser, _tokens.Count);
        }

        /// <summary>
        /// Returns true if the parser's upcoming tokens match the
        /// first <paramref name="length"/> tokens of this sequence
        /// (or all of them, if the sequence is shorter).
        /// </summary>
        public bool IsNext(Parser parser, int length)
        {
            if (length > _tokens.Count)
            {
                length = _tokens.Count;
            }
            for (int i = 0; i < length; i++)
            {
                var id = (int)_tokens[i];
                var token = parser.PeekToken(i);
                if (token == null || token.Id != id)
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>Returns the sequence as a bracketed list of token ids.</summary>
        public override string ToString()
        {
            return ToString(null);
        }

        /// <summary>
        /// Returns the sequence as a bracketed, space-separated list,
        /// followed by " *" when the repeat flag is set.
        /// </summary>
        /// <param name="tokenizer">
        /// Tokenizer used to describe the token ids, or null to print
        /// the numeric ids.
        /// </param>
        public string ToString(Tokenizer tokenizer)
        {
            var buffer = new StringBuilder();
            buffer.Append("[");
            for (int i = 0; i < _tokens.Count; i++)
            {
                var id = (int)_tokens[i];
                if (i > 0)
                {
                    buffer.Append(" ");
                }
                if (tokenizer == null)
                {
                    // ArrayList.ToString() would only print the type name,
                    // so write the ids themselves.
                    buffer.Append(id);
                }
                else
                {
                    buffer.Append(tokenizer.GetPatternDescription(id));
                }
            }
            buffer.Append("]");
            if (_repeat)
            {
                buffer.Append(" *");
            }
            return buffer.ToString();
        }

        /// <summary>
        /// Returns a new sequence made of this sequence followed by
        /// another, cut off at <paramref name="length"/> tokens. The
        /// result repeats if either input does.
        /// </summary>
        public Sequence Concat(int length, Sequence seq)
        {
            var res = new Sequence(length, this);
            if (seq._repeat)
            {
                res._repeat = true;
            }

            // Room left after this sequence's own tokens.
            length -= Length();
            if (length > seq.Length())
            {
                res._tokens.AddRange(seq._tokens);
            }
            else
            {
                for (int i = 0; i < length; i++)
                {
                    res._tokens.Add(seq._tokens[i]);
                }
            }
            return res;
        }

        /// <summary>
        /// Returns a copy of this sequence without its first
        /// <paramref name="start"/> tokens.
        /// </summary>
        public Sequence Subsequence(int start)
        {
            var res = new Sequence(Length(), this);
            while (start > 0 && res._tokens.Count > 0)
            {
                res._tokens.RemoveAt(0);
                start--;
            }
            return res;
        }
    }
}
}

107
Parsing/Matcher.cs Normal file
View File

@@ -0,0 +1,107 @@
namespace Flee.Parsing
{
/// <summary>
/// A regular expression string matcher. This class handles the
/// matching of a specific string with a specific regular
/// expression. It holds state about the matching process, such as
/// the position of the latest match and a number of flags that were
/// set. This class is not thread-safe.
/// </summary>
internal class Matcher
{
    private readonly Element _element;
    private ReaderBuffer _buffer;
    private readonly bool _ignoreCase;

    // Start position passed to the latest MatchFrom call.
    private int _start;

    // Length of the latest match; -1 after a reset or a failed match.
    private int _length;

    // Set through SetReadEndOfString; cleared by every reset.
    private bool _endOfString;

    /// <summary>Creates a matcher for an element and an input buffer.</summary>
    /// <param name="e">The root regular expression element.</param>
    /// <param name="buffer">The input to match against.</param>
    /// <param name="ignoreCase">The case-insensitivity flag reported by <see cref="IsCaseInsensitive"/>.</param>
    internal Matcher(Element e, ReaderBuffer buffer, bool ignoreCase)
    {
        _element = e;
        _buffer = buffer;
        _ignoreCase = ignoreCase;
        _start = 0;
        Reset();
    }

    /// <summary>Returns the case-insensitivity flag given at construction.</summary>
    public bool IsCaseInsensitive() => _ignoreCase;

    /// <summary>Clears the match length and the end-of-string flag.</summary>
    public void Reset()
    {
        _endOfString = false;
        _length = -1;
    }

    /// <summary>Resets the matcher to work on a new input string.</summary>
    /// <param name="str">The new input.</param>
    public void Reset(string str)
    {
        var reader = new StringReader(str);
        Reset(new ReaderBuffer(reader));
    }

    /// <summary>Resets the matcher to work on a new input buffer.</summary>
    /// <param name="buffer">The new input.</param>
    public void Reset(ReaderBuffer buffer)
    {
        _buffer = buffer;
        Reset();
    }

    /// <summary>Returns the start position of the latest match attempt.</summary>
    public int Start() => _start;

    /// <summary>
    /// Returns the position just past the latest match, or the start
    /// position when the match length is not positive.
    /// </summary>
    public int End() => _length > 0 ? _start + _length : _start;

    /// <summary>Returns the length of the latest match, or -1 if there is none.</summary>
    public int Length() => _length;

    /// <summary>Returns the end-of-string flag.</summary>
    public bool HasReadEndOfString() => _endOfString;

    /// <summary>Attempts a match starting at position 0.</summary>
    /// <returns>True if the element matched.</returns>
    public bool MatchFromBeginning() => MatchFrom(0);

    /// <summary>
    /// Resets the matcher and attempts a match from the given position.
    /// </summary>
    /// <param name="pos">The start position handed to the element.</param>
    /// <returns>True if the element reported a non-negative match length.</returns>
    public bool MatchFrom(int pos)
    {
        Reset();
        _start = pos;
        _length = _element.Match(this, _buffer, _start, 0);
        return _length >= 0;
    }

    /// <summary>
    /// Returns the matched text, or an empty string when the match
    /// length is not positive.
    /// </summary>
    public override string ToString()
    {
        // NOTE(review): the text is taken from the buffer's current position,
        // not from _start - confirm that is intended for non-zero starts.
        return _length <= 0
            ? ""
            : _buffer.Substring(_buffer.Position, _length);
    }

    /// <summary>Sets the end-of-string flag.</summary>
    internal void SetReadEndOfString()
    {
        _endOfString = true;
    }
}
}

240
Parsing/Node.cs Normal file
View File

@@ -0,0 +1,240 @@
using System.Collections;
namespace Flee.Parsing
{
/// <summary>
/// An abstract parse tree node. This class is inherited by all
/// nodes in the parse tree, i.e. by the token and production
/// classes.
/// </summary>
internal abstract class Node
{
    private Node _parent;

    // Created lazily by the Values getter; null until then or after RemoveAllValues.
    private ArrayList _values;

    /// <summary>Returns whether the node is hidden; false unless overridden.</summary>
    internal virtual bool IsHidden() => false;

    /// <summary>Gets the node type id.</summary>
    public abstract int Id
    {
        get;
    }

    /// <summary>Returns <see cref="Id"/>.</summary>
    public virtual int GetId() => Id;

    /// <summary>Gets the node name.</summary>
    public abstract string Name
    {
        get;
    }

    /// <summary>Returns <see cref="Name"/>.</summary>
    public virtual string GetName() => Name;

    /// <summary>
    /// Gets the first non-negative start line found among the
    /// children, scanning from the first child, or -1 if none.
    /// </summary>
    public virtual int StartLine
    {
        get
        {
            int total = Count;
            for (int child = 0; child < total; child++)
            {
                int value = this[child].StartLine;
                if (value >= 0)
                {
                    return value;
                }
            }
            return -1;
        }
    }

    /// <summary>Returns <see cref="StartLine"/>.</summary>
    public virtual int GetStartLine() => StartLine;

    /// <summary>
    /// Gets the first non-negative start column found among the
    /// children, scanning from the first child, or -1 if none.
    /// </summary>
    public virtual int StartColumn
    {
        get
        {
            int total = Count;
            for (int child = 0; child < total; child++)
            {
                int value = this[child].StartColumn;
                if (value >= 0)
                {
                    return value;
                }
            }
            return -1;
        }
    }

    /// <summary>Returns <see cref="StartColumn"/>.</summary>
    public virtual int GetStartColumn() => StartColumn;

    /// <summary>
    /// Gets the first non-negative end line found among the children,
    /// scanning from the last child backwards, or -1 if none.
    /// </summary>
    public virtual int EndLine
    {
        get
        {
            for (int child = Count - 1; child >= 0; child--)
            {
                int value = this[child].EndLine;
                if (value >= 0)
                {
                    return value;
                }
            }
            return -1;
        }
    }

    /// <summary>Returns <see cref="EndLine"/>.</summary>
    public virtual int GetEndLine() => EndLine;

    /// <summary>
    /// Gets the first non-negative end column found among the
    /// children, scanning from the last child backwards, or -1 if none.
    /// </summary>
    public virtual int EndColumn
    {
        get
        {
            for (int child = Count - 1; child >= 0; child--)
            {
                int value = this[child].EndColumn;
                if (value >= 0)
                {
                    return value;
                }
            }
            return -1;
        }
    }

    /// <summary>Returns <see cref="EndColumn"/>.</summary>
    public virtual int GetEndColumn() => EndColumn;

    /// <summary>Gets the parent node, or null if none has been set.</summary>
    public Node Parent
    {
        get { return _parent; }
    }

    /// <summary>Returns <see cref="Parent"/>.</summary>
    public Node GetParent() => Parent;

    /// <summary>Sets the parent node.</summary>
    internal void SetParent(Node parent)
    {
        _parent = parent;
    }

    /// <summary>Gets the number of child nodes; 0 unless overridden.</summary>
    public virtual int Count
    {
        get { return 0; }
    }

    /// <summary>Returns <see cref="Count"/>.</summary>
    public virtual int GetChildCount() => Count;

    /// <summary>Returns the number of nodes below this one, at any depth.</summary>
    public int GetDescendantCount()
    {
        int total = 0;
        for (int child = 0; child < Count; child++)
        {
            // The child itself, plus everything beneath it.
            total += 1 + this[child].GetDescendantCount();
        }
        return total;
    }

    /// <summary>Gets the child at an index; null unless overridden.</summary>
    public virtual Node this[int index]
    {
        get { return null; }
    }

    /// <summary>Returns the child at an index via the indexer.</summary>
    public virtual Node GetChildAt(int index) => this[index];

    /// <summary>
    /// Gets or sets the computed values attached to this node. The
    /// getter creates an empty list on first use.
    /// </summary>
    public ArrayList Values
    {
        get { return _values ?? (_values = new ArrayList()); }
        set { _values = value; }
    }

    /// <summary>Returns the number of attached values, without creating the list.</summary>
    public int GetValueCount()
    {
        return _values == null ? 0 : _values.Count;
    }

    /// <summary>Returns the value at a position in <see cref="Values"/>.</summary>
    public object GetValue(int pos)
    {
        return Values[pos];
    }

    /// <summary>Returns the value list as stored, which may be null.</summary>
    public ArrayList GetAllValues()
    {
        return _values;
    }

    /// <summary>Appends a value; null is ignored.</summary>
    public void AddValue(object value)
    {
        if (value == null)
        {
            return;
        }
        Values.Add(value);
    }

    /// <summary>Appends all values of a list; a null list is ignored.</summary>
    public void AddValues(ArrayList values)
    {
        if (values == null)
        {
            return;
        }
        Values.AddRange(values);
    }

    /// <summary>Discards the value list.</summary>
    public void RemoveAllValues()
    {
        _values = null;
    }

    /// <summary>
    /// Writes this node and its descendants to the output, one node
    /// per line, then flushes the output.
    /// </summary>
    public void PrintTo(TextWriter output)
    {
        PrintTo(output, "");
        output.Flush();
    }

    // Writes this node at the given indentation, then each child one level deeper.
    private void PrintTo(TextWriter output, string indent)
    {
        output.WriteLine(indent + ToString());

        string childIndent = indent + " ";
        for (int child = 0; child < Count; child++)
        {
            this[child].PrintTo(output, childIndent);
        }
    }
}
}

250
Parsing/ParseException.cs Normal file
View File

@@ -0,0 +1,250 @@
using System.Collections;
using System.Text;
namespace Flee.Parsing
{
/// <summary>
/// A parse exception. It records the kind of error, optional
/// additional information, and the line and column where the error
/// was found.
/// </summary>
public class ParseException : Exception
{
    /// <summary>The parse error type enumeration.</summary>
    public enum ErrorType
    {
        /// <summary>
        /// The internal error type is only used to signal an error
        /// that is a result of a bug in the parser or tokenizer code.
        /// </summary>
        INTERNAL,

        /// <summary>
        /// The I/O error type is used for stream I/O errors.
        /// </summary>
        IO,

        /// <summary>
        /// The unexpected end of file error type is used when end of
        /// file is encountered instead of a valid token.
        /// </summary>
        UNEXPECTED_EOF,

        /// <summary>
        /// The unexpected character error type is used when a
        /// character is read that isn't handled by one of the token
        /// patterns.
        /// </summary>
        UNEXPECTED_CHAR,

        /// <summary>
        /// The unexpected token error type is used when another token
        /// than the expected one is encountered.
        /// </summary>
        UNEXPECTED_TOKEN,

        /// <summary>
        /// The invalid token error type is used when a token pattern
        /// with an error message is matched. The additional
        /// information provided should contain the error message.
        /// </summary>
        INVALID_TOKEN,

        /// <summary>
        /// The analysis error type is used when an error is
        /// encountered in the analysis. The additional information
        /// provided should contain the error message.
        /// </summary>
        ANALYSIS
    }

    private readonly ErrorType _type;
    private readonly string _info;

    // Descriptions of the expected tokens; null when none were supplied.
    private readonly ArrayList _details;
    private readonly int _line;
    private readonly int _column;

    /// <summary>
    /// Creates a new parse exception without detailed information.
    /// </summary>
    /// <param name="type">The parse error type.</param>
    /// <param name="info">The additional information; may be null.</param>
    /// <param name="line">The line number; the position is shown in the message only when both line and column are positive.</param>
    /// <param name="column">The column number.</param>
    public ParseException(ErrorType type,
                          string info,
                          int line,
                          int column)
        : this(type, info, null, line, column)
    {
    }

    /// <summary>
    /// Creates a new parse exception. This constructor is only
    /// used to supply the detailed information array, which is
    /// only used for expected token errors. The list then contains
    /// descriptions of the expected tokens.
    /// </summary>
    /// <param name="type">The parse error type.</param>
    /// <param name="info">The additional information; may be null.</param>
    /// <param name="details">The additional detailed information; may be null.</param>
    /// <param name="line">The line number; the position is shown in the message only when both line and column are positive.</param>
    /// <param name="column">The column number.</param>
    public ParseException(ErrorType type,
                          string info,
                          ArrayList details,
                          int line,
                          int column)
    {
        _type = type;
        _info = info;
        _details = details;
        _line = line;
        _column = column;
    }

    /// <summary>Gets the parse error type.</summary>
    public ErrorType Type => _type;

    /// <summary>Returns <see cref="Type"/>.</summary>
    public ErrorType GetErrorType()
    {
        return Type;
    }

    /// <summary>Gets the additional error information; may be null.</summary>
    public string Info => _info;

    /// <summary>Returns <see cref="Info"/>.</summary>
    public string GetInfo()
    {
        return Info;
    }

    /// <summary>
    /// Gets a copy of the detailed information list, or null when the
    /// exception was created without one.
    /// </summary>
    // Copying a null list would throw ArgumentNullException, which made the
    // property unusable for every exception built without details.
    public ArrayList Details => _details == null ? null : new ArrayList(_details);

    /// <summary>Returns <see cref="Details"/>.</summary>
    public ArrayList GetDetails()
    {
        return Details;
    }

    /// <summary>Gets the line number given at construction.</summary>
    public int Line => _line;

    /// <summary>Returns <see cref="Line"/>.</summary>
    public int GetLine()
    {
        return Line;
    }

    /// <summary>Gets the column number given at construction.</summary>
    public int Column => _column;

    /// <summary>Returns <see cref="Column"/>.</summary>
    public int GetColumn()
    {
        return _column;
    }

    /// <summary>
    /// Gets the error description, followed by the line and column
    /// when both are positive.
    /// </summary>
    public override string Message
    {
        get
        {
            var buffer = new StringBuilder();

            // Error description first.
            buffer.Append(ErrorMessage);

            // Position is appended only when it is known.
            if (_line > 0 && _column > 0)
            {
                buffer.Append(", on line: ");
                buffer.Append(_line);
                buffer.Append(" column: ");
                buffer.Append(_column);
            }
            return buffer.ToString();
        }
    }

    /// <summary>Returns <see cref="Message"/>.</summary>
    public string GetMessage()
    {
        return Message;
    }

    /// <summary>
    /// Gets the error description without line or column
    /// information.
    /// </summary>
    public string ErrorMessage
    {
        get
        {
            var buffer = new StringBuilder();

            switch (_type)
            {
                case ErrorType.IO:
                    buffer.Append("I/O error: ");
                    buffer.Append(_info);
                    break;
                case ErrorType.UNEXPECTED_EOF:
                    buffer.Append("unexpected end of file");
                    break;
                case ErrorType.UNEXPECTED_CHAR:
                    buffer.Append("unexpected character '");
                    buffer.Append(_info);
                    buffer.Append("'");
                    break;
                case ErrorType.UNEXPECTED_TOKEN:
                    buffer.Append("unexpected token ");
                    buffer.Append(_info);
                    if (_details != null)
                    {
                        buffer.Append(", expected ");
                        if (_details.Count > 1)
                        {
                            buffer.Append("one of ");
                        }
                        buffer.Append(GetMessageDetails());
                    }
                    break;
                case ErrorType.INVALID_TOKEN:
                case ErrorType.ANALYSIS:
                    // The info text is the complete message for these types.
                    buffer.Append(_info);
                    break;
                default:
                    buffer.Append("internal error");
                    if (_info != null)
                    {
                        buffer.Append(": ");
                        buffer.Append(_info);
                    }
                    break;
            }
            return buffer.ToString();
        }
    }

    /// <summary>Returns <see cref="ErrorMessage"/>.</summary>
    public string GetErrorMessage()
    {
        return ErrorMessage;
    }

    // Joins the detail entries as "a, b, or c". Callers check _details for null first.
    private string GetMessageDetails()
    {
        var buffer = new StringBuilder();
        int last = _details.Count - 1;
        for (int i = 0; i <= last; i++)
        {
            if (i > 0)
            {
                buffer.Append(", ");
                if (i == last)
                {
                    buffer.Append("or ");
                }
            }
            buffer.Append(_details[i]);
        }
        return buffer.ToString();
    }
}
}

492
Parsing/Parser.cs Normal file
View File

@@ -0,0 +1,492 @@
using System.Collections;
using System.Text;
namespace Flee.Parsing
{
/// <summary>
/// A base parser class. It holds the production patterns, buffers
/// the tokens read from the tokenizer, collects parse errors in an
/// error log and forwards parse tree callbacks to the analyzer.
/// </summary>
// NOTE(review): the Obsolete attribute below carries a description rather
// than a deprecation notice, so it looks like a converted doc comment. It is
// left in place because removing it here alone would surface obsolete-usage
// warnings for the Analyzer type referenced by this class - confirm and
// remove both together.
[Obsolete(" A base parser class. This class provides the standard parser interface, as well as token handling.")]
internal abstract class Parser
{
    // True once Prepare has validated the current set of patterns.
    private bool _initialized;
    private readonly Tokenizer _tokenizer;
    private Analyzer _analyzer;

    // Patterns in insertion order; the first one is the start pattern.
    private readonly ArrayList _patterns = new ArrayList();

    // Pattern id -> pattern, used for lookup and duplicate detection.
    private readonly Hashtable _patternIds = new Hashtable();

    // Tokens read from the tokenizer but not yet consumed.
    private readonly ArrayList _tokens = new ArrayList();
    private ParserLogException _errorLog = new ParserLogException();

    // -1 when not recovering; set to 3 on a recoverable error and counted
    // down by each successfully matched token in NextToken(int).
    private int _errorRecovery = -1;

    /// <summary>
    /// Creates a new parser with a default analyzer.
    /// </summary>
    /// <param name="input">The input to read from.</param>
    internal Parser(TextReader input) : this(input, null)
    {
    }

    /// <summary>
    /// Creates a new parser.
    /// </summary>
    /// <param name="input">The input to read from.</param>
    /// <param name="analyzer">The analyzer to use, or null for a default one.</param>
    internal Parser(TextReader input, Analyzer analyzer)
    {
        _tokenizer = NewTokenizer(input);
        _analyzer = analyzer ?? NewAnalyzer();
    }

    /// <summary>
    /// Creates a new parser with a default analyzer.
    /// </summary>
    /// <param name="tokenizer">The tokenizer to use.</param>
    internal Parser(Tokenizer tokenizer) : this(tokenizer, null)
    {
    }

    /// <summary>
    /// Creates a new parser.
    /// </summary>
    /// <param name="tokenizer">The tokenizer to use.</param>
    /// <param name="analyzer">The analyzer to use, or null for a default one.</param>
    internal Parser(Tokenizer tokenizer, Analyzer analyzer)
    {
        _tokenizer = tokenizer;
        _analyzer = analyzer ?? NewAnalyzer();
    }

    /// <summary>Creates the tokenizer used when the parser is built from a reader.</summary>
    /// <param name="input">The input to read from.</param>
    protected virtual Tokenizer NewTokenizer(TextReader input)
    {
        // TODO: This method should really be abstract, but it isn't in this
        // version due to backwards compatibility requirements.
        return new Tokenizer(input);
    }

    /// <summary>Creates the analyzer used when none is supplied.</summary>
    protected virtual Analyzer NewAnalyzer()
    {
        // TODO: This method should really be abstract, but it isn't in this
        // version due to backwards compatibility requirements.
        return new Analyzer();
    }

    /// <summary>Gets the tokenizer used by this parser.</summary>
    public Tokenizer Tokenizer => _tokenizer;

    /// <summary>Gets the analyzer used by this parser.</summary>
    public Analyzer Analyzer => _analyzer;

    /// <summary>Returns <see cref="Tokenizer"/>.</summary>
    public Tokenizer GetTokenizer()
    {
        return Tokenizer;
    }

    /// <summary>Returns <see cref="Analyzer"/>.</summary>
    public Analyzer GetAnalyzer()
    {
        return Analyzer;
    }

    /// <summary>Sets the flag that tells <see cref="Parse"/> whether to call <see cref="Prepare"/>.</summary>
    internal void SetInitialized(bool initialized)
    {
        _initialized = initialized;
    }

    /// <summary>
    /// Adds a production pattern and marks the parser as needing
    /// preparation again.
    /// </summary>
    /// <param name="pattern">The pattern to add.</param>
    /// <exception cref="ParserCreationException">
    /// The pattern has no alternatives, or its id is already in use.
    /// </exception>
    public virtual void AddPattern(ProductionPattern pattern)
    {
        if (pattern.Count <= 0)
        {
            throw new ParserCreationException(
                ParserCreationException.ErrorType.INVALID_PRODUCTION,
                pattern.Name,
                "no production alternatives are present (must have at " +
                "least one)");
        }
        if (_patternIds.ContainsKey(pattern.Id))
        {
            throw new ParserCreationException(
                ParserCreationException.ErrorType.INVALID_PRODUCTION,
                pattern.Name,
                "another pattern with the same id (" + pattern.Id +
                ") has already been added");
        }
        _patterns.Add(pattern);
        _patternIds.Add(pattern.Id, pattern);
        SetInitialized(false);
    }

    /// <summary>
    /// Validates the added patterns and marks the parser as
    /// initialized.
    /// </summary>
    /// <exception cref="ParserCreationException">
    /// No patterns have been added, or a pattern references an
    /// undefined production id.
    /// </exception>
    public virtual void Prepare()
    {
        if (_patterns.Count <= 0)
        {
            throw new ParserCreationException(
                ParserCreationException.ErrorType.INVALID_PARSER,
                "no production patterns have been added");
        }
        for (int i = 0; i < _patterns.Count; i++)
        {
            CheckPattern((ProductionPattern)_patterns[i]);
        }
        SetInitialized(true);
    }

    // Checks every alternative of a pattern.
    private void CheckPattern(ProductionPattern pattern)
    {
        for (int i = 0; i < pattern.Count; i++)
        {
            CheckAlternative(pattern.Name, pattern[i]);
        }
    }

    // Checks every element of an alternative.
    private void CheckAlternative(string name,
                                  ProductionPatternAlternative alt)
    {
        for (int i = 0; i < alt.Count; i++)
        {
            CheckElement(name, alt[i]);
        }
    }

    // Rejects production elements whose id has no registered pattern.
    private void CheckElement(string name,
                              ProductionPatternElement elem)
    {
        if (elem.IsProduction() && GetPattern(elem.Id) == null)
        {
            throw new ParserCreationException(
                ParserCreationException.ErrorType.INVALID_PRODUCTION,
                name,
                "an undefined production pattern id (" + elem.Id +
                ") is referenced");
        }
    }

    /// <summary>
    /// Resets the tokenizer to a new input and resets the current
    /// analyzer.
    /// </summary>
    /// <param name="input">The new input to read from.</param>
    public void Reset(TextReader input)
    {
        _tokenizer.Reset(input);
        _analyzer.Reset();
    }

    /// <summary>
    /// Resets the tokenizer to a new input and replaces the analyzer.
    /// </summary>
    /// <param name="input">The new input to read from.</param>
    /// <param name="analyzer">The analyzer to use, or null for a default one.</param>
    public void Reset(TextReader input, Analyzer analyzer)
    {
        _tokenizer.Reset(input);

        // Same fallback as the constructors, so a null argument cannot
        // leave the parser without an analyzer.
        _analyzer = analyzer ?? NewAnalyzer();
    }

    /// <summary>
    /// Parses the token stream and returns the resulting parse tree,
    /// preparing the parser first if needed.
    /// </summary>
    /// <returns>The root node returned by <see cref="ParseStart"/>.</returns>
    /// <exception cref="ParserLogException">One or more parse errors were logged.</exception>
    public Node Parse()
    {
        Node root = null;

        if (!_initialized)
        {
            Prepare();
        }

        // Start from a clean state for every run.
        _tokens.Clear();
        _errorLog = new ParserLogException();
        _errorRecovery = -1;

        try
        {
            root = ParseStart();
        }
        catch (ParseException e)
        {
            AddError(e, true);
        }

        if (_errorLog.Count > 0)
        {
            throw _errorLog;
        }
        return root;
    }

    /// <summary>Parses the start production; implemented by subclasses.</summary>
    protected abstract Node ParseStart();

    /// <summary>Creates a production node for a pattern via the analyzer.</summary>
    protected virtual Production NewProduction(ProductionPattern pattern)
    {
        return _analyzer.NewProduction(pattern);
    }

    /// <summary>
    /// Logs a parse error unless error recovery is counting down, and
    /// optionally (re)starts error recovery.
    /// </summary>
    /// <param name="e">The error to log.</param>
    /// <param name="recovery">True to enter error recovery mode.</param>
    internal void AddError(ParseException e, bool recovery)
    {
        if (_errorRecovery <= 0)
        {
            _errorLog.AddError(e);
        }
        if (recovery)
        {
            _errorRecovery = 3;
        }
    }

    /// <summary>Returns the pattern with the given id, or null if there is none.</summary>
    internal ProductionPattern GetPattern(int id)
    {
        return (ProductionPattern)_patternIds[id];
    }

    /// <summary>Returns the first pattern added, or null if there are none.</summary>
    internal ProductionPattern GetStartPattern()
    {
        if (_patterns.Count <= 0)
        {
            return null;
        }
        return (ProductionPattern)_patterns[0];
    }

    /// <summary>Returns the pattern collection (the live list, not a copy).</summary>
    internal ICollection GetPatterns()
    {
        return _patterns;
    }

    /// <summary>
    /// Notifies the analyzer that a node is entered. Skipped for
    /// hidden nodes and during error recovery; analyzer parse errors
    /// are logged.
    /// </summary>
    internal void EnterNode(Node node)
    {
        if (node.IsHidden() || _errorRecovery >= 0)
        {
            return;
        }
        try
        {
            _analyzer.Enter(node);
        }
        catch (ParseException e)
        {
            AddError(e, false);
        }
    }

    /// <summary>
    /// Notifies the analyzer that a node is exited and returns the
    /// node it produces. For hidden nodes, during error recovery, or
    /// when the analyzer reports a parse error, the node itself is
    /// returned.
    /// </summary>
    internal Node ExitNode(Node node)
    {
        if (node.IsHidden() || _errorRecovery >= 0)
        {
            return node;
        }
        try
        {
            return _analyzer.Exit(node);
        }
        catch (ParseException e)
        {
            AddError(e, false);
            return node;
        }
    }

    /// <summary>
    /// Adds a child to a production. Hidden parents receive the child
    /// directly; the children of a hidden child are added one by one
    /// instead; everything else goes through the analyzer. Nothing
    /// happens during error recovery.
    /// </summary>
    internal void AddNode(Production node, Node child)
    {
        if (_errorRecovery >= 0)
        {
            return;
        }
        if (node.IsHidden())
        {
            node.AddChild(child);
            return;
        }
        if (child != null && child.IsHidden())
        {
            // Flatten the hidden child into its parent.
            for (int i = 0; i < child.Count; i++)
            {
                AddNode(node, child[i]);
            }
            return;
        }
        try
        {
            _analyzer.Child(node, child);
        }
        catch (ParseException e)
        {
            AddError(e, false);
        }
    }

    /// <summary>Consumes and returns the next token.</summary>
    /// <exception cref="ParseException">The end of input was reached.</exception>
    internal Token NextToken()
    {
        Token token = PeekToken(0);
        if (token == null)
        {
            throw new ParseException(
                ParseException.ErrorType.UNEXPECTED_EOF,
                null,
                _tokenizer.GetCurrentLine(),
                _tokenizer.GetCurrentColumn());
        }
        _tokens.RemoveAt(0);
        return token;
    }

    /// <summary>
    /// Consumes the next token and checks that it has the given id.
    /// A matching token counts down error recovery.
    /// </summary>
    /// <param name="id">The expected token id.</param>
    /// <exception cref="ParseException">
    /// The end of input was reached, or the token has another id.
    /// </exception>
    internal Token NextToken(int id)
    {
        Token token = NextToken();
        if (token.Id != id)
        {
            var list = new ArrayList(1) { _tokenizer.GetPatternDescription(id) };
            throw new ParseException(
                ParseException.ErrorType.UNEXPECTED_TOKEN,
                token.ToShortString(),
                list,
                token.StartLine,
                token.StartColumn);
        }
        if (_errorRecovery > 0)
        {
            _errorRecovery--;
        }
        return token;
    }

    /// <summary>
    /// Returns a token ahead in the stream without consuming it,
    /// reading from the tokenizer as needed. Tokenizer parse errors
    /// are logged and reading continues.
    /// </summary>
    /// <param name="steps">Number of tokens to skip; 0 is the next token.</param>
    /// <returns>The token, or null if the input ends before it.</returns>
    internal Token PeekToken(int steps)
    {
        while (_tokens.Count <= steps)
        {
            try
            {
                var token = _tokenizer.Next();
                if (token == null)
                {
                    return null;
                }
                _tokens.Add(token);
            }
            catch (ParseException e)
            {
                AddError(e, true);
            }
        }
        return (Token)_tokens[steps];
    }

    /// <summary>Returns a textual listing of all production patterns.</summary>
    public override string ToString()
    {
        var buffer = new StringBuilder();
        for (int i = 0; i < _patterns.Count; i++)
        {
            buffer.Append(ToString((ProductionPattern)_patterns[i]));
            buffer.Append("\n");
        }
        return buffer.ToString();
    }

    // Formats one pattern: its alternatives, then a note for every
    // alternative that uses more than one token of look-ahead.
    private string ToString(ProductionPattern prod)
    {
        var buffer = new StringBuilder();
        var indent = new StringBuilder();
        int i;

        buffer.Append(prod.Name);
        buffer.Append(" (");
        buffer.Append(prod.Id);
        buffer.Append(") ");

        // Later alternatives are indented to line up under the first.
        for (i = 0; i < buffer.Length; i++)
        {
            indent.Append(" ");
        }
        buffer.Append("= ");
        indent.Append("| ");

        for (i = 0; i < prod.Count; i++)
        {
            if (i > 0)
            {
                buffer.Append(indent);
            }
            buffer.Append(ToString(prod[i]));
            buffer.Append("\n");
        }
        for (i = 0; i < prod.Count; i++)
        {
            var set = prod[i].LookAhead;

            // No look-ahead set may have been assigned yet; skip the note
            // instead of failing inside ToString.
            if (set != null && set.GetMaxLength() > 1)
            {
                buffer.Append("Using ");
                buffer.Append(set.GetMaxLength());
                buffer.Append(" token look-ahead for alternative ");
                buffer.Append(i + 1);
                buffer.Append(": ");
                buffer.Append(set.ToString(_tokenizer));
                buffer.Append("\n");
            }
        }
        return buffer.ToString();
    }

    // Formats an alternative as its elements separated by single spaces.
    private string ToString(ProductionPatternAlternative alt)
    {
        var buffer = new StringBuilder();
        for (int i = 0; i < alt.Count; i++)
        {
            if (i > 0)
            {
                buffer.Append(" ");
            }
            buffer.Append(ToString(alt[i]));
        }
        return buffer.ToString();
    }

    // Formats an element with its repetition: [x], x*, x+, x{min,max} or plain x.
    private string ToString(ProductionPatternElement elem)
    {
        var buffer = new StringBuilder();
        int min = elem.MinCount;
        int max = elem.MaxCount;
        bool optional = min == 0 && max == 1;

        if (optional)
        {
            buffer.Append("[");
        }
        if (elem.IsToken())
        {
            buffer.Append(GetTokenDescription(elem.Id));
        }
        else
        {
            buffer.Append(GetPattern(elem.Id).Name);
        }
        if (optional)
        {
            buffer.Append("]");
        }
        else if (min == 0 && max == Int32.MaxValue)
        {
            buffer.Append("*");
        }
        else if (min == 1 && max == Int32.MaxValue)
        {
            buffer.Append("+");
        }
        else if (min != 1 || max != 1)
        {
            buffer.Append("{");
            buffer.Append(min);
            buffer.Append(",");
            buffer.Append(max);
            buffer.Append("}");
        }
        return buffer.ToString();
    }

    /// <summary>
    /// Returns the tokenizer's description of a token id, or an empty
    /// string when there is no tokenizer.
    /// </summary>
    internal string GetTokenDescription(int token)
    {
        if (_tokenizer == null)
        {
            return "";
        }
        return _tokenizer.GetPatternDescription(token);
    }
}
}

View File

@@ -0,0 +1,216 @@
using System.Collections;
using System.Text;
namespace Flee.Parsing
{
/// <summary>
/// A parser creation exception. This exception is used for signalling
/// an error in the token or production patterns, making it impossible
/// to create a working parser or tokenizer.
/// </summary>
internal class ParserCreationException : Exception
{
    /// <summary>The error type enumeration.</summary>
    public enum ErrorType
    {
        /// <summary>
        /// The internal error type is only used to signal an error
        /// that is a result of a bug in the parser or tokenizer code.
        /// </summary>
        INTERNAL,

        /// <summary>
        /// The invalid parser error type is used when the parser as
        /// such is invalid. This error is typically caused by using a
        /// parser without any patterns.
        /// </summary>
        INVALID_PARSER,

        /// <summary>
        /// The invalid token error type is used when a token pattern
        /// is erroneous. This error is typically caused by an invalid
        /// pattern type or an erroneous regular expression.
        /// </summary>
        INVALID_TOKEN,

        /// <summary>
        /// The invalid production error type is used when a
        /// production pattern is erroneous. This error is typically
        /// caused by referencing undeclared productions, or violating
        /// some other production pattern constraint.
        /// </summary>
        INVALID_PRODUCTION,

        /// <summary>
        /// The infinite loop error type is used when an infinite loop
        /// has been detected in the grammar. One of the productions
        /// in the loop will be reported.
        /// </summary>
        INFINITE_LOOP,

        /// <summary>
        /// The inherent ambiguity error type is used when the set of
        /// production patterns (i.e. the grammar) contains
        /// ambiguities that cannot be resolved.
        /// </summary>
        INHERENT_AMBIGUITY
    }

    private readonly ErrorType _type;
    private readonly string _name;
    private readonly string _info;

    // Optional detail entries; null when none were supplied.
    private readonly ArrayList _details;

    /// <summary>Creates an exception without a name or details.</summary>
    /// <param name="type">The error type.</param>
    /// <param name="info">The additional information; may be null.</param>
    public ParserCreationException(ErrorType type,
                                   String info)
        : this(type, null, info)
    {
    }

    /// <summary>Creates an exception without details.</summary>
    /// <param name="type">The error type.</param>
    /// <param name="name">The token or production name; may be null.</param>
    /// <param name="info">The additional information; may be null.</param>
    public ParserCreationException(ErrorType type,
                                   String name,
                                   String info)
        : this(type, name, info, null)
    {
    }

    /// <summary>Creates an exception.</summary>
    /// <param name="type">The error type.</param>
    /// <param name="name">The token or production name; may be null.</param>
    /// <param name="info">The additional information; may be null.</param>
    /// <param name="details">The detail entries; may be null.</param>
    public ParserCreationException(ErrorType type,
                                   String name,
                                   String info,
                                   ArrayList details)
    {
        _type = type;
        _name = name;
        _info = info;
        _details = details;
    }

    /// <summary>Gets the error type.</summary>
    public ErrorType Type
    {
        get { return _type; }
    }

    /// <summary>Returns <see cref="Type"/>.</summary>
    public ErrorType GetErrorType() => Type;

    /// <summary>Gets the token or production name; may be null.</summary>
    public string Name
    {
        get { return _name; }
    }

    /// <summary>Returns <see cref="Name"/>.</summary>
    public string GetName() => Name;

    /// <summary>Gets the additional information; may be null.</summary>
    public string Info
    {
        get { return _info; }
    }

    /// <summary>Returns <see cref="Info"/>.</summary>
    public string GetInfo() => Info;

    /// <summary>
    /// Gets the detail entries joined as "a, b, and c", or null when
    /// no details were supplied.
    /// </summary>
    public string Details
    {
        get
        {
            if (_details == null)
            {
                return null;
            }

            var text = new StringBuilder();
            int last = _details.Count - 1;
            for (int index = 0; index <= last; index++)
            {
                if (index > 0)
                {
                    text.Append(", ");
                    if (index == last)
                    {
                        text.Append("and ");
                    }
                }
                text.Append(_details[index]);
            }
            return text.ToString();
        }
    }

    /// <summary>Returns <see cref="Details"/>.</summary>
    public string GetDetails() => Details;

    /// <summary>Gets the error message built from the type, name, info and details.</summary>
    public override string Message
    {
        get
        {
            var text = new StringBuilder();
            switch (_type)
            {
                case ErrorType.INVALID_PARSER:
                    text.Append("parser is invalid, as ").Append(_info);
                    break;

                case ErrorType.INVALID_TOKEN:
                    text.Append("token '").Append(_name);
                    text.Append("' is invalid, as ").Append(_info);
                    break;

                case ErrorType.INVALID_PRODUCTION:
                    text.Append("production '").Append(_name);
                    text.Append("' is invalid, as ").Append(_info);
                    break;

                case ErrorType.INFINITE_LOOP:
                    text.Append("infinite loop found in production pattern '");
                    text.Append(_name).Append("'");
                    break;

                case ErrorType.INHERENT_AMBIGUITY:
                    text.Append("inherent ambiguity in production '");
                    text.Append(_name).Append("'");
                    if (_info != null)
                    {
                        text.Append(" ").Append(_info);
                    }
                    if (_details != null)
                    {
                        text.Append(" starting with ");
                        text.Append(_details.Count > 1 ? "tokens " : "token ");
                        text.Append(Details);
                    }
                    break;

                default:
                    text.Append("internal error");
                    break;
            }
            return text.ToString();
        }
    }

    /// <summary>Returns <see cref="Message"/>.</summary>
    public string GetMessage() => Message;
}
}

View File

@@ -0,0 +1,55 @@
using System.Collections;
using System.Text;
namespace Flee.Parsing
{
/// <summary>
/// An exception that collects the parse errors logged during a
/// parse, so they can be reported together.
/// </summary>
internal class ParserLogException : Exception
{
    // ParseException instances, in the order they were added.
    private readonly ArrayList _errors = new ArrayList();

    /// <summary>Creates an empty error log.</summary>
    public ParserLogException()
    {
    }

    /// <summary>
    /// Gets the messages of all logged errors, one per line. Empty
    /// when no errors have been logged.
    /// </summary>
    public override string Message
    {
        get
        {
            var text = new StringBuilder();
            int total = Count;
            for (int index = 0; index < total; index++)
            {
                if (index > 0)
                {
                    text.Append("\n");
                }
                text.Append(this[index].Message);
            }
            return text.ToString();
        }
    }

    /// <summary>Gets the number of logged errors.</summary>
    public int Count
    {
        get { return _errors.Count; }
    }

    /// <summary>Returns <see cref="Count"/>.</summary>
    public int GetErrorCount() => Count;

    /// <summary>Gets the logged error at an index.</summary>
    public ParseException this[int index]
    {
        get { return (ParseException)_errors[index]; }
    }

    /// <summary>Returns the logged error at an index.</summary>
    public ParseException GetError(int index) => this[index];

    /// <summary>Appends a parse error to the log.</summary>
    public void AddError(ParseException e)
    {
        _errors.Add(e);
    }

    /// <summary>Returns <see cref="Message"/>.</summary>
    public string GetMessage() => Message;
}
}

70
Parsing/Production.cs Normal file
View File

@@ -0,0 +1,70 @@
using System.Collections;
namespace Flee.Parsing
{
/// <summary>
/// A production node. This class represents a grammar production
/// (i.e. a list of child nodes) in a parse tree. The productions
/// are created by a parser, which adds children according to a
/// set of production patterns (i.e. grammar rules).
/// </summary>
internal class Production : Node
{
    private readonly ProductionPattern _pattern;

    // Child nodes, in the order they were added.
    private readonly ArrayList _children;

    /// <summary>Creates a production node without children.</summary>
    /// <param name="pattern">The production pattern this node was created for.</param>
    public Production(ProductionPattern pattern)
    {
        _children = new ArrayList();
        _pattern = pattern;
    }

    /// <summary>Gets the id of the production pattern.</summary>
    public override int Id
    {
        get { return _pattern.Id; }
    }

    /// <summary>Gets the name of the production pattern.</summary>
    public override string Name
    {
        get { return _pattern.Name; }
    }

    /// <summary>Gets the number of child nodes.</summary>
    public override int Count
    {
        get { return _children.Count; }
    }

    /// <summary>
    /// Gets the child at an index, or null when the index is out of
    /// range.
    /// </summary>
    public override Node this[int index]
    {
        get
        {
            bool inRange = index >= 0 && index < _children.Count;
            return inRange ? (Node)_children[index] : null;
        }
    }

    /// <summary>
    /// Appends a child node and makes this node its parent. A null
    /// child is ignored.
    /// </summary>
    public void AddChild(Node child)
    {
        if (child == null)
        {
            return;
        }
        child.SetParent(this);
        _children.Add(child);
    }

    /// <summary>Gets the production pattern this node was created for.</summary>
    public ProductionPattern Pattern
    {
        get { return _pattern; }
    }

    /// <summary>Returns <see cref="Pattern"/>.</summary>
    public ProductionPattern GetPattern() => Pattern;

    /// <summary>Returns true when the pattern is synthetic.</summary>
    internal override bool IsHidden() => _pattern.Synthetic;

    /// <summary>Returns the pattern name followed by its id in parentheses.</summary>
    public override string ToString()
    {
        return $"{_pattern.Name}({_pattern.Id})";
    }
}
}

View File

@@ -0,0 +1,213 @@
using System.Collections;
using System.Text;
namespace Flee.Parsing
{
/**
 * A production pattern. This class represents a set of production
 * alternatives that together form a single production. A
 * production pattern is identified by an integer id and a name,
 * both provided upon creation. The pattern id is used for
 * referencing the production pattern from production pattern
 * elements.
 */
internal class ProductionPattern
{
    private readonly int _id;
    private readonly string _name;
    private bool _synthetic;

    // The ProductionPatternAlternative instances, in insertion order.
    private readonly ArrayList _alternatives;

    // Index of the default alternative in _alternatives, or -1 for none.
    private int _defaultAlt;
    private LookAheadSet _lookAhead;

    /// <summary>Creates a pattern with no alternatives.</summary>
    /// <param name="id">The pattern id.</param>
    /// <param name="name">The pattern name.</param>
    public ProductionPattern(int id, string name)
    {
        _id = id;
        _name = name;
        _synthetic = false;
        _alternatives = new ArrayList();
        _defaultAlt = -1;
        _lookAhead = null;
    }

    /// <summary>The pattern id.</summary>
    public int Id => _id;

    /// <summary>Method-style alias for <see cref="Id"/>.</summary>
    public int GetId() => Id;

    /// <summary>The pattern name.</summary>
    public string Name => _name;

    /// <summary>Method-style alias for <see cref="Name"/>.</summary>
    public string GetName() => Name;

    /// <summary>The synthetic flag; false until set.</summary>
    public bool Synthetic
    {
        get => _synthetic;
        set => _synthetic = value;
    }

    /// <summary>Method-style getter for <see cref="Synthetic"/>.</summary>
    public bool IsSyntetic() => Synthetic;

    /// <summary>Method-style setter for <see cref="Synthetic"/>.</summary>
    public void SetSyntetic(bool synthetic)
    {
        Synthetic = synthetic;
    }

    /// <summary>The look-ahead set stored for this pattern; null until assigned.</summary>
    internal LookAheadSet LookAhead
    {
        get => _lookAhead;
        set => _lookAhead = value;
    }

    /// <summary>
    /// The default alternative, or null when none has been chosen.
    /// The setter stores the index of the given instance (compared by
    /// reference); if the instance is not among the alternatives the
    /// index falls back to zero.
    /// </summary>
    internal ProductionPatternAlternative DefaultAlternative
    {
        get
        {
            return _defaultAlt >= 0
                ? (ProductionPatternAlternative)_alternatives[_defaultAlt]
                : null;
        }
        set
        {
            // Scan from the end so the highest matching index wins; a
            // match at index 0 is indistinguishable from the fallback.
            int found = 0;
            for (int idx = _alternatives.Count - 1; idx > 0; idx--)
            {
                if (ReferenceEquals(_alternatives[idx], value))
                {
                    found = idx;
                    break;
                }
            }
            _defaultAlt = found;
        }
    }

    /// <summary>The number of alternatives.</summary>
    public int Count => _alternatives.Count;

    /// <summary>Method-style alias for <see cref="Count"/>.</summary>
    public int GetAlternativeCount() => Count;

    /// <summary>Returns the alternative at the given zero-based index.</summary>
    public ProductionPatternAlternative this[int index] => (ProductionPatternAlternative)_alternatives[index];

    /// <summary>Method-style alias for the indexer.</summary>
    public ProductionPatternAlternative GetAlternative(int pos) => this[pos];

    /// <summary>True when at least one alternative is left-recursive.</summary>
    public bool IsLeftRecursive()
    {
        foreach (ProductionPatternAlternative candidate in _alternatives)
        {
            if (candidate.IsLeftRecursive())
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>True when at least one alternative is right-recursive.</summary>
    public bool IsRightRecursive()
    {
        foreach (ProductionPatternAlternative candidate in _alternatives)
        {
            if (candidate.IsRightRecursive())
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>True when at least one alternative can match zero elements.</summary>
    public bool IsMatchingEmpty()
    {
        foreach (ProductionPatternAlternative candidate in _alternatives)
        {
            if (candidate.IsMatchingEmpty())
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Adds an alternative and sets this pattern as its owner.
    /// </summary>
    /// <exception cref="ParserCreationException">
    /// An equal alternative has already been added.
    /// </exception>
    public void AddAlternative(ProductionPatternAlternative alt)
    {
        bool duplicate = _alternatives.Contains(alt);
        if (duplicate)
        {
            throw new ParserCreationException(
                ParserCreationException.ErrorType.INVALID_PRODUCTION,
                _name,
                "two identical alternatives exist");
        }
        alt.SetPattern(this);
        _alternatives.Add(alt);
    }

    /// <summary>
    /// Returns "name(id) = alt" followed by one "| alt" line per further
    /// alternative, aligned under the first one.
    /// </summary>
    public override string ToString()
    {
        var text = new StringBuilder();
        text.Append(_name).Append("(").Append(_id).Append(") ");

        // Continuation lines are padded to the width of the header.
        var padding = new string(' ', text.Length);
        int index = 0;
        foreach (object alternative in _alternatives)
        {
            if (index == 0)
            {
                text.Append("= ");
            }
            else
            {
                text.Append("\n").Append(padding).Append("| ");
            }
            text.Append(alternative);
            index++;
        }
        return text.ToString();
    }
}
}

View File

@@ -0,0 +1,211 @@
using System.Collections;
using System.Text;
namespace Flee.Parsing
{
/**
 * A production pattern alternative. This class represents a list of
 * production pattern elements. In order to provide productions that
 * cannot be represented with the element occurrence counters, multiple
 * alternatives must be created and added to the same production
 * pattern. A production pattern alternative is always contained
 * within a production pattern.
 */
internal class ProductionPatternAlternative
{
    // Owning pattern; null until SetPattern is called.
    private ProductionPattern _pattern;

    // The ProductionPatternElement instances, in insertion order.
    private readonly ArrayList _elements = new ArrayList();
    private LookAheadSet _lookAhead = null;

    /// <summary>Creates an alternative with no elements.</summary>
    public ProductionPatternAlternative()
    {
    }

    /// <summary>The owning pattern; null until <see cref="SetPattern"/> is called.</summary>
    public ProductionPattern Pattern => _pattern;

    /// <summary>Method-style alias for <see cref="Pattern"/>.</summary>
    public ProductionPattern GetPattern()
    {
        return Pattern;
    }

    /// <summary>The look-ahead set stored for this alternative; null until assigned.</summary>
    internal LookAheadSet LookAhead
    {
        get
        {
            return _lookAhead;
        }
        set
        {
            _lookAhead = value;
        }
    }

    /// <summary>The number of elements.</summary>
    public int Count => _elements.Count;

    /// <summary>Method-style alias for <see cref="Count"/>.</summary>
    public int GetElementCount()
    {
        return Count;
    }

    /// <summary>Returns the element at the given zero-based index.</summary>
    public ProductionPatternElement this[int index] => (ProductionPatternElement)_elements[index];

    /// <summary>Method-style alias for the indexer.</summary>
    public ProductionPatternElement GetElement(int pos)
    {
        return this[pos];
    }

    /// <summary>
    /// True when an element with the owning pattern's id occurs before
    /// (or as) the first element with a non-zero minimum count.
    /// </summary>
    public bool IsLeftRecursive()
    {
        for (int i = 0; i < _elements.Count; i++)
        {
            var elem = (ProductionPatternElement)_elements[i];
            if (elem.Id == _pattern.Id)
            {
                return true;
            }
            else if (elem.MinCount > 0)
            {
                // A mandatory element ends the optional prefix.
                break;
            }
        }
        return false;
    }

    /// <summary>
    /// True when an element with the owning pattern's id occurs after
    /// (or as) the last element with a non-zero minimum count.
    /// </summary>
    public bool IsRightRecursive()
    {
        for (int i = _elements.Count - 1; i >= 0; i--)
        {
            var elem = (ProductionPatternElement)_elements[i];
            if (elem.Id == _pattern.Id)
            {
                return true;
            }
            else if (elem.MinCount > 0)
            {
                // A mandatory element ends the optional suffix.
                break;
            }
        }
        return false;
    }

    /// <summary>True when the minimum element count is zero.</summary>
    public bool IsMatchingEmpty()
    {
        return GetMinElementCount() == 0;
    }

    /// <summary>Sets the owning pattern.</summary>
    internal void SetPattern(ProductionPattern pattern)
    {
        this._pattern = pattern;
    }

    /// <summary>Returns the sum of the minimum counts of all elements.</summary>
    public int GetMinElementCount()
    {
        int min = 0;
        for (int i = 0; i < _elements.Count; i++)
        {
            var elem = (ProductionPatternElement)_elements[i];
            min += elem.MinCount;
        }
        return min;
    }

    /// <summary>
    /// Returns the sum of the maximum counts of all elements, or
    /// <c>Int32.MaxValue</c> when any element is unbounded or the sum
    /// would not fit in an <c>int</c>.
    /// </summary>
    public int GetMaxElementCount()
    {
        int max = 0;
        for (int i = 0; i < _elements.Count; i++)
        {
            var elem = (ProductionPatternElement)_elements[i];
            // Saturate instead of wrapping around; this also covers the
            // unbounded case (MaxCount == Int32.MaxValue).
            if (elem.MaxCount >= Int32.MaxValue - max)
            {
                return Int32.MaxValue;
            }
            max += elem.MaxCount;
        }
        return max;
    }

    /// <summary>Appends a token element with the given repeat bounds.</summary>
    public void AddToken(int id, int min, int max)
    {
        AddElement(new ProductionPatternElement(true, id, min, max));
    }

    /// <summary>Appends a production element with the given repeat bounds.</summary>
    public void AddProduction(int id, int min, int max)
    {
        AddElement(new ProductionPatternElement(false, id, min, max));
    }

    /// <summary>Appends an element as-is.</summary>
    public void AddElement(ProductionPatternElement elem)
    {
        _elements.Add(elem);
    }

    /// <summary>
    /// Appends a new element with the same kind and id as the given one
    /// but with different repeat bounds.
    /// </summary>
    public void AddElement(ProductionPatternElement elem,
                           int min,
                           int max)
    {
        if (elem.IsToken())
        {
            AddToken(elem.Id, min, max);
        }
        else
        {
            AddProduction(elem.Id, min, max);
        }
    }

    /// <summary>
    /// True when the object is an alternative with equal elements in
    /// the same order.
    /// </summary>
    public override bool Equals(object obj)
    {
        return obj is ProductionPatternAlternative other && Equals(other);
    }

    /// <summary>
    /// True when the other alternative has equal elements in the same
    /// order. A null argument yields false.
    /// </summary>
    public bool Equals(ProductionPatternAlternative alt)
    {
        // x.Equals(null) binds to this overload and must not throw.
        if (alt is null)
        {
            return false;
        }
        if (_elements.Count != alt._elements.Count)
        {
            return false;
        }
        for (int i = 0; i < _elements.Count; i++)
        {
            if (!_elements[i].Equals(alt._elements[i]))
            {
                return false;
            }
        }
        return true;
    }

    /// <summary>
    /// Hash code based on the element count only, which keeps it
    /// consistent with <see cref="Equals(ProductionPatternAlternative)"/>.
    /// </summary>
    public override int GetHashCode()
    {
        return _elements.Count.GetHashCode();
    }

    /// <summary>Returns the elements' string forms separated by single spaces.</summary>
    public override string ToString()
    {
        StringBuilder buffer = new StringBuilder();
        for (int i = 0; i < _elements.Count; i++)
        {
            if (i > 0)
            {
                buffer.Append(" ");
            }
            buffer.Append(_elements[i]);
        }
        return buffer.ToString();
    }
}
}

View File

@@ -0,0 +1,138 @@
using System.Text;
namespace Flee.Parsing
{
/**
 * A production pattern element. This class represents a reference to
 * either a token or a production. Each element also contains minimum
 * and maximum occurrence counters, controlling the number of
 * repetitions allowed. A production pattern element is always
 * contained within a production pattern rule.
 */
internal class ProductionPatternElement
{
    private readonly bool _token;
    private readonly int _id;
    private readonly int _min;
    private readonly int _max;
    private LookAheadSet _lookAhead;

    /// <summary>Creates an element.</summary>
    /// <param name="isToken">True for a token reference, false for a production reference.</param>
    /// <param name="id">The id of the referenced token or production.</param>
    /// <param name="min">Minimum repeat count; negative values are stored as zero.</param>
    /// <param name="max">
    /// Maximum repeat count; zero or less is stored as <c>Int32.MaxValue</c>,
    /// and a positive value below the minimum is raised to the minimum.
    /// </param>
    public ProductionPatternElement(bool isToken,
                                    int id,
                                    int min,
                                    int max)
    {
        _token = isToken;
        _id = id;
        _min = min < 0 ? 0 : min;
        if (max <= 0)
        {
            _max = Int32.MaxValue;
        }
        else
        {
            _max = max < _min ? _min : max;
        }
        _lookAhead = null;
    }

    /// <summary>The id of the referenced token or production.</summary>
    public int Id => _id;

    /// <summary>Method-style alias for <see cref="Id"/>.</summary>
    public int GetId() => Id;

    /// <summary>The minimum repeat count.</summary>
    public int MinCount => _min;

    /// <summary>Method-style alias for <see cref="MinCount"/>.</summary>
    public int GetMinCount() => MinCount;

    /// <summary>The maximum repeat count.</summary>
    public int MaxCount => _max;

    /// <summary>Method-style alias for <see cref="MaxCount"/>.</summary>
    public int GetMaxCount() => MaxCount;

    /// <summary>The look-ahead set stored for this element; null until assigned.</summary>
    internal LookAheadSet LookAhead
    {
        get => _lookAhead;
        set => _lookAhead = value;
    }

    /// <summary>True when this element references a token.</summary>
    public bool IsToken() => _token;

    /// <summary>True when this element references a production.</summary>
    public bool IsProduction() => !_token;

    /// <summary>
    /// True when this is a token element and the given token is
    /// non-null with the same id.
    /// </summary>
    public bool IsMatch(Token token)
    {
        if (!IsToken())
        {
            return false;
        }
        return token != null && token.Id == _id;
    }

    /// <summary>
    /// True when the object is an element with the same kind, id and
    /// repeat bounds.
    /// </summary>
    public override bool Equals(object obj)
    {
        return obj is ProductionPatternElement other
            && _token == other._token
            && _id == other._id
            && _min == other._min
            && _max == other._max;
    }

    /// <summary>Hash code derived from the id.</summary>
    public override int GetHashCode()
    {
        return this._id * 37;
    }

    /// <summary>
    /// Returns the id followed by "(Token)" or "(Production)", plus
    /// "{min,max}" unless both bounds are one.
    /// </summary>
    public override string ToString()
    {
        var text = new StringBuilder();
        text.Append(_id);
        text.Append(_token ? "(Token)" : "(Production)");
        bool exactlyOnce = _min == 1 && _max == 1;
        if (!exactlyOnce)
        {
            text.Append("{").Append(_min).Append(",").Append(_max).Append("}");
        }
        return text.ToString();
    }
}
}

180
Parsing/ReaderBuffer.cs Normal file
View File

@@ -0,0 +1,180 @@
namespace Flee.Parsing
{
/**
 * A character buffer that automatically reads from an input source
 * stream when needed. This class keeps track of the current position
 * in the buffer and its line and column number in the original input
 * source. It allows unlimited look-ahead of characters in the input,
 * reading and buffering the required data internally. As the
 * position is advanced, the buffer content prior to the current
 * position is subject to removal to make space for reading new
 * content. A few characters before the current position are always
 * kept to enable boundary condition checks.
 */
internal class ReaderBuffer
{
    /// <summary>The granularity, in characters, of reads and buffer growth.</summary>
    public const int BlockSize = 1024;

    // Character storage; set to null by Dispose().
    private char[] _buffer = new char[BlockSize * 4];

    // Index of the current position inside _buffer.
    private int _pos = 0;

    // Number of valid characters in _buffer.
    private int _length = 0;

    // Input source; set to null once exhausted, failed or disposed.
    private TextReader _input;
    private int _line = 1;
    private int _column = 1;

    /// <summary>Creates a buffer reading from the given input.</summary>
    public ReaderBuffer(TextReader input)
    {
        this._input = input;
    }

    /// <summary>
    /// Releases the character storage and closes the input. Errors
    /// raised while closing the input are deliberately ignored.
    /// </summary>
    public void Dispose()
    {
        _buffer = null;
        _pos = 0;
        _length = 0;
        if (_input != null)
        {
            try
            {
                _input.Close();
            }
            catch (Exception)
            {
                // Best-effort close: nothing useful can be done here.
            }
            _input = null;
        }
    }

    /// <summary>The current position inside the buffer.</summary>
    public int Position => _pos;

    /// <summary>The one-based line number of the current position.</summary>
    public int LineNumber => _line;

    /// <summary>The one-based column number of the current position.</summary>
    public int ColumnNumber => _column;

    /// <summary>The number of characters currently held in the buffer.</summary>
    public int Length => _length;

    /// <summary>Returns the buffered characters in the given range.</summary>
    /// <param name="index">Start index inside the buffer.</param>
    /// <param name="length">Number of characters to return.</param>
    public string Substring(int index, int length)
    {
        return new string(_buffer, index, length);
    }

    /// <summary>
    /// Returns all buffered characters, or an empty string once the
    /// buffer has been disposed.
    /// </summary>
    public override string ToString()
    {
        // Read() disposes the buffer automatically after the last
        // character, so a null buffer is a normal state here.
        if (_buffer == null)
        {
            return string.Empty;
        }
        return new string(_buffer, 0, _length);
    }

    /// <summary>
    /// Returns the character at the given offset from the current
    /// position without consuming it, or -1 past the end of the input.
    /// </summary>
    public int Peek(int offset)
    {
        int index = _pos + offset;
        // Avoid most calls to EnsureBuffered(), since we are in a
        // performance hotspot here. This check is not exhaustive,
        // but only present here to speed things up.
        if (index >= _length)
        {
            EnsureBuffered(offset + 1);
            // EnsureBuffered may have shifted the buffer content.
            index = _pos + offset;
        }
        return (index >= _length) ? -1 : _buffer[index];
    }

    /// <summary>
    /// Consumes up to <paramref name="offset"/> characters and returns
    /// them, updating line and column numbers. Returns null when no
    /// characters remain. The buffer disposes itself once the input is
    /// exhausted and the last character has been consumed.
    /// </summary>
    public string Read(int offset)
    {
        EnsureBuffered(offset + 1);
        if (_pos >= _length)
        {
            return null;
        }
        var count = Math.Min(_length - _pos, offset);
        UpdateLineColumnNumbers(count);
        var result = new string(_buffer, _pos, count);
        _pos += count;
        if (_input == null && _pos >= _length)
        {
            Dispose();
        }
        return result;
    }

    // Advances the line/column counters over the next 'offset' characters.
    private void UpdateLineColumnNumbers(int offset)
    {
        for (int i = 0; i < offset; i++)
        {
            if (_buffer[_pos + i] == '\n')
            {
                _line++;
                _column = 1;
            }
            else
            {
                _column++;
            }
        }
    }

    // Makes sure that 'offset' characters past the current position are
    // buffered, reading whole blocks from the input as needed.
    private void EnsureBuffered(int offset)
    {
        // Check for end of stream or already read characters
        if (_input == null || _pos + offset < _length)
        {
            return;
        }
        // Remove (almost all) old characters from buffer, keeping the
        // 16 characters before the current position
        if (_pos > BlockSize)
        {
            _length -= (_pos - 16);
            Array.Copy(_buffer, _pos - 16, _buffer, 0, _length);
            _pos = 16;
        }
        // Calculate number of characters to read, rounded up to a
        // whole number of blocks
        var size = _pos + offset - _length + 1;
        if (size % BlockSize != 0)
        {
            size = (1 + size / BlockSize) * BlockSize;
        }
        EnsureCapacity(_length + size);
        // Read characters
        try
        {
            while (_input != null && size > 0)
            {
                var readSize = _input.Read(_buffer, _length, size);
                if (readSize > 0)
                {
                    _length += readSize;
                    size -= readSize;
                }
                else
                {
                    // End of input reached
                    _input.Close();
                    _input = null;
                }
            }
        }
        catch (IOException)
        {
            _input = null;
            // Rethrow without resetting the stack trace
            throw;
        }
    }

    // Grows the buffer to hold at least 'size' characters, rounded up
    // to a whole number of blocks.
    private void EnsureCapacity(int size)
    {
        if (_buffer.Length >= size)
        {
            return;
        }
        if (size % BlockSize != 0)
        {
            size = (1 + size / BlockSize) * BlockSize;
        }
        Array.Resize(ref _buffer, size);
    }
}
}

View File

@@ -0,0 +1,648 @@
using System.Collections;
namespace Flee.Parsing
{
/**
 * A recursive descent parser. This parser handles LL(n) grammars,
 * selecting the appropriate pattern to parse based on the next few
 * tokens. The parser is more efficient the fewer look-ahead tokens
 * that it has to consider.
 */
internal class RecursiveDescentParser : Parser
{
private int _stackdepth = 0;
/// <summary>
/// Creates a parser for the given input; all work is delegated to
/// the base <c>Parser</c> constructor.
/// </summary>
/// <param name="input">The input to parse.</param>
public RecursiveDescentParser(TextReader input) : base(input)
{
}
/// <summary>
/// Creates a parser for the given input and analyzer; all work is
/// delegated to the base <c>Parser</c> constructor.
/// </summary>
/// <param name="input">The input to parse.</param>
/// <param name="analyzer">The analyzer passed on to the base parser.</param>
public RecursiveDescentParser(TextReader input, Analyzer analyzer)
    : base(input, analyzer)
{
}
/// <summary>
/// Creates a parser reading from the given tokenizer; all work is
/// delegated to the base <c>Parser</c> constructor.
/// </summary>
/// <param name="tokenizer">The tokenizer passed on to the base parser.</param>
public RecursiveDescentParser(Tokenizer tokenizer)
    : base(tokenizer)
{
}
/// <summary>
/// Creates a parser reading from the given tokenizer with the given
/// analyzer; all work is delegated to the base <c>Parser</c> constructor.
/// </summary>
/// <param name="tokenizer">The tokenizer passed on to the base parser.</param>
/// <param name="analyzer">The analyzer passed on to the base parser.</param>
public RecursiveDescentParser(Tokenizer tokenizer,
                              Analyzer analyzer)
    : base(tokenizer, analyzer)
{
}
/// <summary>
/// Adds a production pattern after rejecting patterns this parser
/// cannot handle.
/// </summary>
/// <param name="pattern">The pattern to add.</param>
/// <exception cref="ParserCreationException">
/// The pattern can match zero elements or is left-recursive.
/// </exception>
public override void AddPattern(ProductionPattern pattern)
{
    // The empty-match check takes precedence over the left-recursion check.
    string problem = null;
    if (pattern.IsMatchingEmpty())
    {
        problem = "zero elements can be matched (minimum is one)";
    }
    else if (pattern.IsLeftRecursive())
    {
        problem = "left recursive patterns are not allowed";
    }

    if (problem != null)
    {
        throw new ParserCreationException(
            ParserCreationException.ErrorType.INVALID_PRODUCTION,
            pattern.Name,
            problem);
    }

    base.AddPattern(pattern);
}
/// <summary>
/// Runs the base class preparation, then computes the look-ahead
/// sets for every registered pattern. The initialized flag is
/// cleared while the sets are being computed and set afterwards.
/// </summary>
public override void Prepare()
{
    base.Prepare();
    SetInitialized(false);

    foreach (ProductionPattern pattern in GetPatterns())
    {
        CalculateLookAhead(pattern);
    }

    SetInitialized(true);
}
/// <summary>
/// Parses the start pattern and verifies that no token is left
/// afterwards.
/// </summary>
/// <returns>The node produced for the start pattern.</returns>
/// <exception cref="ParseException">
/// A token remains after the start pattern has been parsed.
/// </exception>
protected override Node ParseStart()
{
    _stackdepth = 0;
    var root = ParsePattern(GetStartPattern());

    var trailing = PeekToken(0);
    if (trailing == null)
    {
        return root;
    }

    // Only end of input is acceptable at this point.
    var expected = new ArrayList(1);
    expected.Add("<EOF>");
    throw new ParseException(
        ParseException.ErrorType.UNEXPECTED_TOKEN,
        trailing.ToShortString(),
        expected,
        trailing.StartLine,
        trailing.StartColumn);
}
/// <summary>
/// Parses one production pattern by picking the first non-default
/// alternative whose look-ahead matches, falling back to the default
/// alternative.
/// </summary>
/// <param name="pattern">The pattern to parse.</param>
/// <returns>The node produced by the chosen alternative.</returns>
/// <exception cref="System.StackOverflowException">
/// The nesting depth exceeds 200 patterns.
/// </exception>
private Node ParsePattern(ProductionPattern pattern)
{
    // The depth check sits outside the try block, so the counter is
    // not decremented when the limit is hit.
    if (++_stackdepth > 200)
    {
        throw new System.StackOverflowException();
    }

    try
    {
        var fallback = pattern.DefaultAlternative;
        for (int index = 0; index < pattern.Count; index++)
        {
            var candidate = pattern[index];
            if (!ReferenceEquals(fallback, candidate) && IsNext(candidate))
            {
                return ParseAlternative(candidate);
            }
        }

        bool fallbackUsable = fallback != null && IsNext(fallback);
        if (!fallbackUsable)
        {
            ThrowParseException(FindUnion(pattern));
        }
        return ParseAlternative(fallback);
    }
    finally
    {
        _stackdepth--;
    }
}
/// <summary>
/// Parses all elements of an alternative into a new production node.
/// When an element fails with a <c>ParseException</c>, the error is
/// logged, one token is skipped and the same element is tried again.
/// </summary>
/// <param name="alt">The alternative to parse.</param>
/// <returns>The result of <c>ExitNode</c> for the new production.</returns>
private Node ParseAlternative(ProductionPatternAlternative alt)
{
    var node = NewProduction(alt.Pattern);
    EnterNode(node);

    int index = 0;
    while (index < alt.Count)
    {
        try
        {
            ParseElement(node, alt[index]);
            index++;
        }
        catch (ParseException error)
        {
            // Stay on the same element after skipping a token.
            AddError(error, true);
            NextToken();
        }
    }

    return ExitNode(node);
}
/// <summary>
/// Parses the repetitions of one element and adds the resulting
/// children to the given production node. Repetitions up to the
/// minimum count are always attempted; further ones only while the
/// element's look-ahead matches.
/// </summary>
/// <param name="node">The production node receiving the children.</param>
/// <param name="elem">The element to parse.</param>
private void ParseElement(Production node,
                          ProductionPatternElement elem)
{
    for (int count = 0; count < elem.MaxCount; count++)
    {
        bool mandatory = count < elem.MinCount;
        if (!mandatory && !IsNext(elem))
        {
            break;
        }

        if (elem.IsToken())
        {
            Node leaf = NextToken(elem.Id);
            EnterNode(leaf);
            AddNode(node, ExitNode(leaf));
        }
        else
        {
            Node subtree = ParsePattern(GetPattern(elem.Id));
            AddNode(node, subtree);
        }
    }
}
/// <summary>
/// Checks the pattern's look-ahead set against the upcoming tokens.
/// Returns false when the pattern has no look-ahead set.
/// </summary>
private bool IsNext(ProductionPattern pattern)
{
    LookAheadSet lookAhead = pattern.LookAhead;
    return lookAhead != null && lookAhead.IsNext(this);
}
/// <summary>
/// Checks the alternative's look-ahead set against the upcoming
/// tokens. Returns false when the alternative has no look-ahead set.
/// </summary>
private bool IsNext(ProductionPatternAlternative alt)
{
    LookAheadSet lookAhead = alt.LookAhead;
    return lookAhead != null && lookAhead.IsNext(this);
}
/// <summary>
/// Checks whether the element can start at the upcoming tokens. The
/// element's own look-ahead set is used when present; otherwise a
/// token element is compared with the next token and a production
/// element defers to its pattern.
/// </summary>
private bool IsNext(ProductionPatternElement elem)
{
    LookAheadSet lookAhead = elem.LookAhead;
    if (lookAhead != null)
    {
        return lookAhead.IsNext(this);
    }

    return elem.IsToken()
        ? elem.IsMatch(PeekToken(0))
        : IsNext(GetPattern(elem.Id));
}
/// <summary>
/// Computes and stores the look-ahead sets for a pattern and its
/// alternatives. It starts with look-ahead length one and increases
/// the length for as long as <c>FindConflicts</c> reports conflicts
/// between the alternatives. Finally the look-aheads inside each
/// alternative are calculated.
/// </summary>
/// <param name="pattern">The pattern to process.</param>
private void CalculateLookAhead(ProductionPattern pattern)
{
    ProductionPatternAlternative alt;
    LookAheadSet previous = new LookAheadSet(0);
    int length = 1;
    int i;
    CallStack stack = new CallStack();
    // Calculate simple look-ahead
    stack.Push(pattern.Name, 1);
    var result = new LookAheadSet(1);
    var alternatives = new LookAheadSet[pattern.Count];
    for (i = 0; i < pattern.Count; i++)
    {
        alt = pattern[i];
        alternatives[i] = FindLookAhead(alt, 1, 0, stack, null);
        alt.LookAhead = alternatives[i];
        result.AddAll(alternatives[i]);
    }
    // Only set the pattern look-ahead if it has not been set before
    if (pattern.LookAhead == null)
    {
        pattern.LookAhead = result;
    }
    var conflicts = FindConflicts(pattern, 1);
    // Resolve conflicts
    while (conflicts.Size() > 0)
    {
        length++;
        stack.Clear();
        stack.Push(pattern.Name, length);
        // Carry over the conflicts found in the previous round
        conflicts.AddAll(previous);
        for (i = 0; i < pattern.Count; i++)
        {
            alt = pattern[i];
            // Recalculate with the longer length for conflicting alternatives
            if (alternatives[i].Intersects(conflicts))
            {
                alternatives[i] = FindLookAhead(alt,
                length,
                0,
                stack,
                conflicts);
                alt.LookAhead = alternatives[i];
            }
            // Still conflicting: the first such alternative becomes the
            // default, any other one is reported as an ambiguity
            if (alternatives[i].Intersects(conflicts))
            {
                if (pattern.DefaultAlternative == null)
                {
                    pattern.DefaultAlternative = alt;
                }
                else if (pattern.DefaultAlternative != alt)
                {
                    result = alternatives[i].CreateIntersection(conflicts);
                    ThrowAmbiguityException(pattern.Name,
                    null,
                    result);
                }
            }
        }
        previous = conflicts;
        conflicts = FindConflicts(pattern, length);
    }
    // Resolve conflicts inside rules
    for (i = 0; i < pattern.Count; i++)
    {
        CalculateLookAhead(pattern[i], 0);
    }
}
/// <summary>
/// Computes look-ahead sets for the elements of an alternative, from
/// the given position to the end. Elements whose minimum and maximum
/// counts are equal are skipped. For the others the element
/// look-ahead is compared with the look-ahead of the rest of the
/// alternative, and the length is increased while they conflict.
/// </summary>
/// <param name="alt">The alternative to process.</param>
/// <param name="pos">The index of the first element to process.</param>
private void CalculateLookAhead(ProductionPatternAlternative alt,
int pos)
{
    LookAheadSet previous = new LookAheadSet(0);
    int length = 1;
    // Check trivial cases
    if (pos >= alt.Count)
    {
        return;
    }
    // Check for non-optional element
    var pattern = alt.Pattern;
    var elem = alt[pos];
    if (elem.MinCount == elem.MaxCount)
    {
        CalculateLookAhead(alt, pos + 1);
        return;
    }
    // Calculate simple look-aheads
    var first = FindLookAhead(elem, 1, new CallStack(), null);
    var follow = FindLookAhead(alt, 1, pos + 1, new CallStack(), null);
    // Resolve conflicts
    // The location text uses a one-based position for error messages
    var location = "at position " + (pos + 1);
    var conflicts = FindConflicts(pattern.Name,
    location,
    first,
    follow);
    while (conflicts.Size() > 0)
    {
        length++;
        // Carry over the conflicts found in the previous round
        conflicts.AddAll(previous);
        first = FindLookAhead(elem,
        length,
        new CallStack(),
        conflicts);
        follow = FindLookAhead(alt,
        length,
        pos + 1,
        new CallStack(),
        conflicts);
        first = first.CreateCombination(follow);
        elem.LookAhead = first;
        // A conflict remaining at this length is reported as an ambiguity
        if (first.Intersects(conflicts))
        {
            first = first.CreateIntersection(conflicts);
            ThrowAmbiguityException(pattern.Name, location, first);
        }
        previous = conflicts;
        conflicts = FindConflicts(pattern.Name,
        location,
        first,
        follow);
    }
    // Check remaining elements
    CalculateLookAhead(alt, pos + 1);
}
/// <summary>
/// Finds the look-ahead set for a pattern as the union of the
/// look-ahead sets of all its alternatives.
/// </summary>
/// <param name="pattern">The pattern to inspect.</param>
/// <param name="length">The look-ahead length.</param>
/// <param name="stack">The call stack used to detect infinite loops.</param>
/// <param name="filter">The filter passed on to the alternatives; may be null.</param>
/// <returns>A new look-ahead set.</returns>
/// <exception cref="ParserCreationException">
/// The pattern is already on the call stack with the same length.
/// </exception>
private LookAheadSet FindLookAhead(ProductionPattern pattern,
                                   int length,
                                   CallStack stack,
                                   LookAheadSet filter)
{
    bool alreadyVisiting = stack.Contains(pattern.Name, length);
    if (alreadyVisiting)
    {
        throw new ParserCreationException(
            ParserCreationException.ErrorType.INFINITE_LOOP,
            pattern.Name,
            (String)null);
    }

    stack.Push(pattern.Name, length);
    var union = new LookAheadSet(length);
    for (int index = 0; index < pattern.Count; index++)
    {
        union.AddAll(FindLookAhead(pattern[index], length, 0, stack, filter));
    }
    stack.Pop();
    return union;
}
/// <summary>
/// Finds the look-ahead set for an alternative, starting at the
/// given element position. The look-ahead of the element at that
/// position is combined with the look-ahead of the remaining
/// elements, which is found recursively.
/// </summary>
/// <param name="alt">The alternative to inspect.</param>
/// <param name="length">The look-ahead length.</param>
/// <param name="pos">The index of the first element to consider.</param>
/// <param name="stack">The call stack passed on to nested calls.</param>
/// <param name="filter">The look-ahead filter; may be null.</param>
/// <returns>
/// The look-ahead set; a new set created with length zero when
/// <paramref name="length"/> is not positive or
/// <paramref name="pos"/> is past the last element.
/// </returns>
private LookAheadSet FindLookAhead(ProductionPatternAlternative alt,
int length,
int pos,
CallStack stack,
LookAheadSet filter)
{
    LookAheadSet follow;
    // Check trivial cases
    if (length <= 0 || pos >= alt.Count)
    {
        return new LookAheadSet(0);
    }
    // Find look-ahead for this element
    var first = FindLookAhead(alt[pos], length, stack, filter);
    // An optional element may also be skipped entirely
    if (alt[pos].MinCount == 0)
    {
        first.AddEmpty();
    }
    // Find remaining look-ahead
    if (filter == null)
    {
        // Without a filter, continue with the length left over after
        // the shortest sequence in 'first'
        length -= first.GetMinLength();
        if (length > 0)
        {
            follow = FindLookAhead(alt, length, pos + 1, stack, null);
            first = first.CreateCombination(follow);
        }
    }
    else if (filter.IsOverlap(first))
    {
        // With a filter, only the overlapping sequences are extended
        // with the look-ahead of the remaining elements
        var overlaps = first.CreateOverlaps(filter);
        length -= overlaps.GetMinLength();
        filter = filter.CreateFilter(overlaps);
        follow = FindLookAhead(alt, length, pos + 1, stack, filter);
        first.RemoveAll(overlaps);
        first.AddAll(overlaps.CreateCombination(follow));
    }
    return first;
}
/// <summary>
/// Finds the look-ahead set for an element, including repetitions of
/// the element. Repetitions are only considered when a filter is
/// given and it overlaps the look-ahead of a single occurrence.
/// </summary>
/// <param name="elem">The element to inspect.</param>
/// <param name="length">The look-ahead length.</param>
/// <param name="stack">The call stack passed on to nested calls.</param>
/// <param name="filter">The look-ahead filter; may be null.</param>
/// <returns>A new look-ahead set.</returns>
private LookAheadSet FindLookAhead(ProductionPatternElement elem,
int length,
CallStack stack,
LookAheadSet filter)
{
    // Find initial element look-ahead
    var first = FindLookAhead(elem, length, 0, stack, filter);
    var result = new LookAheadSet(length);
    result.AddAll(first);
    // Nothing more to do without a filter or without any overlap
    if (filter == null || !filter.IsOverlap(result))
    {
        return result;
    }
    // Handle element repetitions
    if (elem.MaxCount == Int32.MaxValue)
    {
        first = first.CreateRepetitive();
    }
    // Never consider more repetitions than the look-ahead length
    var max = elem.MaxCount;
    if (length < max)
    {
        max = length;
    }
    for (int i = 1; i < max; i++)
    {
        first = first.CreateOverlaps(filter);
        // Stop when nothing overlaps or the length has been reached
        if (first.Size() <= 0 || first.GetMinLength() >= length)
        {
            break;
        }
        var follow = FindLookAhead(elem,
        length,
        0,
        stack,
        filter.CreateFilter(first));
        first = first.CreateCombination(follow);
        result.AddAll(first);
    }
    return result;
}
/// <summary>
/// Finds the look-ahead set for a single occurrence of an element.
/// A token element yields a set holding only its id. A production
/// element yields the look-ahead of its pattern, turned into a
/// repetitive set when the pattern name is on the call stack.
/// </summary>
/// <param name="elem">The element to inspect.</param>
/// <param name="length">The look-ahead length.</param>
/// <param name="dummy">Not read by this method.</param>
/// <param name="stack">The call stack passed on to nested calls.</param>
/// <param name="filter">The look-ahead filter; may be null.</param>
/// <returns>The look-ahead set.</returns>
private LookAheadSet FindLookAhead(ProductionPatternElement elem,
                                   int length,
                                   int dummy,
                                   CallStack stack,
                                   LookAheadSet filter)
{
    if (elem.IsToken())
    {
        var single = new LookAheadSet(length);
        single.Add(elem.Id);
        return single;
    }

    var pattern = GetPattern(elem.Id);
    LookAheadSet nested = FindLookAhead(pattern, length, stack, filter);
    return stack.Contains(pattern.Name) ? nested.CreateRepetitive() : nested;
}
/// <summary>
/// Collects the pairwise intersections of the look-ahead sets of all
/// alternatives in a pattern.
/// </summary>
/// <param name="pattern">The pattern to check.</param>
/// <param name="maxLength">The length used to create the result set.</param>
/// <returns>The set of conflicts; it has size zero when there are none.</returns>
/// <exception cref="ParserCreationException">
/// The conflict set is repetitive.
/// </exception>
private LookAheadSet FindConflicts(ProductionPattern pattern,
                                   int maxLength)
{
    var conflicts = new LookAheadSet(maxLength);

    // Compare every alternative with all alternatives before it.
    for (int later = 1; later < pattern.Count; later++)
    {
        var laterSet = pattern[later].LookAhead;
        for (int earlier = 0; earlier < later; earlier++)
        {
            var earlierSet = pattern[earlier].LookAhead;
            conflicts.AddAll(laterSet.CreateIntersection(earlierSet));
        }
    }

    if (conflicts.IsRepetitive())
    {
        ThrowAmbiguityException(pattern.Name, null, conflicts);
    }
    return conflicts;
}
/// <summary>
/// Returns the intersection of two look-ahead sets.
/// </summary>
/// <param name="pattern">The pattern name used in the error message.</param>
/// <param name="location">The location text used in the error message.</param>
/// <param name="set1">The first look-ahead set.</param>
/// <param name="set2">The second look-ahead set.</param>
/// <returns>The intersection of the two sets.</returns>
/// <exception cref="ParserCreationException">
/// The intersection is repetitive.
/// </exception>
private LookAheadSet FindConflicts(string pattern,
                                   string location,
                                   LookAheadSet set1,
                                   LookAheadSet set2)
{
    LookAheadSet common = set1.CreateIntersection(set2);
    if (!common.IsRepetitive())
    {
        return common;
    }
    ThrowAmbiguityException(pattern, location, common);
    return common;
}
/// <summary>
/// Returns the union of the look-ahead sets of all alternatives in a
/// pattern. The result is created with the largest maximum length
/// found among them.
/// </summary>
/// <param name="pattern">The pattern whose alternatives are merged.</param>
/// <returns>A new look-ahead set.</returns>
private LookAheadSet FindUnion(ProductionPattern pattern)
{
    int longest = 0;
    for (int index = 0; index < pattern.Count; index++)
    {
        longest = Math.Max(longest, pattern[index].LookAhead.GetMaxLength());
    }

    var union = new LookAheadSet(longest);
    for (int index = 0; index < pattern.Count; index++)
    {
        union.AddAll(pattern[index].LookAhead);
    }
    return union;
}
/// <summary>
/// Throws an unexpected-token parse exception. Tokens are consumed
/// for as long as they match the look-ahead set; the token after
/// them is reported, together with descriptions of the tokens that
/// would have been accepted there.
/// </summary>
/// <param name="set">The look-ahead set of acceptable token sequences.</param>
/// <exception cref="ParseException">Always thrown.</exception>
private void ThrowParseException(LookAheadSet set)
{
    var expected = new ArrayList();

    // Read tokens until mismatch
    while (set.IsNext(this, 1))
    {
        set = set.CreateNextSet(NextToken().Id);
    }

    // Describe the tokens that could have come next
    foreach (var tokenId in set.GetInitialTokens())
    {
        expected.Add(GetTokenDescription(tokenId));
    }

    var offending = NextToken();
    throw new ParseException(ParseException.ErrorType.UNEXPECTED_TOKEN,
                             offending.ToShortString(),
                             expected,
                             offending.StartLine,
                             offending.StartColumn);
}
/// <summary>
/// Throws an inherent-ambiguity parser creation exception listing
/// descriptions of the initial tokens of the conflicting set.
/// </summary>
/// <param name="pattern">The name of the ambiguous pattern.</param>
/// <param name="location">The location inside the pattern; may be null.</param>
/// <param name="set">The conflicting look-ahead set.</param>
/// <exception cref="ParserCreationException">Always thrown.</exception>
private void ThrowAmbiguityException(string pattern,
                                     string location,
                                     LookAheadSet set)
{
    var descriptions = new ArrayList();
    foreach (var tokenId in set.GetInitialTokens())
    {
        descriptions.Add(GetTokenDescription(tokenId));
    }

    throw new ParserCreationException(
        ParserCreationException.ErrorType.INHERENT_AMBIGUITY,
        pattern,
        location,
        descriptions);
}
/// <summary>
/// A stack of (name, value) pairs kept in two parallel lists.
/// </summary>
private class CallStack
{
    // Entries at the same index in both lists belong together.
    private readonly ArrayList _nameStack = new ArrayList();
    private readonly ArrayList _valueStack = new ArrayList();

    /// <summary>True when any entry has the given name.</summary>
    public bool Contains(string name) => _nameStack.Contains(name);

    /// <summary>True when any entry has both the given name and value.</summary>
    public bool Contains(string name, int value)
    {
        int depth = 0;
        while (depth < _nameStack.Count)
        {
            bool sameName = _nameStack[depth].Equals(name);
            if (sameName && _valueStack[depth].Equals(value))
            {
                return true;
            }
            depth++;
        }
        return false;
    }

    /// <summary>Removes all entries.</summary>
    public void Clear()
    {
        _nameStack.Clear();
        _valueStack.Clear();
    }

    /// <summary>Adds an entry on top of the stack.</summary>
    public void Push(string name, int value)
    {
        _nameStack.Add(name);
        _valueStack.Add(value);
    }

    /// <summary>Removes the top entry; does nothing on an empty stack.</summary>
    public void Pop()
    {
        int top = _nameStack.Count - 1;
        if (top < 0)
        {
            return;
        }
        _nameStack.RemoveAt(top);
        _valueStack.RemoveAt(_valueStack.Count - 1);
    }
}
}
}

505
Parsing/RegExp.cs Normal file
View File

@@ -0,0 +1,505 @@
using System.Collections;
using System.Globalization;
using System.Text;
namespace Flee.Parsing
{
/**
 * A regular expression. This class creates and holds an internal
 * data structure representing a regular expression. It also
 * allows creating matchers. This class is thread-safe. Multiple
 * matchers may operate simultaneously on the same regular
 * expression.
 */
internal class RegExp
{
private readonly Element _element;
private readonly string _pattern;
private readonly bool _ignoreCase;
private int _pos;
/// <summary>
/// Creates a case-sensitive regular expression by delegating to the
/// two-argument constructor with <c>ignoreCase</c> set to false.
/// </summary>
/// <param name="pattern">The regular expression pattern.</param>
public RegExp(string pattern)
    : this(pattern, false)
{
}
/// <summary>
/// Creates a regular expression by parsing the whole pattern.
/// </summary>
/// <param name="pattern">The regular expression pattern.</param>
/// <param name="ignoreCase">Whether pattern characters are lower-cased while parsing.</param>
/// <exception cref="RegExpException">
/// The pattern could not be parsed, or characters remain after the
/// parsed expression.
/// </exception>
public RegExp(string pattern, bool ignoreCase)
{
    _pattern = pattern;
    _ignoreCase = ignoreCase;
    _pos = 0;
    _element = ParseExpr();

    // The parser must have consumed every character.
    bool leftover = _pos < pattern.Length;
    if (leftover)
    {
        throw new RegExpException(
            RegExpException.ErrorType.UNEXPECTED_CHARACTER,
            _pos,
            pattern);
    }
}
/// <summary>
/// Creates a matcher for the given string by wrapping it in a
/// <c>ReaderBuffer</c>.
/// </summary>
/// <param name="str">The text to match against.</param>
public Matcher Matcher(string str)
{
    var reader = new StringReader(str);
    var buffer = new ReaderBuffer(reader);
    return Matcher(buffer);
}
/// <summary>
/// Creates a matcher for the given buffer. Each matcher receives its
/// own clone of the parsed element tree.
/// </summary>
/// <param name="buffer">The character buffer to match against.</param>
public Matcher Matcher(ReaderBuffer buffer)
{
    var elementCopy = (Element)_element.Clone();
    return new Matcher(elementCopy, buffer, _ignoreCase);
}
/// <summary>
/// Returns a multi-line description with the pattern, the flags and
/// a dump of the parsed element tree.
/// </summary>
public override string ToString()
{
    var output = new StringWriter();
    output.WriteLine("Regular Expression");
    output.WriteLine(" Pattern: " + _pattern);

    // Flags line: the label followed by each active flag.
    output.Write(" Flags:");
    if (_ignoreCase)
    {
        output.Write(" caseignore");
    }
    output.WriteLine();

    output.WriteLine(" Compiled:");
    _element.PrintTo(output, " ");
    return output.ToString();
}
/// <summary>
/// Parses an expression: a term, optionally followed by '|' and
/// another expression.
/// </summary>
/// <returns>
/// The term itself, or an <c>AlternativeElement</c> of the term and
/// the expression after the '|'.
/// </returns>
private Element ParseExpr()
{
    var first = ParseTerm();
    if (PeekChar(0) == '|')
    {
        ReadChar('|');
        return new AlternativeElement(first, ParseExpr());
    }
    return first;
}
/// <summary>
/// Parses a term: one or more factors in sequence, up to the end of
/// the pattern or a terminating character.
/// </summary>
/// <returns>The factors merged by <c>CombineElements</c>.</returns>
private Element ParseTerm()
{
    // Characters that end a term. '*' is not part of this list.
    const string terminators = ")]{}?+|";

    var factors = new ArrayList();
    factors.Add(ParseFact());
    for (;;)
    {
        int next = PeekChar(0);
        if (next < 0 || terminators.IndexOf((char)next) >= 0)
        {
            return CombineElements(factors);
        }
        factors.Add(ParseFact());
    }
}
/// <summary>
/// Parses a factor: an atom, optionally followed by a repeat
/// modifier starting with '?', '*', '+' or '{'.
/// </summary>
private Element ParseFact()
{
    var atom = ParseAtom();
    int next = PeekChar(0);
    bool hasModifier = next == '?' || next == '*' || next == '+' || next == '{';
    return hasModifier ? ParseAtomModifier(atom) : atom;
}
/// <summary>
/// Parses an atom: '.', a parenthesized expression, a bracketed
/// character set or a single character.
/// </summary>
/// <exception cref="RegExpException">
/// The pattern ends here, or the next character is one of
/// <c>) ] { } ? * + |</c>.
/// </exception>
private Element ParseAtom()
{
    int next = PeekChar(0);

    if (next == '.')
    {
        ReadChar('.');
        return CharacterSetElement.Dot;
    }
    if (next == '(')
    {
        ReadChar('(');
        Element group = ParseExpr();
        ReadChar(')');
        return group;
    }
    if (next == '[')
    {
        ReadChar('[');
        Element charset = ParseCharSet();
        ReadChar(']');
        return charset;
    }

    // End of pattern and these characters cannot start an atom.
    bool misplaced = next < 0 || ")]{}?*+|".IndexOf((char)next) >= 0;
    if (misplaced)
    {
        throw new RegExpException(
            RegExpException.ErrorType.UNEXPECTED_CHARACTER,
            _pos,
            _pattern);
    }
    return ParseChar();
}
/// <summary>
/// Parses a repeat modifier (<c>?</c>, <c>*</c>, <c>+</c> or
/// <c>{min[,[max]]}</c>) and an optional mode suffix (<c>?</c> for
/// reluctant, <c>+</c> for possessive), and wraps the element in a
/// <c>RepeatElement</c>. A maximum of -1 is passed for an unbounded
/// repeat.
/// </summary>
/// <param name="elem">The element being repeated.</param>
/// <exception cref="RegExpException">
/// The modifier is malformed, the maximum is zero, or the minimum
/// exceeds the maximum.
/// </exception>
private Element ParseAtomModifier(Element elem)
{
    int min;
    int max;

    // Read min and max
    char modifier = ReadChar();
    if (modifier == '?')
    {
        min = 0;
        max = 1;
    }
    else if (modifier == '*')
    {
        min = 0;
        max = -1;
    }
    else if (modifier == '+')
    {
        min = 1;
        max = -1;
    }
    else if (modifier == '{')
    {
        int bracePos = _pos - 1;
        min = ReadNumber();
        max = min;
        if (PeekChar(0) == ',')
        {
            ReadChar(',');
            // "{n,}" leaves the maximum open
            max = PeekChar(0) != '}' ? ReadNumber() : -1;
        }
        ReadChar('}');
        bool invalid = max == 0 || (max > 0 && min > max);
        if (invalid)
        {
            throw new RegExpException(
                RegExpException.ErrorType.INVALID_REPEAT_COUNT,
                bracePos,
                _pattern);
        }
    }
    else
    {
        throw new RegExpException(
            RegExpException.ErrorType.UNEXPECTED_CHARACTER,
            _pos - 1,
            _pattern);
    }

    // Read operator mode
    var type = RepeatElement.RepeatType.GREEDY;
    int suffix = PeekChar(0);
    if (suffix == '?')
    {
        ReadChar('?');
        type = RepeatElement.RepeatType.RELUCTANT;
    }
    else if (suffix == '+')
    {
        ReadChar('+');
        type = RepeatElement.RepeatType.POSSESSIVE;
    }

    return new RepeatElement(elem, min, max, type);
}
/// <summary>
/// Parses the inside of a bracketed character set, up to but not
/// including the closing ']'. A leading '^' creates the set with the
/// inverted flag. Supports escapes, single characters and
/// <c>a-z</c> style ranges.
/// </summary>
/// <returns>The character set element.</returns>
private Element ParseCharSet()
{
    bool inverted = PeekChar(0) == '^';
    if (inverted)
    {
        ReadChar('^');
    }
    var charset = new CharacterSetElement(inverted);

    while (PeekChar(0) > 0)
    {
        var start = (char)PeekChar(0);
        if (start == ']')
        {
            break;
        }

        if (start == '\\')
        {
            var escaped = ParseEscapeChar();
            if (escaped is StringElement literal)
            {
                charset.AddCharacters(literal);
            }
            else
            {
                charset.AddCharacterSet((CharacterSetElement)escaped);
            }
            continue;
        }

        ReadChar(start);
        // A '-' only forms a range when something other than ']' follows it.
        bool isRange = PeekChar(0) == '-'
            && PeekChar(1) > 0
            && PeekChar(1) != ']';
        if (isRange)
        {
            ReadChar('-');
            var end = ReadChar();
            charset.AddRange(FixChar(start), FixChar(end));
        }
        else
        {
            charset.AddCharacter(FixChar(start));
        }
    }
    return charset;
}
/// <summary>
/// Parses a single character or escape sequence.
/// </summary>
/// <exception cref="RegExpException">
/// The next character is '^' or '$', which are not supported here.
/// </exception>
private Element ParseChar()
{
    int next = PeekChar(0);
    if (next == '\\')
    {
        return ParseEscapeChar();
    }
    if (next == '^' || next == '$')
    {
        throw new RegExpException(
            RegExpException.ErrorType.UNSUPPORTED_SPECIAL_CHARACTER,
            _pos,
            _pattern);
    }
    return new StringElement(FixChar(ReadChar()));
}
/// <summary>
/// Parses an escape sequence starting with a backslash. Handles
/// octal (<c>\0nnn</c>), hexadecimal (<c>\xhh</c>) and Unicode
/// (<c>\uhhhh</c>) codes, control characters (<c>\t \n \r \f \a \e</c>)
/// and the character classes <c>\d \D \s \S \w \W</c>. Any other
/// escaped non-letter stands for itself.
/// </summary>
/// <exception cref="RegExpException">
/// The escape is malformed, or an unsupported letter is escaped.
/// </exception>
private Element ParseEscapeChar()
{
    ReadChar('\\');
    char code = ReadChar();
    switch (code)
    {
        case '0': return ParseOctal();
        case 'x': return ParseHex(2);
        case 'u': return ParseHex(4);
        case 't': return new StringElement('\t');
        case 'n': return new StringElement('\n');
        case 'r': return new StringElement('\r');
        case 'f': return new StringElement('\f');
        case 'a': return new StringElement('\u0007');
        case 'e': return new StringElement('\u001B');
        case 'd': return CharacterSetElement.Digit;
        case 'D': return CharacterSetElement.NonDigit;
        case 's': return CharacterSetElement.Whitespace;
        case 'S': return CharacterSetElement.NonWhitespace;
        case 'w': return CharacterSetElement.Word;
        case 'W': return CharacterSetElement.NonWord;
    }

    // Any other ASCII letter is rejected.
    bool isLetter = ('A' <= code && code <= 'Z') || ('a' <= code && code <= 'z');
    if (isLetter)
    {
        throw new RegExpException(
            RegExpException.ErrorType.UNSUPPORTED_ESCAPE_CHARACTER,
            _pos - 2,
            _pattern);
    }
    return new StringElement(FixChar(code));

    // Reads a digit 0-3 followed by up to two more octal digits.
    Element ParseOctal()
    {
        char lead = ReadChar();
        if (lead < '0' || lead > '3')
        {
            throw new RegExpException(
                RegExpException.ErrorType.UNSUPPORTED_ESCAPE_CHARACTER,
                _pos - 3,
                _pattern);
        }
        int value = lead - '0';
        for (int extra = 0; extra < 2; extra++)
        {
            int next = PeekChar(0);
            if (next < '0' || next > '7')
            {
                break;
            }
            value *= 8;
            value += ReadChar() - '0';
        }
        return new StringElement(FixChar((char)value));
    }

    // Reads exactly 'digits' characters and parses them as hexadecimal.
    Element ParseHex(int digits)
    {
        string hex = "";
        for (int k = 0; k < digits; k++)
        {
            hex += ReadChar().ToString();
        }
        try
        {
            int value = Int32.Parse(hex,
                                    NumberStyles.AllowHexSpecifier);
            return new StringElement(FixChar((char)value));
        }
        catch (FormatException)
        {
            throw new RegExpException(
                RegExpException.ErrorType.UNSUPPORTED_ESCAPE_CHARACTER,
                _pos - hex.Length - 2,
                _pattern);
        }
    }
}
/// <summary>
/// Returns the lower-case form of the character when this expression
/// ignores case; otherwise returns it unchanged.
/// </summary>
// NOTE(review): Char.ToLower(char) uses the current culture. Confirm how
// the matcher lower-cases its input before switching to ToLowerInvariant.
private char FixChar(char c)
{
    if (_ignoreCase)
    {
        return Char.ToLower(c);
    }
    return c;
}
/// <summary>
/// Reads a run of decimal digits at the current position and returns
/// its value.
/// </summary>
/// <returns>The parsed non-negative number.</returns>
/// <exception cref="RegExpException">
/// No digit is present at the current position
/// (<c>UNEXPECTED_CHARACTER</c>), or the number does not fit in an
/// <c>int</c> (<c>INVALID_REPEAT_COUNT</c>).
/// </exception>
private int ReadNumber()
{
    StringBuilder buf = new StringBuilder();
    int startPos = _pos;
    int c = PeekChar(0);
    while ('0' <= c && c <= '9')
    {
        buf.Append(ReadChar());
        c = PeekChar(0);
    }
    if (buf.Length <= 0)
    {
        throw new RegExpException(
            RegExpException.ErrorType.UNEXPECTED_CHARACTER,
            _pos,
            _pattern);
    }
    // The text holds only ASCII digits, so the only way parsing can
    // fail is overflow. Report that as a pattern error rather than
    // letting an OverflowException escape.
    if (!Int32.TryParse(buf.ToString(),
                        NumberStyles.None,
                        CultureInfo.InvariantCulture,
                        out int value))
    {
        throw new RegExpException(
            RegExpException.ErrorType.INVALID_REPEAT_COUNT,
            startPos,
            _pattern);
    }
    return value;
}
/// <summary>
/// Consumes and returns the character at the current position.
/// </summary>
/// <exception cref="RegExpException">The end of the pattern has been reached.</exception>
private char ReadChar()
{
    int next = PeekChar(0);
    if (next >= 0)
    {
        _pos++;
        return (char)next;
    }
    throw new RegExpException(
        RegExpException.ErrorType.UNTERMINATED_PATTERN,
        _pos,
        _pattern);
}
/// <summary>
/// Consumes the next character and requires it to equal the given one.
/// </summary>
/// <param name="c">The expected character.</param>
/// <returns>The expected character.</returns>
/// <exception cref="RegExpException">
/// A different character was read, or the pattern has ended.
/// </exception>
private char ReadChar(char c)
{
    char actual = ReadChar();
    if (actual == c)
    {
        return c;
    }
    // The mismatching character has already been consumed, so it
    // sits one position back.
    throw new RegExpException(
        RegExpException.ErrorType.UNEXPECTED_CHARACTER,
        _pos - 1,
        _pattern);
}
/// <summary>
/// Returns the character <paramref name="count"/> positions ahead of
/// the current position without consuming it, or -1 past the end of
/// the pattern.
/// </summary>
private int PeekChar(int count)
{
    int index = _pos + count;
    return index < _pattern.Length ? _pattern[index] : -1;
}
/// <summary>
/// Merges a non-empty list of elements into one. Adjacent string
/// elements are first joined into a single string element (the list
/// is modified in place). The remaining elements are then chained
/// from right to left with <c>CombineElement</c>.
/// </summary>
/// <param name="list">The elements to merge.</param>
/// <returns>The single merged element.</returns>
private Element CombineElements(ArrayList list)
{
    // Concatenate string elements
    int index = 1;
    while (index < list.Count)
    {
        if (list[index - 1] is StringElement left
            && list[index] is StringElement right)
        {
            // Stay at the same index so the merged element can absorb
            // the next neighbour as well.
            list[index - 1] = new StringElement(left.GetString() +
                                                right.GetString());
            list.RemoveAt(index);
        }
        else
        {
            index++;
        }
    }

    // Combine all remaining elements
    var combined = (Element)list[list.Count - 1];
    for (int k = list.Count - 2; k >= 0; k--)
    {
        combined = new CombineElement((Element)list[k], combined);
    }
    return combined;
}
}
}

113
Parsing/RegExpException.cs Normal file
View File

@@ -0,0 +1,113 @@
using System.Text;
namespace Flee.Parsing
{
/**
 * A regular expression exception. This exception is thrown if a
 * regular expression couldn't be processed (or "compiled")
 * properly. It records the kind of error, the position in the
 * pattern where it was detected and the pattern itself, and builds
 * its message from those three values.
 */
internal class RegExpException : Exception
{
    public enum ErrorType
    {
        /**
         * The unexpected character error constant. This error is
         * used when a character was read that didn't match the
         * allowed set of characters at the given position.
         */
        UNEXPECTED_CHARACTER,
        /**
         * The unterminated pattern error constant. This error is
         * used when more characters were expected in the pattern.
         */
        UNTERMINATED_PATTERN,
        /**
         * The unsupported special character error constant. This
         * error is used when special regular expression
         * characters are used in the pattern, but not supported
         * in this implementation.
         */
        UNSUPPORTED_SPECIAL_CHARACTER,
        /**
         * The unsupported escape character error constant. This
         * error is used when an escape character construct is
         * used in the pattern, but not supported in this
         * implementation.
         */
        UNSUPPORTED_ESCAPE_CHARACTER,
        /**
         * The invalid repeat count error constant. This error is
         * used when a repetition count of zero is specified, or
         * when the minimum exceeds the maximum.
         */
        INVALID_REPEAT_COUNT
    }

    private readonly ErrorType _type;
    private readonly int _position;
    private readonly string _pattern;

    /**
     * Creates a new regular expression exception.
     *
     * @param type    the kind of error
     * @param pos     the zero-based pattern position of the error
     * @param pattern the pattern being processed
     */
    public RegExpException(ErrorType type, int pos, string pattern)
    {
        this._type = type;
        this._position = pos;
        this._pattern = pattern;
    }

    /**
     * The error message, built on demand by GetMessage().
     */
    public override string Message => GetMessage();

    /**
     * Builds the message: the error description, the remainder of
     * the pattern from the error position (or an end-of-pattern
     * marker) and the position. This method never throws, so that
     * reading the message cannot mask the original error.
     *
     * @return the complete error message
     */
    public string GetMessage()
    {
        StringBuilder buffer = new StringBuilder();

        // Append error type name
        buffer.Append(Describe(_type));

        // Append the pattern text from the error position onwards
        buffer.Append(": ");
        if (_pattern != null && _position < _pattern.Length)
        {
            // A negative position would make Substring throw; clamp
            // it so the whole pattern is shown instead.
            int from = _position < 0 ? 0 : _position;
            buffer.Append('\'');
            buffer.Append(_pattern.Substring(from));
            buffer.Append('\'');
        }
        else
        {
            // Position at/after the end, or no pattern available
            buffer.Append("<end of pattern>");
        }

        // Append position
        buffer.Append(" at position ");
        buffer.Append(_position);
        return buffer.ToString();
    }

    /**
     * Maps an error type to its human-readable description.
     */
    private static string Describe(ErrorType type)
    {
        switch (type)
        {
            case ErrorType.UNEXPECTED_CHARACTER:
                return "unexpected character";
            case ErrorType.UNTERMINATED_PATTERN:
                return "unterminated pattern";
            case ErrorType.UNSUPPORTED_SPECIAL_CHARACTER:
                return "unsupported character";
            case ErrorType.UNSUPPORTED_ESCAPE_CHARACTER:
                return "unsupported escape character";
            case ErrorType.INVALID_REPEAT_COUNT:
                return "invalid repeat count";
            default:
                return "internal error";
        }
    }
}
}

239
Parsing/RepeatElement.cs Normal file
View File

@@ -0,0 +1,239 @@
using System.Collections;
namespace Flee.Parsing
{
/**
 * A regular expression element repeater. The element repeats the
 * matches from a specified element, attempting to reach the
 * maximum repetition count.
 */
internal class RepeatElement : Element
{
    /**
     * The repetition strategies: greedy tries the longest match
     * first, reluctant the shortest first, and possessive yields
     * only a single match with no alternatives.
     */
    public enum RepeatType
    {
        GREEDY = 1,
        RELUCTANT = 2,
        POSSESSIVE = 3
    }

    private readonly Element _elem;
    private readonly int _min;
    private readonly int _max;
    private readonly RepeatType _type;

    // Start position the cached _matches were computed for (-1 = none)
    private int _matchStart;

    // Bit i is set when some valid repetition sequence has total length i
    private BitArray _matches;

    /**
     * Creates a new repeater.
     *
     * @param elem the element to repeat
     * @param min  the minimum number of repetitions
     * @param max  the maximum number of repetitions; zero or a
     *             negative value means unbounded
     * @param type the repetition strategy
     */
    public RepeatElement(Element elem,
                         int min,
                         int max,
                         RepeatType type)
    {
        _elem = elem;
        _min = min;
        _max = max <= 0 ? Int32.MaxValue : max;
        _type = type;
        _matchStart = -1;
        _matches = null;
    }

    /**
     * Creates a copy of this repeater with a cloned inner element
     * and a fresh (empty) match cache.
     */
    public override object Clone()
    {
        var innerCopy = (Element)_elem.Clone();
        return new RepeatElement(innerCopy, _min, _max, _type);
    }

    /**
     * Returns the length of a match at the given start position.
     * The skip argument selects which alternative match to return
     * (0 = first choice); -1 is returned when no such match exists.
     */
    public override int Match(Matcher m,
                              ReaderBuffer buffer,
                              int start,
                              int skip)
    {
        bool firstAttempt = skip == 0;
        if (firstAttempt)
        {
            // A new matching round invalidates the cache
            _matchStart = -1;
            _matches = null;
        }
        if (_type == RepeatType.GREEDY)
        {
            return MatchGreedy(m, buffer, start, skip);
        }
        if (_type == RepeatType.RELUCTANT)
        {
            return MatchReluctant(m, buffer, start, skip);
        }
        if (_type == RepeatType.POSSESSIVE && firstAttempt)
        {
            return MatchPossessive(m, buffer, start, 0);
        }
        return -1;
    }

    /**
     * Greedy matching: the first choice is the possessive result,
     * later choices walk the cached match lengths from longest to
     * shortest.
     */
    private int MatchGreedy(Matcher m,
                            ReaderBuffer buffer,
                            int start,
                            int skip)
    {
        // Simple case: no alternatives requested yet
        if (skip == 0)
        {
            return MatchPossessive(m, buffer, start, 0);
        }

        EnsureMatches(m, buffer, start);

        // Walk downwards, passing over 'skip' recorded lengths
        int remaining = skip;
        for (int len = _matches.Count - 1; len >= 0; len--)
        {
            if (!_matches[len])
            {
                continue;
            }
            if (remaining == 0)
            {
                return len;
            }
            remaining--;
        }
        return -1;
    }

    /**
     * Reluctant matching: choices walk the cached match lengths
     * from shortest to longest.
     */
    private int MatchReluctant(Matcher m,
                               ReaderBuffer buffer,
                               int start,
                               int skip)
    {
        EnsureMatches(m, buffer, start);

        // Walk upwards, passing over 'skip' recorded lengths
        int remaining = skip;
        for (int len = 0; len < _matches.Count; len++)
        {
            if (!_matches[len])
            {
                continue;
            }
            if (remaining == 0)
            {
                return len;
            }
            remaining--;
        }
        return -1;
    }

    /**
     * Computes the set of possible match lengths for the given
     * start position unless it is already cached.
     */
    private void EnsureMatches(Matcher m, ReaderBuffer buffer, int start)
    {
        if (_matchStart == start)
        {
            return;
        }
        _matchStart = start;
        _matches = new BitArray(10);
        FindMatches(m, buffer, start, 0, 0, 0);
    }

    /**
     * Possessive matching: repeats the first-choice match of the
     * inner element as often as possible and returns the total
     * length, or -1 when the repetition count ends up outside the
     * allowed range.
     */
    private int MatchPossessive(Matcher m,
                                ReaderBuffer buffer,
                                int start,
                                int count)
    {
        int total = 0;
        while (count < _max)
        {
            int matched = _elem.Match(m, buffer, start + total, 0);
            if (matched >= 0)
            {
                count++;
                total += matched;
            }
            if (matched <= 0)
            {
                // Failure or empty match: further attempts cannot advance
                break;
            }
        }
        bool withinBounds = _min <= count && count <= _max;
        return withinBounds ? total : -1;
    }

    /**
     * Recursively explores every combination of inner element
     * matches and records each total length that satisfies the
     * minimum repetition count.
     *
     * @param start   the buffer position for the next repetition
     * @param length  the total length matched so far
     * @param count   the repetitions matched so far
     * @param attempt the alternative of the inner element to try
     */
    private void FindMatches(Matcher m,
                             ReaderBuffer buffer,
                             int start,
                             int length,
                             int count,
                             int attempt)
    {
        if (count > _max)
        {
            return;
        }

        // A match may end here (recorded once, on the first attempt)
        if (_min <= count && attempt == 0)
        {
            MarkMatch(length);
        }

        int subLength = _elem.Match(m, buffer, start, attempt);
        if (subLength < 0)
        {
            return;
        }
        if (subLength == 0)
        {
            // An empty repetition only helps when it completes the minimum
            if (_min == count + 1)
            {
                MarkMatch(length);
            }
            return;
        }

        // Next alternative at this position, then the following repetition
        FindMatches(m, buffer, start, length, count, attempt + 1);
        FindMatches(m,
                    buffer,
                    start + subLength,
                    length + subLength,
                    count + 1,
                    0);
    }

    /**
     * Records a possible total match length, growing the bit array
     * when needed.
     */
    private void MarkMatch(int length)
    {
        if (_matches.Length <= length)
        {
            _matches.Length = length + 10;
        }
        _matches[length] = true;
    }

    /**
     * Prints this element and its inner element to the output.
     */
    public override void PrintTo(TextWriter output, string indent)
    {
        string marker = "";
        if (_type == RepeatType.RELUCTANT)
        {
            marker = "?";
        }
        else if (_type == RepeatType.POSSESSIVE)
        {
            marker = "+";
        }
        output.Write(indent + "Repeat (" + _min + "," + _max + ")");
        output.WriteLine(marker);
        _elem.PrintTo(output, indent + " ");
    }
}
}

761
Parsing/StackParser.cs Normal file
View File

@@ -0,0 +1,761 @@
using System.Collections;
namespace Flee.Parsing
{
/**
* A stack-based parser. It is based on the recursive descent parser,
* but replaces the recursion with an explicit stack. This parser
* handles LL(n) grammars, selecting the appropriate pattern to parse
* based on the next few tokens.
*/
internal class StackParser : Parser
{
/**
* The parser state that is pushed onto the stack, simulating the
* local variables of the recursive version. Some fields stand in
* for the execution position (for example validnext), so that a
* pattern can be resumed where it left off after a nested pattern
* has been parsed.
*/
internal class ParseState
{
/**
* The production pattern being parsed in this state.
*/
internal ProductionPattern pattern;
/**
* Index of the alternative of the pattern that is currently
* being checked or parsed.
*/
internal int altindex;
/**
* Index of the current element within the chosen alternative.
*/
internal int elementindex;
/**
* Repetition counter for the current element: the loop index at
* which ParseElement resumes. Reset to zero when the element is
* completed.
*/
internal int tokenindex;
/**
* The parse tree node being built for this state (null until an
* alternative has been entered).
*/
internal Node node;
/**
* True if the current alternative has already been accepted by
* IsNext, so the look-ahead check must not be repeated when the
* state is resumed.
*/
internal bool validnext;
}
/// <summary>
/// Creates a new parser reading from the given input. All work is
/// delegated to the base parser constructor.
/// </summary>
/// <param name="input">the input stream to read from</param>
public StackParser(TextReader input) : base(input)
{
}
/// <summary>
/// Creates a new parser reading from the given input and using the
/// given analyzer. All work is delegated to the base parser constructor.
/// </summary>
/// <param name="input">the input stream to read from</param>
/// <param name="analyzer">the analyzer passed on to the base parser</param>
public StackParser(TextReader input, Analyzer analyzer)
: base(input, analyzer)
{
}
/// <summary>
/// Creates a new parser on top of the given tokenizer. All work is
/// delegated to the base parser constructor.
/// </summary>
/// <param name="tokenizer">the tokenizer to use</param>
public StackParser(Tokenizer tokenizer)
: base(tokenizer)
{
}
/// <summary>
/// Creates a new parser on top of the given tokenizer and using the
/// given analyzer. All work is delegated to the base parser constructor.
/// </summary>
/// <param name="tokenizer">the tokenizer to use</param>
/// <param name="analyzer">the analyzer passed on to the base parser</param>
public StackParser(Tokenizer tokenizer,
Analyzer analyzer)
: base(tokenizer, analyzer)
{
}
/// <summary>
/// Adds a production pattern to the parser after verifying that this
/// parser can handle it.
/// </summary>
/// <param name="pattern">the pattern to add</param>
/// <exception cref="ParserCreationException">
/// if the pattern can match zero elements or is left recursive
/// </exception>
public override void AddPattern(ProductionPattern pattern)
{
    string problem = null;
    if (pattern.IsMatchingEmpty())
    {
        // Empty matches are rejected
        problem = "zero elements can be matched (minimum is one)";
    }
    else if (pattern.IsLeftRecursive())
    {
        // Left-recursive patterns are rejected
        problem = "left recursive patterns are not allowed";
    }
    if (problem != null)
    {
        throw new ParserCreationException(
            ParserCreationException.ErrorType.INVALID_PRODUCTION,
            pattern.Name,
            problem);
    }
    base.AddPattern(pattern);
}
/// <summary>
/// Prepares the parser for use: runs the base class pattern checks
/// and then computes the look-ahead sets of every production pattern.
/// The initialized flag is cleared while the sets are being computed
/// and set once they are complete.
/// </summary>
public override void Prepare()
{
    // Performs production pattern checks
    base.Prepare();
    SetInitialized(false);

    // Calculate production look-ahead sets
    foreach (ProductionPattern pattern in GetPatterns())
    {
        CalculateLookAhead(pattern);
    }

    SetInitialized(true);
}
/// <summary>
/// Parses the input from the start pattern and checks that no token
/// is left over afterwards.
/// </summary>
/// <returns>the root node of the parse tree</returns>
/// <exception cref="ParseException">
/// if a token remains after the start pattern has been parsed
/// </exception>
protected override Node ParseStart()
{
    var root = ParsePatterns(GetStartPattern());
    var trailing = PeekToken(0);
    if (trailing == null)
    {
        return root;
    }

    // Anything after the start production is unexpected
    var expected = new ArrayList(1);
    expected.Add("<EOF>");
    throw new ParseException(
        ParseException.ErrorType.UNEXPECTED_TOKEN,
        trailing.ToShortString(),
        expected,
        trailing.StartLine,
        trailing.StartColumn);
}
/// <summary>
/// Creates the initial parse state for a pattern: every index is zero,
/// no node has been created and no alternative has been validated.
/// </summary>
/// <param name="pattern">the pattern the state will parse</param>
/// <returns>the new state</returns>
private ParseState NewState(ProductionPattern pattern)
{
    var fresh = new ParseState();
    fresh.pattern = pattern;
    fresh.altindex = 0;
    fresh.elementindex = 0;
    fresh.tokenindex = 0;
    fresh.node = null;
    fresh.validnext = false;
    return fresh;
}
/// <summary>
/// Parses a production pattern, and every pattern nested in it, using
/// an explicit stack of <see cref="ParseState"/> objects instead of
/// recursion. The stack is local to this method rather than stored on
/// the parser object.
/// NOTE(review): the original comment states that the parser is a
/// singleton that may parse on several threads - confirm with callers.
/// </summary>
/// <param name="start">the pattern to begin parsing with</param>
/// <returns>the node produced for the start pattern</returns>
private Node ParsePatterns(ProductionPattern start)
{
Stack<ParseState> _stack = new Stack<ParseState>();
_stack.Push(NewState(start));
while (_stack.Count > 0)
{
// Resume (or begin) the state on top of the stack
ParseState state = _stack.Peek();
ProductionPattern pattern = state.pattern;
var defaultAlt = pattern.DefaultAlternative;
ProductionPattern nextpattern = null;
// Find the first non-default alternative whose look-ahead matches.
// When resuming, validnext is already set and the check is skipped.
while (state.altindex < pattern.Count)
{
var alt = pattern[state.altindex];
if (state.validnext || (defaultAlt != alt && IsNext(alt)))
{
state.validnext = true;
nextpattern = ParseAlternative(state, alt);
break;
}
else
{
state.altindex++;
state.validnext = false;
}
}
// check if completed pass through alt patterns. try default
if (state.altindex >= pattern.Count)
{
if (!state.validnext && (defaultAlt == null || !IsNext(defaultAlt)))
{
// No alternative applies: report what was expected
ThrowParseException(FindUnion(pattern));
}
else
{
// altindex stays at pattern.Count, so a resumed state comes
// straight back here and continues with the default alternative
state.validnext = true;
nextpattern = ParseAlternative(state, defaultAlt);
}
}
if (nextpattern != null)
{
// A nested production must be parsed before this state can continue
_stack.Push(NewState(nextpattern));
}
// we finished current pattern, so back up to previous state.
else
{
// if we have a node set, add it to the parent
var child = state.node;
_stack.Pop();
if (_stack.Count == 0)
{
// back to top, can return our result, which is top node
return child;
}
state = _stack.Peek();
AddNode((Production)state.node, child);
}
}
// should never get here, but must show we return something.
return null;
}
/**
 * Parses (or resumes parsing) an alternative of the pattern in the
 * given state. Returns the nested pattern that must be pushed onto
 * the stack and processed next, or null once every element of the
 * alternative has been handled and the node has been exited.
 * Parse errors raised for an element are logged, one token is
 * consumed and the same element is retried.
 */
private ProductionPattern ParseAlternative(ParseState state, ProductionPatternAlternative alt)
{
    // First visit: create and enter the production node
    if (state.node == null)
    {
        state.node = NewProduction(alt.Pattern);
        state.elementindex = 0;
        EnterNode(state.node);
    }

    while (state.elementindex < alt.Count)
    {
        try
        {
            var nested = ParseElement(state, alt[state.elementindex]);
            if (nested != null)
            {
                // Suspend here until the nested pattern is parsed
                return nested;
            }
            state.elementindex++;
        }
        catch (ParseException e)
        {
            // Recover by logging the error and consuming one token
            AddError(e, true);
            NextToken();
        }
    }

    state.node = ExitNode(state.node);
    return null;
}
/// <summary>
/// Parses the repetitions of one production pattern element. Token
/// elements are consumed directly and added to the node of the state.
/// For a production element the repetition counter is stored in the
/// state and the referenced pattern is returned, so the caller can
/// push it and resume here afterwards.
/// </summary>
/// <param name="state">the state owning the node under construction</param>
/// <param name="elem">the element to parse</param>
/// <returns>
/// the nested pattern to parse next, or null when the element is complete
/// </returns>
private ProductionPattern ParseElement(ParseState state,
                                       ProductionPatternElement elem)
{
    for (int repetition = state.tokenindex; repetition < elem.MaxCount; repetition++)
    {
        // Optional repetitions are only taken when the look-ahead matches
        if (repetition >= elem.MinCount && !IsNext(elem))
        {
            break;
        }

        if (!elem.IsToken())
        {
            // continue from next repetition when we return
            state.tokenindex = repetition + 1;
            return GetPattern(elem.Id);
        }

        Node child = NextToken(elem.Id);
        EnterNode(child);
        AddNode((Production)state.node, ExitNode(child));
    }

    // we completed processing this element
    state.tokenindex = 0;
    return null;
}
/// <summary>
/// Checks whether the upcoming tokens match the look-ahead set of a
/// production pattern.
/// </summary>
/// <param name="pattern">the pattern to check</param>
/// <returns>true on a match; false otherwise or when no set exists</returns>
private bool IsNext(ProductionPattern pattern)
{
    LookAheadSet lookAhead = pattern.LookAhead;
    return lookAhead != null && lookAhead.IsNext(this);
}
/// <summary>
/// Checks whether the upcoming tokens match the look-ahead set of a
/// production pattern alternative.
/// </summary>
/// <param name="alt">the alternative to check</param>
/// <returns>true on a match; false otherwise or when no set exists</returns>
private bool IsNext(ProductionPatternAlternative alt)
{
    LookAheadSet lookAhead = alt.LookAhead;
    return lookAhead != null && lookAhead.IsNext(this);
}
/// <summary>
/// Checks whether the upcoming tokens match a production pattern
/// element. The element's own look-ahead set is used when present;
/// otherwise a token element is compared with the next token and a
/// production element falls back to its referenced pattern.
/// </summary>
/// <param name="elem">the element to check</param>
/// <returns>true if the element can start at the next token</returns>
private bool IsNext(ProductionPatternElement elem)
{
    LookAheadSet lookAhead = elem.LookAhead;
    if (lookAhead != null)
    {
        return lookAhead.IsNext(this);
    }
    return elem.IsToken()
        ? elem.IsMatch(PeekToken(0))
        : IsNext(GetPattern(elem.Id));
}
/// <summary>
/// Calculates the look-ahead sets for a production pattern and all of
/// its alternatives. It starts with one token of look-ahead and
/// lengthens it for the conflicting alternatives until no conflicts
/// remain; a single alternative may be left conflicting and becomes
/// the pattern's default alternative.
/// </summary>
/// <param name="pattern">the pattern to calculate look-ahead for</param>
/// <remarks>
/// An ambiguity error is raised through ThrowAmbiguityException when a
/// second alternative still conflicts after its look-ahead was extended.
/// </remarks>
private void CalculateLookAhead(ProductionPattern pattern)
{
ProductionPatternAlternative alt;
LookAheadSet previous = new LookAheadSet(0);
int length = 1;
int i;
CallStack stack = new CallStack();
// Calculate simple look-ahead
stack.Push(pattern.Name, 1);
var result = new LookAheadSet(1);
var alternatives = new LookAheadSet[pattern.Count];
for (i = 0; i < pattern.Count; i++)
{
alt = pattern[i];
alternatives[i] = FindLookAhead(alt, 1, 0, stack, null);
alt.LookAhead = alternatives[i];
result.AddAll(alternatives[i]);
}
// Keep a look-ahead set that was already assigned to the pattern
if (pattern.LookAhead == null)
{
pattern.LookAhead = result;
}
var conflicts = FindConflicts(pattern, 1);
// Resolve conflicts
while (conflicts.Size() > 0)
{
// Retry with one more token of look-ahead
length++;
stack.Clear();
stack.Push(pattern.Name, length);
conflicts.AddAll(previous);
for (i = 0; i < pattern.Count; i++)
{
alt = pattern[i];
// Only alternatives involved in a conflict are recalculated
if (alternatives[i].Intersects(conflicts))
{
alternatives[i] = FindLookAhead(alt,
length,
0,
stack,
conflicts);
alt.LookAhead = alternatives[i];
}
// Still conflicting: the first such alternative becomes the
// default, any other one makes the grammar ambiguous
if (alternatives[i].Intersects(conflicts))
{
if (pattern.DefaultAlternative == null)
{
pattern.DefaultAlternative = alt;
}
else if (pattern.DefaultAlternative != alt)
{
result = alternatives[i].CreateIntersection(conflicts);
ThrowAmbiguityException(pattern.Name,
null,
result);
}
}
}
previous = conflicts;
conflicts = FindConflicts(pattern, length);
}
// Resolve conflicts inside rules
for (i = 0; i < pattern.Count; i++)
{
CalculateLookAhead(pattern[i], 0);
}
}
/// <summary>
/// Calculates the look-ahead sets needed inside an alternative,
/// starting at the given element position and recursing through the
/// remaining positions. Only elements with a variable repetition
/// count (MinCount != MaxCount) are examined: their look-ahead is
/// compared with the look-ahead of whatever follows them, and is
/// lengthened until the two no longer conflict.
/// </summary>
/// <param name="alt">the alternative to process</param>
/// <param name="pos">the index of the element to examine</param>
/// <remarks>
/// An ambiguity error is raised through ThrowAmbiguityException when
/// the extended look-ahead still intersects the conflicts.
/// </remarks>
private void CalculateLookAhead(ProductionPatternAlternative alt,
int pos)
{
LookAheadSet previous = new LookAheadSet(0);
int length = 1;
// Check trivial cases
if (pos >= alt.Count)
{
return;
}
// Check for non-optional element
var pattern = alt.Pattern;
var elem = alt[pos];
if (elem.MinCount == elem.MaxCount)
{
// A fixed repetition count needs no decision, so no look-ahead
CalculateLookAhead(alt, pos + 1);
return;
}
// Calculate simple look-aheads
var first = FindLookAhead(elem, 1, new CallStack(), null);
var follow = FindLookAhead(alt, 1, pos + 1, new CallStack(), null);
// Resolve conflicts
var location = "at position " + (pos + 1);
var conflicts = FindConflicts(pattern.Name,
location,
first,
follow);
while (conflicts.Size() > 0)
{
// Retry with one more token of look-ahead
length++;
conflicts.AddAll(previous);
first = FindLookAhead(elem,
length,
new CallStack(),
conflicts);
follow = FindLookAhead(alt,
length,
pos + 1,
new CallStack(),
conflicts);
first = first.CreateCombination(follow);
// The element only gets an explicit look-ahead set when a conflict
// made one necessary
elem.LookAhead = first;
if (first.Intersects(conflicts))
{
first = first.CreateIntersection(conflicts);
ThrowAmbiguityException(pattern.Name, location, first);
}
previous = conflicts;
conflicts = FindConflicts(pattern.Name,
location,
first,
follow);
}
// Check remaining elements
CalculateLookAhead(alt, pos + 1);
}
/// <summary>
/// Finds the look-ahead set for a production pattern: the union of
/// the look-ahead sets of all its alternatives.
/// </summary>
/// <param name="pattern">the pattern to examine</param>
/// <param name="length">the maximum look-ahead length</param>
/// <param name="stack">the call stack used to detect infinite loops</param>
/// <param name="filter">the look-ahead filter passed on to the alternatives, or null</param>
/// <returns>the look-ahead set of the pattern</returns>
/// <exception cref="ParserCreationException">
/// if the pattern is already on the call stack with the same length
/// </exception>
private LookAheadSet FindLookAhead(ProductionPattern pattern,
                                   int length,
                                   CallStack stack,
                                   LookAheadSet filter)
{
    // Check for infinite loop
    bool alreadyVisiting = stack.Contains(pattern.Name, length);
    if (alreadyVisiting)
    {
        throw new ParserCreationException(
            ParserCreationException.ErrorType.INFINITE_LOOP,
            pattern.Name,
            (String)null);
    }

    // Find pattern look-ahead
    stack.Push(pattern.Name, length);
    var union = new LookAheadSet(length);
    for (int altIndex = 0; altIndex < pattern.Count; altIndex++)
    {
        union.AddAll(FindLookAhead(pattern[altIndex],
                                   length,
                                   0,
                                   stack,
                                   filter));
    }
    stack.Pop();
    return union;
}
/// <summary>
/// Finds the look-ahead set for an alternative, starting at the given
/// element position: the look-ahead of that element combined with the
/// look-ahead of the elements following it, as far as the requested
/// length requires.
/// </summary>
/// <param name="alt">the alternative to examine</param>
/// <param name="length">the maximum look-ahead length</param>
/// <param name="pos">the index of the first element to include</param>
/// <param name="stack">the call stack used to detect infinite loops</param>
/// <param name="filter">the look-ahead filter, or null for none</param>
/// <returns>the look-ahead set; empty when length or pos is exhausted</returns>
private LookAheadSet FindLookAhead(ProductionPatternAlternative alt,
int length,
int pos,
CallStack stack,
LookAheadSet filter)
{
LookAheadSet follow;
// Check trivial cases
if (length <= 0 || pos >= alt.Count)
{
return new LookAheadSet(0);
}
// Find look-ahead for this element
var first = FindLookAhead(alt[pos], length, stack, filter);
if (alt[pos].MinCount == 0)
{
// An optional element may also contribute nothing
first.AddEmpty();
}
// Find remaining look-ahead
if (filter == null)
{
// Unfiltered: continue only while the shortest sequence found
// so far leaves room within the requested length
length -= first.GetMinLength();
if (length > 0)
{
follow = FindLookAhead(alt, length, pos + 1, stack, null);
first = first.CreateCombination(follow);
}
}
else if (filter.IsOverlap(first))
{
// Filtered: only the sequences overlapping the filter are
// extended with the look-ahead of the following elements
var overlaps = first.CreateOverlaps(filter);
length -= overlaps.GetMinLength();
filter = filter.CreateFilter(overlaps);
follow = FindLookAhead(alt, length, pos + 1, stack, filter);
first.RemoveAll(overlaps);
first.AddAll(overlaps.CreateCombination(follow));
}
return first;
}
/// <summary>
/// Finds the look-ahead set for a production pattern element, taking
/// element repetitions into account when a filter is given and the
/// single-occurrence look-ahead overlaps it.
/// </summary>
/// <param name="elem">the element to examine</param>
/// <param name="length">the maximum look-ahead length</param>
/// <param name="stack">the call stack used to detect infinite loops</param>
/// <param name="filter">the look-ahead filter, or null for none</param>
/// <returns>the look-ahead set of the element</returns>
private LookAheadSet FindLookAhead(ProductionPatternElement elem,
int length,
CallStack stack,
LookAheadSet filter)
{
// Find initial element look-ahead
var first = FindLookAhead(elem, length, 0, stack, filter);
var result = new LookAheadSet(length);
result.AddAll(first);
// Without a filter, or without overlap, one occurrence is enough
if (filter == null || !filter.IsOverlap(result))
{
return result;
}
// Handle element repetitions
if (elem.MaxCount == Int32.MaxValue)
{
first = first.CreateRepetitive();
}
// No more repetitions are examined than the look-ahead length
var max = elem.MaxCount;
if (length < max)
{
max = length;
}
for (int i = 1; i < max; i++)
{
first = first.CreateOverlaps(filter);
// Stop when nothing overlaps any more or the length is used up
if (first.Size() <= 0 || first.GetMinLength() >= length)
{
break;
}
var follow = FindLookAhead(elem,
length,
0,
stack,
filter.CreateFilter(first));
first = first.CreateCombination(follow);
result.AddAll(first);
}
return result;
}
/// <summary>
/// Finds the look-ahead set for a single occurrence of a production
/// pattern element. A token element yields a set holding just its
/// token id; a production element yields the look-ahead of the
/// referenced pattern, made repetitive when that pattern is still on
/// the call stack afterwards.
/// </summary>
/// <param name="elem">the element to examine</param>
/// <param name="length">the maximum look-ahead length</param>
/// <param name="dummy">unused; only distinguishes this overload</param>
/// <param name="stack">the call stack used to detect infinite loops</param>
/// <param name="filter">the look-ahead filter, or null for none</param>
/// <returns>the look-ahead set of one occurrence of the element</returns>
private LookAheadSet FindLookAhead(ProductionPatternElement elem,
                                   int length,
                                   int dummy,
                                   CallStack stack,
                                   LookAheadSet filter)
{
    if (elem.IsToken())
    {
        var tokenSet = new LookAheadSet(length);
        tokenSet.Add(elem.Id);
        return tokenSet;
    }

    var referenced = GetPattern(elem.Id);
    LookAheadSet patternSet = FindLookAhead(referenced, length, stack, filter);
    return stack.Contains(referenced.Name)
        ? patternSet.CreateRepetitive()
        : patternSet;
}
/// <summary>
/// Collects the look-ahead conflicts between the alternatives of a
/// pattern, i.e. the pairwise intersections of their look-ahead sets.
/// </summary>
/// <param name="pattern">the pattern whose alternatives are compared</param>
/// <param name="maxLength">the maximum length of the resulting set</param>
/// <returns>the set of conflicting token sequences (possibly empty)</returns>
/// <remarks>
/// A repetitive conflict set is reported through ThrowAmbiguityException.
/// </remarks>
private LookAheadSet FindConflicts(ProductionPattern pattern,
                                   int maxLength)
{
    var conflicts = new LookAheadSet(maxLength);
    for (int current = 1; current < pattern.Count; current++)
    {
        var currentSet = pattern[current].LookAhead;
        for (int earlier = 0; earlier < current; earlier++)
        {
            var earlierSet = pattern[earlier].LookAhead;
            conflicts.AddAll(currentSet.CreateIntersection(earlierSet));
        }
    }
    if (conflicts.IsRepetitive())
    {
        ThrowAmbiguityException(pattern.Name, null, conflicts);
    }
    return conflicts;
}
/// <summary>
/// Computes the conflicts between two look-ahead sets, i.e. their
/// intersection.
/// </summary>
/// <param name="pattern">the pattern name, used in the error report</param>
/// <param name="location">the location inside the pattern, used in the error report</param>
/// <param name="set1">the first look-ahead set</param>
/// <param name="set2">the second look-ahead set</param>
/// <returns>the set of conflicting token sequences (possibly empty)</returns>
/// <remarks>
/// A repetitive conflict set is reported through ThrowAmbiguityException.
/// </remarks>
private LookAheadSet FindConflicts(string pattern,
                                   string location,
                                   LookAheadSet set1,
                                   LookAheadSet set2)
{
    LookAheadSet shared = set1.CreateIntersection(set2);
    if (!shared.IsRepetitive())
    {
        return shared;
    }
    ThrowAmbiguityException(pattern, location, shared);
    return shared;
}
/// <summary>
/// Builds the union of the look-ahead sets of all alternatives of a
/// pattern. The union is sized after the longest of those sets.
/// </summary>
/// <param name="pattern">the pattern whose alternatives are merged</param>
/// <returns>a new set containing every alternative's look-ahead</returns>
private LookAheadSet FindUnion(ProductionPattern pattern)
{
    // Determine the longest look-ahead among the alternatives
    int longest = 0;
    for (int i = 0; i < pattern.Count; i++)
    {
        int candidate = pattern[i].LookAhead.GetMaxLength();
        if (candidate > longest)
        {
            longest = candidate;
        }
    }

    // Merge all alternatives into one set of that length
    var union = new LookAheadSet(longest);
    for (int i = 0; i < pattern.Count; i++)
    {
        union.AddAll(pattern[i].LookAhead);
    }
    return union;
}
/// <summary>
/// Throws a parse exception describing the first token that does not
/// fit the given look-ahead set. Tokens are consumed for as long as
/// they match the set; the next token is then reported as unexpected,
/// together with the descriptions of the tokens that were possible.
/// </summary>
/// <param name="set">the look-ahead set of the expected input</param>
/// <exception cref="ParseException">always</exception>
private void ThrowParseException(LookAheadSet set)
{
    // Read tokens until mismatch
    while (set.IsNext(this, 1))
    {
        set = set.CreateNextSet(NextToken().Id);
    }

    // Find next token descriptions
    ArrayList expected = new ArrayList();
    foreach (var tokenId in set.GetInitialTokens())
    {
        expected.Add(GetTokenDescription(tokenId));
    }

    // Create exception
    var offending = NextToken();
    throw new ParseException(ParseException.ErrorType.UNEXPECTED_TOKEN,
                             offending.ToShortString(),
                             expected,
                             offending.StartLine,
                             offending.StartColumn);
}
/// <summary>
/// Throws a parser creation exception reporting an inherent ambiguity,
/// listing the descriptions of the initial tokens of the conflict set.
/// </summary>
/// <param name="pattern">the name of the ambiguous pattern</param>
/// <param name="location">the location inside the pattern, or null</param>
/// <param name="set">the conflicting look-ahead set</param>
/// <exception cref="ParserCreationException">always</exception>
private void ThrowAmbiguityException(string pattern,
                                     string location,
                                     LookAheadSet set)
{
    // Find next token descriptions
    ArrayList conflicting = new ArrayList();
    foreach (var tokenId in set.GetInitialTokens())
    {
        conflicting.Add(GetTokenDescription(tokenId));
    }

    // Create exception
    throw new ParserCreationException(
        ParserCreationException.ErrorType.INHERENT_AMBIGUITY,
        pattern,
        location,
        conflicting);
}
/// <summary>
/// A stack of (pattern name, look-ahead length) pairs, used while
/// calculating look-ahead sets to detect when a pattern is revisited.
/// The two lists are kept in step: entry i of each belongs to the
/// same call. Typed lists are used so the lengths are stored and
/// compared without boxing.
/// </summary>
private class CallStack
{
    private readonly List<string> _nameStack = new List<string>();
    private readonly List<int> _valueStack = new List<int>();

    /// <summary>
    /// Checks whether a name is on the stack, with any value.
    /// </summary>
    /// <param name="name">the name to look for</param>
    /// <returns>true if the name is present</returns>
    public bool Contains(string name)
    {
        return _nameStack.Contains(name);
    }

    /// <summary>
    /// Checks whether the exact name and value pair is on the stack.
    /// </summary>
    /// <param name="name">the name to look for</param>
    /// <param name="value">the value pushed together with the name</param>
    /// <returns>true if the pair is present</returns>
    public bool Contains(string name, int value)
    {
        for (int i = 0; i < _nameStack.Count; i++)
        {
            if (_valueStack[i] == value
                && string.Equals(_nameStack[i], name))
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Removes every entry from the stack.
    /// </summary>
    public void Clear()
    {
        _nameStack.Clear();
        _valueStack.Clear();
    }

    /// <summary>
    /// Pushes a name and value pair onto the stack.
    /// </summary>
    /// <param name="name">the name to push</param>
    /// <param name="value">the value to push with it</param>
    public void Push(string name, int value)
    {
        _nameStack.Add(name);
        _valueStack.Add(value);
    }

    /// <summary>
    /// Removes the most recently pushed pair; does nothing when the
    /// stack is empty.
    /// </summary>
    public void Pop()
    {
        int top = _nameStack.Count - 1;
        if (top >= 0)
        {
            _nameStack.RemoveAt(top);
            _valueStack.RemoveAt(top);
        }
    }
}
}
}

64
Parsing/StringElement.cs Normal file
View File

@@ -0,0 +1,64 @@
namespace Flee.Parsing
{
/**
 * A regular expression string element. This element only matches
 * an exact string. Once created, the string element is immutable.
 */
internal class StringElement : Element
{
    private readonly string _value;

    /**
     * Creates a string element matching a single character.
     */
    public StringElement(char c)
        : this(c.ToString())
    {
    }

    /**
     * Creates a string element matching the given string.
     */
    public StringElement(string str)
    {
        _value = str;
    }

    /**
     * Returns the string matched by this element.
     */
    public string GetString() => _value;

    /**
     * Returns this very instance; the element is immutable, so no
     * copy is made.
     */
    public override object Clone() => this;

    /**
     * Returns the length of the match at the given start position,
     * or -1 when the buffer does not contain the string there. A
     * string has exactly one possible match, so any non-zero skip
     * yields -1. Reaching the end of the buffer is reported to the
     * matcher. For a case-insensitive matcher the buffer characters
     * are lower-cased before they are compared.
     */
    public override int Match(Matcher m,
                              ReaderBuffer buffer,
                              int start,
                              int skip)
    {
        if (skip != 0)
        {
            return -1;
        }

        int offset = 0;
        foreach (char expected in _value)
        {
            var actual = buffer.Peek(start + offset);
            if (actual < 0)
            {
                m.SetReadEndOfString();
                return -1;
            }
            if (m.IsCaseInsensitive())
            {
                actual = (int)Char.ToLower((char)actual);
            }
            if (actual != (int)expected)
            {
                return -1;
            }
            offset++;
        }
        return _value.Length;
    }

    /**
     * Prints the quoted string to the output.
     */
    public override void PrintTo(TextWriter output, string indent)
    {
        string quoted = "'" + _value + "'";
        output.WriteLine(indent + quoted);
    }
}
}

168
Parsing/Token.cs Normal file
View File

@@ -0,0 +1,168 @@
using System.Text;
namespace Flee.Parsing
{
/**
 * A token node. This class represents a token (i.e. a set of adjacent
 * characters) in a parse tree. The tokens are created by a tokenizer,
 * that groups characters together into tokens according to a set of
 * token patterns.
 */
internal class Token : Node
{
    private readonly TokenPattern _pattern;
    private readonly string _image;
    private readonly int _startLine;
    private readonly int _startColumn;
    private readonly int _endLine;
    private readonly int _endColumn;
    private Token _previous = null;
    private Token _next = null;

    /**
     * Creates a new token. The end line and column are derived from
     * the start position and the newline characters in the image.
     *
     * @param pattern the token pattern that produced the token
     * @param image   the token text
     * @param line    the line where the token starts
     * @param col     the column where the token starts
     */
    public Token(TokenPattern pattern, string image, int line, int col)
    {
        _pattern = pattern;
        _image = image;
        _startLine = line;
        _startColumn = col;
        _endLine = line;
        _endColumn = col + image.Length - 1;

        // Every newline moves the end one line down; the end column
        // becomes the number of characters after the last newline.
        int newline = image.IndexOf('\n');
        while (newline >= 0)
        {
            int afterNewline = newline + 1;
            _endLine++;
            _endColumn = image.Length - afterNewline;
            newline = image.IndexOf('\n', afterNewline);
        }
    }

    public override int Id => _pattern.Id;
    public override string Name => _pattern.Name;
    public override int StartLine => _startLine;
    public override int StartColumn => _startColumn;
    public override int EndLine => _endLine;
    public override int EndColumn => _endColumn;

    /**
     * The token text.
     */
    public string Image => _image;

    /**
     * Returns the token text (same as the Image property).
     */
    public string GetImage()
    {
        return Image;
    }

    internal TokenPattern Pattern => _pattern;

    /**
     * The token preceding this one. Setting it unlinks the old
     * previous token from this token and links the new one to it.
     */
    public Token Previous
    {
        get => _previous;
        set
        {
            Token old = _previous;
            if (old != null)
            {
                old._next = null;
            }
            _previous = value;
            if (value != null)
            {
                value._next = this;
            }
        }
    }

    /**
     * Returns the previous token (same as the Previous property).
     */
    public Token GetPreviousToken()
    {
        return Previous;
    }

    /**
     * The token following this one. Setting it unlinks the old
     * next token from this token and links the new one to it.
     */
    public Token Next
    {
        get => _next;
        set
        {
            Token old = _next;
            if (old != null)
            {
                old._previous = null;
            }
            _next = value;
            if (value != null)
            {
                value._previous = this;
            }
        }
    }

    /**
     * Returns the next token (same as the Next property).
     */
    public Token GetNextToken()
    {
        return Next;
    }

    /**
     * Returns a detailed description: pattern name and id, the
     * (possibly shortened) image and the start position.
     */
    public override string ToString()
    {
        StringBuilder text = new StringBuilder();
        text.Append(_pattern.Name);
        text.Append("(");
        text.Append(_pattern.Id);
        text.Append("): \"");
        AppendFirstImageLine(text);
        text.Append("\", line: ");
        text.Append(_startLine);
        text.Append(", col: ");
        text.Append(_startColumn);
        return text.ToString();
    }

    /**
     * Returns a short description: the quoted (possibly shortened)
     * image, followed by the pattern name for regular expression
     * tokens.
     */
    public string ToShortString()
    {
        StringBuilder text = new StringBuilder();
        text.Append('"');
        AppendFirstImageLine(text);
        text.Append('"');
        if (_pattern.Type == TokenPattern.PatternType.REGEXP)
        {
            text.Append(" <");
            text.Append(_pattern.Name);
            text.Append(">");
        }
        return text.ToString();
    }

    /**
     * Appends the image to the buffer. A multi-line image is cut at
     * its first line break (a '\r' directly before the '\n' is
     * dropped too) and followed by "(...)".
     */
    private void AppendFirstImageLine(StringBuilder text)
    {
        int cut = _image.IndexOf('\n');
        if (cut < 0)
        {
            text.Append(_image);
            return;
        }
        if (cut > 0 && _image[cut - 1] == '\r')
        {
            cut--;
        }
        text.Append(_image.Substring(0, cut));
        text.Append("(...)");
    }
}
}

31
Parsing/TokenMatch.cs Normal file
View File

@@ -0,0 +1,31 @@
namespace Flee.Parsing
{
/**
 * The token match status. This class contains logic to ensure that
 * only the longest match is considered.
 */
internal class TokenMatch
{
    private int _length = 0;
    private TokenPattern _pattern = null;

    /**
     * Forgets the current match.
     */
    public void Clear()
    {
        _pattern = null;
        _length = 0;
    }

    /**
     * The length of the longest match recorded (0 when none).
     */
    public int Length => _length;

    /**
     * The pattern of the longest match recorded (null when none).
     */
    public TokenPattern Pattern => _pattern;

    /**
     * Records a match if it is strictly longer than the one already
     * held; shorter and equally long matches are ignored.
     */
    public void Update(int length, TokenPattern pattern)
    {
        if (length <= _length)
        {
            return;
        }
        _length = length;
        _pattern = pattern;
    }
}
}

825
Parsing/TokenNFA.cs Normal file
View File

@@ -0,0 +1,825 @@
namespace Flee.Parsing
{
/**
* A non-deterministic finite state automaton (NFA) for matching
* tokens. It supports both fixed strings and simple regular
* expressions, but should perform similar to a DFA due to highly
* optimized data structures and tuning. The memory footprint during
* matching should be near zero, since no heap memory is allocated
* unless the pre-allocated queues need to be enlarged. The NFA also
* does not use recursion, but iterates in a loop instead.
*/
internal class TokenNFA
{
private readonly NFAState[] _initialChar = new NFAState[128];
private readonly NFAState _initial = new NFAState();
private readonly NFAStateQueue _queue = new NFAStateQueue();
/// <summary>
/// Adds a fixed-string token to the automaton. A case-sensitive string
/// starting with an ASCII character is entered through the
/// first-character lookup table; any other string is attached to the
/// shared initial state.
/// </summary>
/// <param name="str">the string to match; its first character is read unconditionally</param>
/// <param name="ignoreCase">whether the string matches regardless of case</param>
/// <param name="value">the token pattern stored on the accepting state</param>
public void AddTextMatch(string str, bool ignoreCase, TokenPattern value)
{
    char first = str[0];
    NFAState state;
    if (ignoreCase || first >= 128)
    {
        state = _initial.AddOut(first, ignoreCase, null);
    }
    else
    {
        state = _initialChar[first];
        if (state == null)
        {
            state = new NFAState();
            _initialChar[first] = state;
        }
    }

    // Chain the remaining characters, one state per character
    for (int i = 1; i < str.Length; i++)
    {
        state = state.AddOut(str[i], ignoreCase, null);
    }
    state.Value = value;
}
/// <summary>
/// Adds a regular expression token to the automaton. The expression
/// is compiled to NFA states and its start state is connected in one
/// of three ways: through an epsilon transition from the initial
/// state (when the start state has incoming transitions), through the
/// first-character lookup table (when every ASCII character is
/// claimed by at most one outgoing transition and no table entry is
/// already in use), or by merging the start state into the initial
/// state. The strategy chosen is recorded in the pattern's debug info.
/// </summary>
/// <param name="pattern">the regular expression text</param>
/// <param name="ignoreCase">whether the expression ignores case</param>
/// <param name="value">the token pattern stored on the accepting state</param>
public void AddRegExpMatch(string pattern,
                           bool ignoreCase,
                           TokenPattern value)
{
    var parser = new TokenRegExpParser(pattern, ignoreCase);
    string debug = "DFA regexp; " + parser.GetDebugInfo();

    // The lookup table is only usable when each ASCII character maps
    // to a single transition and its table slot is still free.
    var isAscii = parser.Start.IsAsciiOutgoing();
    for (int ch = 0; isAscii && ch < 128; ch++)
    {
        int hits = 0;
        for (int j = 0; j < parser.Start.Outgoing.Length; j++)
        {
            if (!parser.Start.Outgoing[j].Match((char)ch))
            {
                continue;
            }
            hits++;
            if (hits > 1)
            {
                break;
            }
        }
        if (hits > 1 || (hits == 1 && _initialChar[ch] != null))
        {
            isAscii = false;
        }
    }

    string strategy;
    if (parser.Start.Incoming.Length > 0)
    {
        _initial.AddOut(new NFAEpsilonTransition(parser.Start));
        strategy = ", uses initial epsilon";
    }
    else if (isAscii && !ignoreCase)
    {
        for (int ch = 0; ch < 128; ch++)
        {
            for (int j = 0; j < parser.Start.Outgoing.Length; j++)
            {
                var trans = parser.Start.Outgoing[j];
                if (trans.Match((char)ch))
                {
                    _initialChar[ch] = trans.State;
                }
            }
        }
        strategy = ", uses ASCII lookup";
    }
    else
    {
        parser.Start.MergeInto(_initial);
        strategy = ", uses initial state";
    }
    debug += strategy;

    parser.End.Value = value;
    value.DebugInfo = debug;
}
/// <summary>
/// Runs the automaton against the characters at the start of the
/// buffer and records every accepted token in <paramref name="match"/>,
/// which keeps only the longest one.
/// </summary>
/// <param name="buffer">the buffer to peek characters from</param>
/// <param name="match">the match status to update</param>
/// <returns>
/// the length of the longest token accepted during this call, or zero
/// when nothing matched. The previous implementation returned a local
/// that was never assigned, so the result was always zero.
/// </returns>
public int Match(ReaderBuffer buffer, TokenMatch match)
{
    int length = 0;
    int pos = 1;
    NFAState state;

    // The first step of the match loop has been unrolled and
    // optimized for performance below.
    this._queue.Clear();
    var peekChar = buffer.Peek(0);
    if (0 <= peekChar && peekChar < 128)
    {
        state = this._initialChar[peekChar];
        if (state != null)
        {
            this._queue.AddLast(state);
        }
    }
    if (peekChar >= 0)
    {
        this._initial.MatchTransitions((char)peekChar, this._queue, true);
    }
    this._queue.MarkEnd();
    peekChar = buffer.Peek(1);

    // The remaining match loop processes all subsequent states
    while (!this._queue.Empty)
    {
        if (this._queue.Marked)
        {
            // All states for the current position are done: advance
            pos++;
            peekChar = buffer.Peek(pos);
            this._queue.MarkEnd();
        }
        state = this._queue.RemoveFirst();
        if (state.Value != null)
        {
            match.Update(pos, state.Value);
            // pos never decreases, so the latest accepting position
            // is also the longest one
            length = pos;
        }
        if (peekChar >= 0)
        {
            state.MatchTransitions((char)peekChar, this._queue, false);
        }
    }
    return length;
}
}
/**
 * An NFA state. The NFA consists of a series of states, each
 * having zero or more transitions to other states.
 */
internal class NFAState
{
    // The token pattern accepted in this state (null = not accepting)
    internal TokenPattern Value = null;

    // Transitions leading into / out of this state
    internal NFATransition[] Incoming = new NFATransition[0];
    internal NFATransition[] Outgoing = new NFATransition[0];

    // True once an epsilon transition has been added to Outgoing
    internal bool EpsilonOut = false;

    /**
     * Checks if this state has any incoming or outgoing transition.
     */
    public bool HasTransitions()
    {
        return Incoming.Length > 0 || Outgoing.Length > 0;
    }

    /**
     * Checks if every outgoing transition is an ASCII-only
     * transition.
     */
    public bool IsAsciiOutgoing()
    {
        foreach (var trans in Outgoing)
        {
            if (!trans.IsAscii())
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Appends an incoming transition.
     */
    public void AddIn(NFATransition trans)
    {
        int slot = Incoming.Length;
        Array.Resize(ref Incoming, slot + 1);
        Incoming[slot] = trans;
    }

    /**
     * Adds an outgoing character transition. Ignoring case adds one
     * transition for the lower-case and one for the upper-case form,
     * both leading to the same state. Otherwise, when no target
     * state is given, an existing unambiguous transition for the
     * character is reused before a new state is created.
     *
     * @return the target state of the transition
     */
    public NFAState AddOut(char ch, bool ignoreCase, NFAState state)
    {
        if (ignoreCase)
        {
            state = state ?? new NFAState();
            AddOut(new NFACharTransition(Char.ToLower(ch), state));
            AddOut(new NFACharTransition(Char.ToUpper(ch), state));
            return state;
        }

        if (state == null)
        {
            state = FindUniqueCharTransition(ch);
            if (state != null)
            {
                return state;
            }
            state = new NFAState();
        }
        return AddOut(new NFACharTransition(ch, state));
    }

    /**
     * Appends an outgoing transition and notes whether it is an
     * epsilon transition.
     *
     * @return the target state of the transition
     */
    public NFAState AddOut(NFATransition trans)
    {
        int slot = Outgoing.Length;
        Array.Resize(ref Outgoing, slot + 1);
        Outgoing[slot] = trans;
        if (trans is NFAEpsilonTransition)
        {
            EpsilonOut = true;
        }
        return trans.State;
    }

    /**
     * Moves all transitions of this state to another state. The
     * incoming transitions are redirected to the other state, the
     * outgoing ones are added to it. Afterwards both transition
     * arrays of this state are null.
     */
    public void MergeInto(NFAState state)
    {
        foreach (var incoming in Incoming)
        {
            state.AddIn(incoming);
            incoming.State = state;
        }
        Incoming = null;

        foreach (var outgoing in Outgoing)
        {
            state.AddOut(outgoing);
        }
        Outgoing = null;
    }

    /**
     * Finds the target of the only character transition matching the
     * character. Null is returned when no or several such
     * transitions exist, or when another transition leads to the
     * same target state.
     */
    private NFAState FindUniqueCharTransition(char ch)
    {
        NFATransition found = null;
        foreach (var trans in Outgoing)
        {
            if (trans.Match(ch) && trans is NFACharTransition)
            {
                if (found != null)
                {
                    return null;
                }
                found = trans;
            }
        }
        if (found == null)
        {
            return null;
        }

        // The target must not be shared with any other transition
        foreach (var trans in Outgoing)
        {
            if (trans != found && trans.State == found.State)
            {
                return null;
            }
        }
        return found.State;
    }

    /**
     * Queues the target states of all transitions matching the
     * character, together with the states reachable from them
     * through epsilon transitions. On the initial step, epsilon
     * transitions of this state are followed first and their targets
     * matched against the same character.
     */
    public void MatchTransitions(char ch, NFAStateQueue queue, bool initial)
    {
        foreach (var trans in Outgoing)
        {
            var target = trans.State;
            if (initial && trans is NFAEpsilonTransition)
            {
                target.MatchTransitions(ch, queue, true);
                continue;
            }
            if (!trans.Match(ch))
            {
                continue;
            }
            queue.AddLast(target);
            if (target.EpsilonOut)
            {
                target.MatchEmpty(queue);
            }
        }
    }

    /**
     * Queues every state reachable from this state through epsilon
     * transitions only.
     */
    public void MatchEmpty(NFAStateQueue queue)
    {
        foreach (var trans in Outgoing)
        {
            if (!(trans is NFAEpsilonTransition))
            {
                continue;
            }
            var target = trans.State;
            queue.AddLast(target);
            if (target.EpsilonOut)
            {
                target.MatchEmpty(queue);
            }
        }
    }
}
/**
 * An NFA state transition. A transition checks a single
 * character of input and determines if it is a match. If a match
 * is encountered, the NFA should move forward to the transition
 * state.
 */
internal abstract class NFATransition
{
    // The state this transition leads to
    internal NFAState State;

    /**
     * Creates a transition to the given state and registers it as an
     * incoming transition of that state.
     */
    protected NFATransition(NFAState state)
    {
        State = state;
        state.AddIn(this);
    }

    /**
     * Checks if this transition only matches ASCII characters.
     */
    public abstract bool IsAscii();

    /**
     * Checks if this transition matches the character.
     */
    public abstract bool Match(char ch);

    /**
     * Creates a copy of this transition leading to another state.
     */
    public abstract NFATransition Copy(NFAState state);
}
/**
 * The special epsilon transition. This transition matches the
 * empty input, i.e. it is an automatic transition that doesn't
 * read any input. As such, it returns false in the match method
 * and is handled specially everywhere.
 */
internal class NFAEpsilonTransition : NFATransition
{
    public NFAEpsilonTransition(NFAState state) : base(state)
    {
    }

    /**
     * Always false: an epsilon transition matches no character.
     */
    public override bool IsAscii() => false;

    /**
     * Always false: an epsilon transition reads no input.
     */
    public override bool Match(char ch) => false;

    /**
     * Creates a new epsilon transition to the given state.
     */
    public override NFATransition Copy(NFAState state) => new NFAEpsilonTransition(state);
}
/**
 * A single character match transition.
 */
internal class NFACharTransition : NFATransition
{
    private readonly char _match;

    /**
     * Creates a transition that matches exactly one character.
     */
    public NFACharTransition(char match, NFAState state) : base(state)
    {
        _match = match;
    }

    /**
     * Checks if the matched character is in the ASCII range. A char
     * is never negative, so only the upper bound is tested.
     */
    public override bool IsAscii() => _match < 128;

    /**
     * Checks if the character equals the one matched.
     */
    public override bool Match(char ch) => ch == _match;

    /**
     * Creates a transition for the same character to another state.
     */
    public override NFATransition Copy(NFAState state) => new NFACharTransition(_match, state);
}
/**
 * A character range match transition. Used for user-defined
 * character sets in regular expressions.
 */
internal class NFACharRangeTransition : NFATransition
{
    protected bool Inverse;
    protected bool IgnoreCase;

    // Holds boxed chars and Range objects, in the order they were added
    private object[] _contents = new object[0];

    /**
     * Creates an empty character set transition.
     *
     * @param inverse    whether the set is negated
     * @param ignoreCase whether characters are compared in lower case
     * @param state      the target state
     */
    public NFACharRangeTransition(bool inverse,
                                  bool ignoreCase,
                                  NFAState state) : base(state)
    {
        Inverse = inverse;
        IgnoreCase = ignoreCase;
    }

    /**
     * Checks if the set only matches ASCII characters. A negated set
     * never does.
     */
    public override bool IsAscii()
    {
        if (Inverse)
        {
            return false;
        }
        foreach (object item in _contents)
        {
            if (item is char)
            {
                // A char is never negative: only the upper bound matters
                if ((char)item >= 128)
                {
                    return false;
                }
            }
            else if (item is Range && !((Range)item).IsAscii())
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Adds a single character to the set (lower-cased when the set
     * ignores case).
     */
    public void AddCharacter(char c)
    {
        AddContent(IgnoreCase ? Char.ToLower(c) : c);
    }

    /**
     * Adds a character range to the set (both bounds lower-cased
     * when the set ignores case).
     */
    public void AddRange(char min, char max)
    {
        char low = min;
        char high = max;
        if (IgnoreCase)
        {
            low = Char.ToLower(low);
            high = Char.ToLower(high);
        }
        AddContent(new Range(low, high));
    }

    /**
     * Appends an item to the contents. The array is replaced, not
     * modified, so copies sharing the old array are unaffected.
     */
    private void AddContent(Object obj)
    {
        int slot = _contents.Length;
        Array.Resize(ref _contents, slot + 1);
        _contents[slot] = obj;
    }

    /**
     * Checks if the character is matched: it must be in the set, or
     * outside of it for a negated set.
     */
    public override bool Match(char ch)
    {
        char probe = IgnoreCase ? Char.ToLower(ch) : ch;
        foreach (object item in _contents)
        {
            bool contained;
            if (item is char)
            {
                contained = (char)item == probe;
            }
            else if (item is Range)
            {
                contained = ((Range)item).Inside(probe);
            }
            else
            {
                contained = false;
            }
            if (contained)
            {
                return !Inverse;
            }
        }
        return Inverse;
    }

    /**
     * Creates a transition with the same settings and contents to
     * another state.
     */
    public override NFATransition Copy(NFAState state)
    {
        var copy = new NFACharRangeTransition(Inverse, IgnoreCase, state);
        copy._contents = _contents;
        return copy;
    }

    /**
     * An inclusive character range.
     */
    private class Range
    {
        private readonly char _min;
        private readonly char _max;

        public Range(char min, char max)
        {
            _min = min;
            _max = max;
        }

        /**
         * Checks if both bounds are ASCII characters.
         */
        public bool IsAscii()
        {
            return _min < 128 && _max < 128;
        }

        /**
         * Checks if the character lies within the bounds.
         */
        public bool Inside(char c)
        {
            return c >= _min && c <= _max;
        }
    }
}
/**
 * The dot ('.') transition. It accepts any single character except
 * the line terminators LF, CR, NEL (U+0085), LS (U+2028) and
 * PS (U+2029).
 */
internal class NFADotTransition : NFATransition
{
    public NFADotTransition(NFAState state) : base(state)
    {
    }

    // The dot accepts non-ASCII characters, so it is never ASCII-only.
    public override bool IsAscii() => false;

    // Rejects the five line terminator characters, accepts the rest.
    public override bool Match(char ch)
    {
        bool lineBreak = ch == '\n'
                      || ch == '\r'
                      || ch == '\u0085'
                      || ch == '\u2028'
                      || ch == '\u2029';
        return !lineBreak;
    }

    // Creates another dot transition to the given state.
    public override NFATransition Copy(NFAState state) => new NFADotTransition(state);
}
/**
 * The digit transition. It accepts a single character in the range
 * '0' to '9'.
 */
internal class NFADigitTransition : NFATransition
{
    public NFADigitTransition(NFAState state) : base(state)
    {
    }

    // Only ASCII digits are accepted.
    public override bool IsAscii() => true;

    // True for '0' through '9'.
    public override bool Match(char ch) => ch >= '0' && ch <= '9';

    // Creates another digit transition to the given state.
    public override NFATransition Copy(NFAState state) => new NFADigitTransition(state);
}
/**
 * The non-digit transition. It accepts any single character outside
 * the range '0' to '9'.
 */
internal class NFANonDigitTransition : NFATransition
{
    public NFANonDigitTransition(NFAState state) : base(state)
    {
    }

    // Non-ASCII characters are accepted too.
    public override bool IsAscii() => false;

    // True for everything except '0' through '9'.
    public override bool Match(char ch) => !(ch >= '0' && ch <= '9');

    // Creates another non-digit transition to the given state.
    public override NFATransition Copy(NFAState state) => new NFANonDigitTransition(state);
}
/**
 * The whitespace transition. It accepts a single space, tab, line
 * feed, form feed, carriage return or vertical tab (code 11).
 */
internal class NFAWhitespaceTransition : NFATransition
{
    public NFAWhitespaceTransition(NFAState state) : base(state)
    {
    }

    // All accepted characters are ASCII.
    public override bool IsAscii() => true;

    // True for the six whitespace characters listed on the class.
    public override bool Match(char ch)
    {
        return ch == ' '
            || ch == '\t'
            || ch == '\n'
            || ch == '\f'
            || ch == '\r'
            || ch == (char)11;
    }

    // Creates another whitespace transition to the given state.
    public override NFATransition Copy(NFAState state) => new NFAWhitespaceTransition(state);
}
/**
 * The non-whitespace transition. It accepts any single character
 * other than space, tab, line feed, form feed, carriage return and
 * vertical tab (code 11).
 */
internal class NFANonWhitespaceTransition : NFATransition
{
    public NFANonWhitespaceTransition(NFAState state) : base(state)
    {
    }

    // Non-ASCII characters are accepted too.
    public override bool IsAscii() => false;

    // False for the six whitespace characters, true for everything else.
    public override bool Match(char ch)
    {
        bool blank = ch == ' '
                  || ch == '\t'
                  || ch == '\n'
                  || ch == '\f'
                  || ch == '\r'
                  || ch == (char)11;
        return !blank;
    }

    // Creates another non-whitespace transition to the given state.
    public override NFATransition Copy(NFAState state) => new NFANonWhitespaceTransition(state);
}
/**
 * The word character transition. It accepts a single ASCII letter,
 * ASCII digit or underscore.
 */
internal class NFAWordTransition : NFATransition
{
    public NFAWordTransition(NFAState state) : base(state)
    {
    }

    // All accepted characters are ASCII.
    public override bool IsAscii() => true;

    // True for a-z, A-Z, 0-9 and '_'.
    public override bool Match(char ch)
    {
        if (ch == '_')
        {
            return true;
        }
        bool lower = ch >= 'a' && ch <= 'z';
        bool upper = ch >= 'A' && ch <= 'Z';
        bool digit = ch >= '0' && ch <= '9';
        return lower || upper || digit;
    }

    // Creates another word transition to the given state.
    public override NFATransition Copy(NFAState state) => new NFAWordTransition(state);
}
/**
 * The non-word character transition. It accepts any single
 * character that is not an ASCII letter, ASCII digit or underscore.
 */
internal class NFANonWordTransition : NFATransition
{
    public NFANonWordTransition(NFAState state) : base(state)
    {
    }

    // Non-ASCII characters are accepted too.
    public override bool IsAscii() => false;

    // False for a-z, A-Z, 0-9 and '_', true for everything else.
    public override bool Match(char ch)
    {
        if (ch == '_')
        {
            return false;
        }
        bool lower = ch >= 'a' && ch <= 'z';
        bool upper = ch >= 'A' && ch <= 'Z';
        bool digit = ch >= '0' && ch <= '9';
        return !(lower || upper || digit);
    }

    // Creates another non-word transition to the given state.
    public override NFATransition Copy(NFAState state) => new NFANonWordTransition(state);
}
/**
 * A queue of NFA states used while running the automaton. States
 * are removed from the front and appended at the back, and a marker
 * index records a position in the queue (set with MarkEnd) so that
 * callers can tell when the front has reached it.
 *
 * The queue lives in one array. Removal only advances an index;
 * when the array is full, the live entries are shifted to the start
 * if there is free room at the front, otherwise the array is
 * doubled in size.
 */
internal class NFAStateQueue
{
    private NFAState[] _queue = new NFAState[2048];

    // Index of the next state to remove.
    private int _first = 0;

    // Index one past the last stored state.
    private int _last = 0;

    // Marker position recorded by MarkEnd.
    private int _mark = 0;

    // True when no states remain in the queue.
    public bool Empty => _first >= _last;

    // True when the front of the queue has reached the marker.
    public bool Marked => _mark == _first;

    // Resets the queue to empty; the backing array is kept.
    public void Clear()
    {
        _first = _last = _mark = 0;
    }

    // Places the marker at the current end of the queue.
    public void MarkEnd()
    {
        _mark = _last;
    }

    // Removes and returns the first state, or null when empty.
    public NFAState RemoveFirst()
    {
        if (_first >= _last)
        {
            return null;
        }
        return _queue[_first++];
    }

    // Appends a state, making room first when the array is full.
    public void AddLast(NFAState state)
    {
        if (_last >= _queue.Length)
        {
            if (_first > 0)
            {
                // Reclaim the consumed front part by shifting the live
                // entries (and the indices that refer to them) down.
                int live = _last - _first;
                Array.Copy(_queue, _first, _queue, 0, live);
                _mark -= _first;
                _last = live;
                _first = 0;
            }
            else
            {
                Array.Resize(ref _queue, _queue.Length * 2);
            }
        }
        _queue[_last] = state;
        _last++;
    }
}
}

303
Parsing/TokenPattern.cs Normal file
View File

@@ -0,0 +1,303 @@
using System.Text;
namespace Flee.Parsing
{
/**
 * A token pattern. It holds the definition of a token: an integer
 * id, a name, the pattern type (exact string or regular expression)
 * and the pattern text, plus optional error, ignore and debug
 * information.
 */
internal class TokenPattern
{
    public enum PatternType
    {
        /**
         * Pattern type for tokens that match one exact string.
         */
        STRING,
        /**
         * Pattern type for tokens that match a regular expression.
         */
        REGEXP
    }

    private int _id;
    private string _name;
    private PatternType _type;
    private string _pattern;
    private bool _error;
    private string _errorMessage;
    private bool _ignore;
    private string _ignoreMessage;
    private string _debugInfo;

    public TokenPattern(int id,
                        string name,
                        PatternType type,
                        string pattern)
    {
        _id = id;
        _name = name;
        _type = type;
        _pattern = pattern;
    }

    // Integer id of the pattern.
    public int Id
    {
        get => _id;
        set => _id = value;
    }

    public int GetId() => _id;

    // Name of the pattern.
    public string Name
    {
        get => _name;
        set => _name = value;
    }

    public string GetName() => _name;

    // Kind of pattern (exact string or regular expression).
    public PatternType Type
    {
        get => _type;
        set => _type = value;
    }

    public PatternType GetPatternType() => _type;

    // The pattern text itself.
    public string Pattern
    {
        get => _pattern;
        set => _pattern = value;
    }

    public string GetPattern() => _pattern;

    // Error flag. Setting it to true installs a default error message
    // when none has been assigned yet.
    public bool Error
    {
        get => _error;
        set
        {
            _error = value;
            if (value && _errorMessage == null)
            {
                _errorMessage = "unrecognized token found";
            }
        }
    }

    // Error message. Assigning any value also raises the error flag.
    public string ErrorMessage
    {
        get => _errorMessage;
        set
        {
            _error = true;
            _errorMessage = value;
        }
    }

    public bool IsError() => Error;

    public string GetErrorMessage() => ErrorMessage;

    public void SetError() => Error = true;

    public void SetError(string message) => ErrorMessage = message;

    // Ignore flag.
    public bool Ignore
    {
        get => _ignore;
        set => _ignore = value;
    }

    // Ignore message. Assigning any value also raises the ignore flag.
    public string IgnoreMessage
    {
        get => _ignoreMessage;
        set
        {
            _ignore = true;
            _ignoreMessage = value;
        }
    }

    public bool IsIgnore() => Ignore;

    public string GetIgnoreMessage() => IgnoreMessage;

    public void SetIgnore() => Ignore = true;

    public void SetIgnore(string message) => IgnoreMessage = message;

    // Free-form debug text appended by ToString when present.
    public string DebugInfo
    {
        get => _debugInfo;
        set => _debugInfo = value;
    }

    // Builds the detailed description: name, id, quoted pattern and
    // any error, ignore and debug information.
    public override string ToString()
    {
        var text = new StringBuilder();
        text.Append(_name).Append(" (").Append(_id).Append("): ");
        if (_type == PatternType.STRING)
        {
            text.Append("\"").Append(_pattern).Append("\"");
        }
        else if (_type == PatternType.REGEXP)
        {
            text.Append("<<").Append(_pattern).Append(">>");
        }
        if (_error)
        {
            text.Append(" ERROR: \"").Append(_errorMessage).Append("\"");
        }
        if (_ignore)
        {
            text.Append(" IGNORE");
            if (_ignoreMessage != null)
            {
                text.Append(": \"").Append(_ignoreMessage).Append("\"");
            }
        }
        if (_debugInfo != null)
        {
            text.Append("\n ").Append(_debugInfo);
        }
        return text.ToString();
    }

    // Builds the short description: the quoted pattern for string
    // tokens (cut at the first line break and followed by "(...)"),
    // or the name in angle brackets for all other tokens.
    public string ToShortString()
    {
        // Looked up before the type test, exactly as the long-standing
        // behavior does, so a null pattern fails here for any type.
        int newline = _pattern.IndexOf('\n');
        if (_type != PatternType.STRING)
        {
            return "<" + _name + ">";
        }
        string shown = _pattern;
        if (newline >= 0)
        {
            // Drop the '\r' of a "\r\n" pair as well.
            bool crlf = newline > 0 && _pattern[newline - 1] == '\r';
            int cut = crlf ? newline - 1 : newline;
            shown = _pattern.Substring(0, cut) + "(...)";
        }
        return "\"" + shown + "\"";
    }

    // Replaces id, name, type and pattern text in one call.
    public void SetData(int id, string name, PatternType type, string pattern)
    {
        Id = id;
        Name = name;
        Type = type;
        Pattern = pattern;
    }
}
}

View File

@@ -0,0 +1,545 @@
using System.Collections;
using System.Globalization;
using System.Text;
namespace Flee.Parsing
{
/**
* A regular expression parser. The parser creates an NFA for the
* regular expression having a single start and acceptance states.
*/
internal class TokenRegExpParser
{
private readonly string _pattern;
private readonly bool _ignoreCase;
private int _pos;
internal NFAState Start = new NFAState();
internal NFAState End;
private int _stateCount;
private int _transitionCount;
private int _epsilonCount;
// Parses a case-sensitive regular expression.
public TokenRegExpParser(string pattern) : this(pattern, false)
{
}

// Parses the regular expression into an NFA that runs from Start to
// End. Throws a RegExpException (UNEXPECTED_CHARACTER) when the
// expression parser stops before the end of the pattern.
public TokenRegExpParser(string pattern, bool ignoreCase)
{
    _pattern = pattern;
    _ignoreCase = ignoreCase;
    _pos = 0;
    End = ParseExpr(Start);

    bool consumedAll = _pos >= pattern.Length;
    if (!consumedAll)
    {
        throw new RegExpException(
            RegExpException.ErrorType.UNEXPECTED_CHARACTER,
            _pos,
            pattern);
    }
}
// Returns a one-line summary with the number of states, transitions
// and epsilon transitions reachable from Start. The counters are
// computed on the first call (while the state count is still zero).
public string GetDebugInfo()
{
    bool statsPending = _stateCount == 0;
    if (statsPending)
    {
        UpdateStats(Start, new Hashtable());
    }
    return $"{_stateCount} states, {_transitionCount} transitions, {_epsilonCount} epsilons";
}
// Walks the automaton depth-first from the given state and adds every
// state and outgoing transition not seen before to the counters.
// The visited table keeps shared or cyclic states from being counted
// twice.
private void UpdateStats(NFAState state, Hashtable visited)
{
    if (visited.ContainsKey(state))
    {
        return;
    }
    visited.Add(state, state);
    _stateCount++;

    for (int idx = 0; idx < state.Outgoing.Length; idx++)
    {
        var edge = state.Outgoing[idx];
        _transitionCount++;
        if (edge is NFAEpsilonTransition)
        {
            _epsilonCount++;
        }
        UpdateStats(edge.State, visited);
    }
}
// Parses an alternation: one or more terms separated by '|'. Every
// term is connected between `start` and a single end state created
// here, which is returned.
private NFAState ParseExpr(NFAState start)
{
NFAState end = new NFAState();
do
{
// Consume the '|' that introduces this alternative, if present.
if (PeekChar(0) == '|')
{
ReadChar('|');
}
// Each alternative is parsed from its own fresh start state.
var subStart = new NFAState();
var subEnd = ParseTerm(subStart);
// A sub-start state without incoming transitions is merged into
// `start`; otherwise it is reached through an epsilon transition.
// NOTE(review): MergeInto presumably moves the state's transitions
// onto its argument; confirm in NFAState.
if (subStart.Incoming.Length == 0)
{
subStart.MergeInto(start);
}
else
{
start.AddOut(new NFAEpsilonTransition(subStart));
}
// The term's end state is merged into the shared end state when it
// has no outgoing transitions, or when the shared end has no
// transitions yet and no further alternative follows; otherwise the
// two are linked with an epsilon transition.
if (subEnd.Outgoing.Length == 0 ||
(!end.HasTransitions() && PeekChar(0) != '|'))
{
subEnd.MergeInto(end);
}
else
{
subEnd.AddOut(new NFAEpsilonTransition(end));
}
} while (PeekChar(0) == '|');
return end;
}
// Parses a sequence of one or more factors. Each factor continues
// from the end state of the previous one. The sequence stops at the
// end of the pattern or at one of ) ] { } ? + | and the end state of
// the last factor is returned.
private NFAState ParseTerm(NFAState start)
{
    NFAState tail = ParseFact(start);
    for (;;)
    {
        int next = PeekChar(0);
        bool terminator = next == -1
                       || next == ')'
                       || next == ']'
                       || next == '{'
                       || next == '}'
                       || next == '?'
                       || next == '+'
                       || next == '|';
        if (terminator)
        {
            return tail;
        }
        tail = ParseFact(tail);
    }
}
// Parses a single factor: an atom optionally followed by one of the
// repeat modifiers ? * + {. The factor is first built from a
// temporary placeholder state and then attached to `start`. Returns
// the end state of the factor.
private NFAState ParseFact(NFAState start)
{
NFAState placeholder = new NFAState();
var end = ParseAtom(placeholder);
switch (PeekChar(0))
{
case '?':
case '*':
case '+':
case '{':
end = ParseAtomModifier(placeholder, end);
break;
}
// When the placeholder has incoming transitions and `start` already
// has outgoing ones, the two are kept apart and joined with an
// epsilon transition.
if (placeholder.Incoming.Length > 0 && start.Outgoing.Length > 0)
{
start.AddOut(new NFAEpsilonTransition(placeholder));
return end;
}
else
{
// Otherwise the placeholder is merged into `start`. If the factor
// ends in the placeholder itself, `start` is now that end state.
placeholder.MergeInto(start);
return (end == placeholder) ? start : end;
}
}
// Parses one atom starting in `start` and returns its end state. An
// atom is '.', a parenthesized sub-expression, a bracketed character
// set or a single (possibly escaped) character. A modifier or closing
// character where an atom is expected, or the end of the pattern,
// raises a RegExpException (UNEXPECTED_CHARACTER).
private NFAState ParseAtom(NFAState start)
{
    int next = PeekChar(0);

    if (next == '.')
    {
        ReadChar('.');
        return start.AddOut(new NFADotTransition(new NFAState()));
    }
    if (next == '(')
    {
        ReadChar('(');
        NFAState groupEnd = ParseExpr(start);
        ReadChar(')');
        return groupEnd;
    }
    if (next == '[')
    {
        ReadChar('[');
        NFAState setEnd = ParseCharSet(start);
        ReadChar(']');
        return setEnd;
    }

    bool misplaced = next == -1
                  || next == ')'
                  || next == ']'
                  || next == '{'
                  || next == '}'
                  || next == '?'
                  || next == '*'
                  || next == '+'
                  || next == '|';
    if (misplaced)
    {
        throw new RegExpException(
            RegExpException.ErrorType.UNEXPECTED_CHARACTER,
            _pos,
            _pattern);
    }
    return ParseChar(start);
}
// Parses a repeat modifier (? * + or {min,max}) that follows an atom
// spanning `start` to `end`, and rewires the automaton accordingly.
// Returns the end state of the repeated atom. Only the repeats
// "zero or one", "zero or more" and "one or more" are supported; any
// other count raises a RegExpException (INVALID_REPEAT_COUNT), as do
// a max of zero and a min above max. A trailing '?' or '+'
// (reluctant/possessive modifier) raises
// UNSUPPORTED_SPECIAL_CHARACTER.
private NFAState ParseAtomModifier(NFAState start, NFAState end)
{
// max == -1 stands for "no upper limit".
int min = 0;
int max = -1;
int firstPos = _pos;
// Read min and max
switch (ReadChar())
{
case '?':
min = 0;
max = 1;
break;
case '*':
min = 0;
max = -1;
break;
case '+':
min = 1;
max = -1;
break;
case '{':
// {n} gives min == max == n, {n,} leaves max unlimited and
// {n,m} reads both bounds.
min = ReadNumber();
max = min;
if (PeekChar(0) == ',')
{
ReadChar(',');
max = -1;
if (PeekChar(0) != '}')
{
max = ReadNumber();
}
}
ReadChar('}');
if (max == 0 || (max > 0 && min > max))
{
throw new RegExpException(
RegExpException.ErrorType.INVALID_REPEAT_COUNT,
firstPos,
_pattern);
}
break;
default:
throw new RegExpException(
RegExpException.ErrorType.UNEXPECTED_CHARACTER,
_pos - 1,
_pattern);
}
// Read possessive or reluctant modifiers
if (PeekChar(0) == '?')
{
throw new RegExpException(
RegExpException.ErrorType.UNSUPPORTED_SPECIAL_CHARACTER,
_pos,
_pattern);
}
else if (PeekChar(0) == '+')
{
throw new RegExpException(
RegExpException.ErrorType.UNSUPPORTED_SPECIAL_CHARACTER,
_pos,
_pattern);
}
// Handle supported repeaters
if (min == 0 && max == 1)
{
// Optional atom: an epsilon transition lets the automaton skip it.
return start.AddOut(new NFAEpsilonTransition(end));
}
else if (min == 0 && max == -1)
{
// Zero or more: the end is folded back onto (or linked back to) the
// start state, which then serves as both entry and exit.
if (end.Outgoing.Length == 0)
{
end.MergeInto(start);
}
else
{
end.AddOut(new NFAEpsilonTransition(start));
}
return start;
}
else if (min == 1 && max == -1)
{
// One or more: when the atom is a single transition from start to
// end, a copy of that transition looping on the end state is
// enough; otherwise an epsilon transition leads back to the start.
if (start.Outgoing.Length == 1 &&
end.Outgoing.Length == 0 &&
end.Incoming.Length == 1 &&
start.Outgoing[0] == end.Incoming[0])
{
end.AddOut(start.Outgoing[0].Copy(end));
}
else
{
end.AddOut(new NFAEpsilonTransition(start));
}
return end;
}
else
{
throw new RegExpException(
RegExpException.ErrorType.INVALID_REPEAT_COUNT,
firstPos,
_pattern);
}
}
// Parses the contents of a character set (the text between '[' and
// ']', both of which are read by the caller) and adds a single range
// transition from `start` to a new end state, which is returned. A
// leading '^' makes the set inverted.
private NFAState ParseCharSet(NFAState start)
{
NFAState end = new NFAState();
NFACharRangeTransition range;
if (PeekChar(0) == '^')
{
ReadChar('^');
range = new NFACharRangeTransition(true, _ignoreCase, end);
}
else
{
range = new NFACharRangeTransition(false, _ignoreCase, end);
}
start.AddOut(range);
// Loop until the pattern ends; note that a NUL character (code 0)
// also ends the loop because of the "> 0" test.
while (PeekChar(0) > 0)
{
var min = (char)PeekChar(0);
switch (min)
{
case ']':
// The closing bracket is left unread for the caller.
return end;
case '\\':
range.AddCharacter(ReadEscapeChar());
break;
default:
ReadChar(min);
// "x-y" is a range unless the '-' is the last character before
// ']' or the end of the pattern, in which case `min` is added as a
// plain character and the '-' is handled by the next iteration.
if (PeekChar(0) == '-' &&
PeekChar(1) > 0 &&
PeekChar(1) != ']')
{
ReadChar('-');
var max = ReadChar();
range.AddRange(min, max);
}
else
{
range.AddCharacter(min);
}
break;
}
}
return end;
}
// Parses a single character atom. A backslash is handed over to the
// escape parser, the anchors '^' and '$' are rejected with a
// RegExpException (UNSUPPORTED_SPECIAL_CHARACTER), and any other
// character becomes a character transition to a new state.
private NFAState ParseChar(NFAState start)
{
    int next = PeekChar(0);
    if (next == '\\')
    {
        return ParseEscapeChar(start);
    }
    if (next == '^' || next == '$')
    {
        throw new RegExpException(
            RegExpException.ErrorType.UNSUPPORTED_SPECIAL_CHARACTER,
            _pos,
            _pattern);
    }
    return start.AddOut(ReadChar(), _ignoreCase, new NFAState());
}
// Parses an escape sequence used as an atom. The class escapes \d \D
// \s \S \w \W become their dedicated transitions; every other escape
// is decoded by ReadEscapeChar into one character and added as a
// character transition. Returns the new end state.
private NFAState ParseEscapeChar(NFAState start)
{
    NFAState end = new NFAState();

    bool hasSelector = PeekChar(0) == '\\' && PeekChar(1) > 0;
    char selector = hasSelector ? (char)PeekChar(1) : '\0';
    bool isClassEscape = hasSelector && "dDsSwW".IndexOf(selector) >= 0;
    if (!isClassEscape)
    {
        return start.AddOut(ReadEscapeChar(), _ignoreCase, end);
    }

    // Consume the backslash and the class letter.
    ReadChar();
    ReadChar();

    NFATransition shorthand;
    switch (selector)
    {
        case 'd':
            shorthand = new NFADigitTransition(end);
            break;
        case 'D':
            shorthand = new NFANonDigitTransition(end);
            break;
        case 's':
            shorthand = new NFAWhitespaceTransition(end);
            break;
        case 'S':
            shorthand = new NFANonWhitespaceTransition(end);
            break;
        case 'w':
            shorthand = new NFAWordTransition(end);
            break;
        default:
            // Only 'W' remains here.
            shorthand = new NFANonWordTransition(end);
            break;
    }
    return start.AddOut(shorthand);
}
// Reads one escape sequence (backslash included) and returns the
// character it denotes. Supported forms: octal \0n, \0nn, \0nnn
// (first digit 0-3), hexadecimal \xHH, Unicode \uHHHH, the control
// escapes \t \n \r \f \a \e, and a backslash before any non-letter,
// which yields that character itself. Any other letter, or malformed
// digits, raises a RegExpException (UNSUPPORTED_ESCAPE_CHARACTER).
private char ReadEscapeChar()
{
string str;
int value;
ReadChar('\\');
var c = ReadChar();
switch (c)
{
case '0':
// Octal: the first digit must be 0-3; up to two more octal digits
// are consumed when present.
c = ReadChar();
if (c < '0' || c > '3')
{
throw new RegExpException(
RegExpException.ErrorType.UNSUPPORTED_ESCAPE_CHARACTER,
_pos - 3,
_pattern);
}
value = c - '0';
// At the end of the pattern PeekChar returns -1, which casts to a
// character outside '0'-'7' and so ends the number.
c = (char)PeekChar(0);
if ('0' <= c && c <= '7')
{
value *= 8;
value += ReadChar() - '0';
c = (char)PeekChar(0);
if ('0' <= c && c <= '7')
{
value *= 8;
value += ReadChar() - '0';
}
}
return (char)value;
case 'x':
// Exactly two hexadecimal digits.
str = ReadChar().ToString() + ReadChar().ToString();
try
{
value = Int32.Parse(str, NumberStyles.AllowHexSpecifier);
return (char)value;
}
catch (FormatException)
{
// Report the position of the backslash that started the escape.
throw new RegExpException(
RegExpException.ErrorType.UNSUPPORTED_ESCAPE_CHARACTER,
_pos - str.Length - 2,
_pattern);
}
case 'u':
// Exactly four hexadecimal digits.
str = ReadChar().ToString() +
ReadChar().ToString() +
ReadChar().ToString() +
ReadChar().ToString();
try
{
value = Int32.Parse(str, NumberStyles.AllowHexSpecifier);
return (char)value;
}
catch (FormatException)
{
throw new RegExpException(
RegExpException.ErrorType.UNSUPPORTED_ESCAPE_CHARACTER,
_pos - str.Length - 2,
_pattern);
}
case 't':
return '\t';
case 'n':
return '\n';
case 'r':
return '\r';
case 'f':
return '\f';
case 'a':
return '\u0007';
case 'e':
return '\u001B';
default:
// Letters without a defined meaning are rejected; any other
// escaped character stands for itself.
if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'))
{
throw new RegExpException(
RegExpException.ErrorType.UNSUPPORTED_ESCAPE_CHARACTER,
_pos - 2,
_pattern);
}
return c;
}
}
// Reads a run of decimal digits at the current position and returns
// its value. Throws a RegExpException with UNEXPECTED_CHARACTER when
// no digit is present, and with INVALID_REPEAT_COUNT when the digits
// do not fit in an Int32 (previously an OverflowException escaped
// from the parser for patterns such as "a{99999999999}").
private int ReadNumber()
{
    int firstPos = _pos;
    StringBuilder digits = new StringBuilder();
    int c = PeekChar(0);
    while ('0' <= c && c <= '9')
    {
        digits.Append(ReadChar());
        c = PeekChar(0);
    }
    if (digits.Length <= 0)
    {
        throw new RegExpException(
            RegExpException.ErrorType.UNEXPECTED_CHARACTER,
            _pos,
            _pattern);
    }

    // The text holds ASCII digits only, so parse it culture-independently
    // and turn an out-of-range value into a regular pattern error.
    int value;
    if (!Int32.TryParse(digits.ToString(),
                        NumberStyles.None,
                        CultureInfo.InvariantCulture,
                        out value))
    {
        throw new RegExpException(
            RegExpException.ErrorType.INVALID_REPEAT_COUNT,
            firstPos,
            _pattern);
    }
    return value;
}
// Consumes and returns the character at the current position. Throws
// a RegExpException (UNTERMINATED_PATTERN) at the end of the pattern.
private char ReadChar()
{
    int next = PeekChar(0);
    if (next >= 0)
    {
        _pos++;
        return (char)next;
    }
    throw new RegExpException(
        RegExpException.ErrorType.UNTERMINATED_PATTERN,
        _pos,
        _pattern);
}
// Consumes the next character and verifies that it is the expected
// one. Throws a RegExpException (UNEXPECTED_CHARACTER) pointing at the
// consumed character when it differs.
private char ReadChar(char c)
{
    char actual = ReadChar();
    if (actual == c)
    {
        return c;
    }
    throw new RegExpException(
        RegExpException.ErrorType.UNEXPECTED_CHARACTER,
        _pos - 1,
        _pattern);
}
// Returns the character `count` positions ahead of the current
// position without consuming anything, or -1 past the end of the
// pattern.
private int PeekChar(int count)
{
    int index = _pos + count;
    return index < _pattern.Length ? _pattern[index] : -1;
}
}
}

213
Parsing/TokenStringDFA.cs Normal file
View File

@@ -0,0 +1,213 @@
using System.Text;
namespace Flee.Parsing
{
/**
 * A deterministic finite state automaton for matching exact strings.
 * The first character of a string is looked up in a 128-entry table
 * when it is ASCII; all further characters (and a non-ASCII first
 * character) are looked up in sorted binary transition trees. Only
 * single character transitions are supported, but the automaton may
 * be run in an all case-insensitive mode.
 *
 * Case folding uses the invariant culture, so that matching does not
 * depend on the culture of the current thread (with a Turkish
 * culture 'I' would otherwise fold to U+0131 and never match 'i').
 */
internal class TokenStringDFA
{
    // Start states indexed by an ASCII first character.
    private readonly DFAState[] _ascii = new DFAState[128];

    // Shared start state for strings with a non-ASCII first character.
    private readonly DFAState _nonAscii = new DFAState();

    public TokenStringDFA()
    {
    }

    // Adds a string to the automaton and associates the final state
    // with the given token pattern. The string must not be empty
    // (reading its first character fails otherwise).
    public void AddMatch(string str, bool caseInsensitive, TokenPattern value)
    {
        DFAState state;
        char c = str[0];
        int start = 0;
        if (caseInsensitive)
        {
            c = Char.ToLowerInvariant(c);
        }
        if (c < 128)
        {
            state = _ascii[c];
            if (state == null)
            {
                state = _ascii[c] = new DFAState();
            }
            // The first character is consumed by the table lookup.
            start++;
        }
        else
        {
            // The first character is stored in the tree of the shared
            // non-ASCII state, like all following characters.
            state = _nonAscii;
        }
        for (int i = start; i < str.Length; i++)
        {
            var next = state.Tree.Find(str[i], caseInsensitive);
            if (next == null)
            {
                next = new DFAState();
                state.Tree.Add(str[i], caseInsensitive, next);
            }
            state = next;
        }
        state.Value = value;
    }

    // Returns the token pattern of the longest added string found at
    // the start of the buffer, or null when none matches. The buffer
    // is only peeked, never consumed.
    public TokenPattern Match(ReaderBuffer buffer, bool caseInsensitive)
    {
        TokenPattern result = null;
        DFAState state;
        int pos = 0;
        var c = buffer.Peek(0);
        if (c < 0)
        {
            return null;
        }
        if (caseInsensitive)
        {
            c = Char.ToLowerInvariant((char)c);
        }
        if (c < 128)
        {
            state = _ascii[c];
            if (state == null)
            {
                return null;
            }
            else if (state.Value != null)
            {
                result = state.Value;
            }
            pos++;
        }
        else
        {
            state = _nonAscii;
        }
        // Follow transitions as long as possible, remembering the last
        // accepting state passed on the way.
        while ((c = buffer.Peek(pos)) >= 0)
        {
            state = state.Tree.Find((char)c, caseInsensitive);
            if (state == null)
            {
                break;
            }
            else if (state.Value != null)
            {
                result = state.Value;
            }
            pos++;
        }
        return result;
    }

    // Returns a textual dump of the automaton.
    public override string ToString()
    {
        StringBuilder buffer = new StringBuilder();
        for (int i = 0; i < _ascii.Length; i++)
        {
            if (_ascii[i] != null)
            {
                buffer.Append((char)i);
                if (_ascii[i].Value != null)
                {
                    buffer.Append(": ");
                    buffer.Append(_ascii[i].Value);
                    buffer.Append("\n");
                }
                _ascii[i].Tree.PrintTo(buffer, " ");
            }
        }
        _nonAscii.Tree.PrintTo(buffer, "");
        return buffer.ToString();
    }
}
/**
 * A state of the string DFA: an optional accepted token pattern and
 * the tree of outgoing character transitions.
 */
internal class DFAState
{
    // Pattern accepted when a string ends in this state; null otherwise.
    internal TokenPattern Value;

    // Outgoing transitions, keyed by character.
    internal TransitionTree Tree = new();
}
/**
 * A binary search tree mapping characters to DFA states. A node
 * whose character is '\0' is an empty leaf; a filled node always has
 * two (possibly empty) child nodes. Smaller characters are stored in
 * the left subtree, all others in the right one.
 *
 * Case folding uses the invariant culture so that lookups do not
 * depend on the culture of the current thread.
 */
internal class TransitionTree
{
    private char _value = '\0';
    private DFAState _state;
    private TransitionTree _left;
    private TransitionTree _right;

    public TransitionTree()
    {
    }

    // Returns the state stored for the character, or null when the
    // character is not in the tree. With lowerCase set, the character
    // is lower-cased before the lookup.
    public DFAState Find(char c, bool lowerCase)
    {
        if (lowerCase)
        {
            c = Char.ToLowerInvariant(c);
        }
        TransitionTree node = this;
        // Descend until the character or an empty leaf is reached; an
        // empty leaf has a null state.
        while (node._value != '\0' && node._value != c)
        {
            node = node._value > c ? node._left : node._right;
        }
        return node._state;
    }

    // Stores a state for the character in the first empty leaf found
    // on its search path. With lowerCase set, the character is
    // lower-cased first.
    public void Add(char c, bool lowerCase, DFAState state)
    {
        if (lowerCase)
        {
            c = Char.ToLowerInvariant(c);
        }
        TransitionTree node = this;
        while (node._value != '\0')
        {
            node = node._value > c ? node._left : node._right;
        }
        node._value = c;
        node._state = state;
        node._left = new TransitionTree();
        node._right = new TransitionTree();
    }

    // Appends an in-order dump of the tree (and, recursively, of the
    // trees of the stored states) to the buffer.
    public void PrintTo(StringBuilder buffer, String indent)
    {
        _left?.PrintTo(buffer, indent);
        if (_value != '\0')
        {
            // Indent only at the start of a new output line.
            if (buffer.Length > 0 && buffer[buffer.Length - 1] == '\n')
            {
                buffer.Append(indent);
            }
            buffer.Append(_value);
            if (_state.Value != null)
            {
                buffer.Append(": ");
                buffer.Append(_state.Value);
                buffer.Append("\n");
            }
            _state.Tree.PrintTo(buffer, indent + " ");
        }
        _right?.PrintTo(buffer, indent);
    }
}
}

444
Parsing/Tokenizer.cs Normal file
View File

@@ -0,0 +1,444 @@
using System.Text;
using System.Text.RegularExpressions;
namespace Flee.Parsing
{
/**
* A character stream tokenizer. This class groups the characters read
* from the stream together into tokens ("words"). The grouping is
* controlled by token patterns that contain either a fixed string to
* search for, or a regular expression. If the stream of characters
* doesn't match any of the token patterns, a parse exception is thrown.
*/
internal class Tokenizer
{
private bool _useTokenList = false;
private readonly StringDFAMatcher _stringDfaMatcher;
private readonly NFAMatcher _nfaMatcher;
private readonly RegExpMatcher _regExpMatcher;
private ReaderBuffer _buffer = null;
private readonly TokenMatch _lastMatch = new TokenMatch();
private Token _previousToken = null;
// Creates a case-sensitive tokenizer reading from the given input.
public Tokenizer(TextReader input)
    : this(input, false)
{
}

// Creates a tokenizer reading from the given input. The ignoreCase
// flag is passed on to all three token matchers.
public Tokenizer(TextReader input, bool ignoreCase)
{
    _stringDfaMatcher = new StringDFAMatcher(ignoreCase);
    _nfaMatcher = new NFAMatcher(ignoreCase);
    _regExpMatcher = new RegExpMatcher(ignoreCase);
    _buffer = new ReaderBuffer(input);
}
// When set, every token returned by NextToken (ignored ones included)
// is linked to the token read before it; see Next.
public bool UseTokenList
{
    get => _useTokenList;
    set => _useTokenList = value;
}

// Method-style getter for UseTokenList.
public bool GetUseTokenList() => _useTokenList;

// Method-style setter for UseTokenList.
public void SetUseTokenList(bool useTokenList) => _useTokenList = useTokenList;
// Returns the short description of the pattern with the given id, or
// null when no matcher knows the id. The string, NFA and regexp
// matchers are asked in that order.
public string GetPatternDescription(int id)
{
    TokenPattern found = _stringDfaMatcher.GetPattern(id)
                         ?? _nfaMatcher.GetPattern(id)
                         ?? _regExpMatcher.GetPattern(id);
    return found?.ToShortString();
}
// Returns the line number currently reported by the input buffer.
public int GetCurrentLine() => _buffer.LineNumber;

// Returns the column number currently reported by the input buffer.
public int GetCurrentColumn() => _buffer.ColumnNumber;
/**
 * Adds a token pattern to the tokenizer. String patterns go to the
 * string DFA matcher. Regular expressions are first offered to the
 * NFA matcher when nfa is true (it handles most expressions except
 * complex repeats such as {1,4}); if that fails, or nfa is false,
 * the regexp matcher is used. Failures are reported as a
 * ParserCreationException (INVALID_TOKEN).
 */
public void AddPattern(TokenPattern pattern, bool nfa = true)
{
    TokenPattern.PatternType kind = pattern.Type;

    if (kind == TokenPattern.PatternType.STRING)
    {
        try
        {
            _stringDfaMatcher.AddPattern(pattern);
        }
        catch (Exception e)
        {
            throw new ParserCreationException(
                ParserCreationException.ErrorType.INVALID_TOKEN,
                pattern.Name,
                "error adding string token: " +
                e.Message);
        }
        return;
    }

    if (kind != TokenPattern.PatternType.REGEXP)
    {
        throw new ParserCreationException(
            ParserCreationException.ErrorType.INVALID_TOKEN,
            pattern.Name,
            "pattern type " + pattern.Type +
            " is undefined");
    }

    bool handledByNfa = false;
    if (nfa)
    {
        try
        {
            _nfaMatcher.AddPattern(pattern);
            handledByNfa = true;
        }
        catch (Exception)
        {
            // Deliberate fallback: anything the NFA matcher rejects is
            // retried with the regexp matcher below.
            handledByNfa = false;
        }
    }
    if (handledByNfa)
    {
        return;
    }

    try
    {
        _regExpMatcher.AddPattern(pattern);
    }
    catch (Exception e)
    {
        throw new ParserCreationException(
            ParserCreationException.ErrorType.INVALID_TOKEN,
            pattern.Name,
            "regular expression contains error(s): " +
            e.Message);
    }
}
// Restarts the tokenizer on a new input: a fresh buffer is created
// and the previous token and last match are forgotten. The old
// buffer is simply dropped, not disposed.
public void Reset(TextReader input)
{
    _buffer = new ReaderBuffer(input);
    _previousToken = null;
    _lastMatch.Clear();
}
// Returns the next token that is not flagged to be ignored, or null
// at the end of the input. A token whose pattern is flagged as an
// error raises a ParseException (INVALID_TOKEN) carrying the
// pattern's error message. With UseTokenList set, every token read
// (ignored ones included) gets its Previous link assigned.
public Token Next()
{
    while (true)
    {
        Token candidate = NextToken();
        if (candidate == null)
        {
            _previousToken = null;
            return null;
        }
        if (_useTokenList)
        {
            candidate.Previous = _previousToken;
            _previousToken = candidate;
        }
        if (candidate.Pattern.Ignore)
        {
            continue;
        }
        if (candidate.Pattern.Error)
        {
            throw new ParseException(
                ParseException.ErrorType.INVALID_TOKEN,
                candidate.Pattern.ErrorMessage,
                candidate.StartLine,
                candidate.StartColumn);
        }
        return candidate;
    }
}
// Runs all three matchers at the current buffer position and turns
// the recorded match into a token. Returns null at the end of the
// input. Throws a ParseException with UNEXPECTED_CHAR when nothing
// matches, and with IO when reading the input fails.
private Token NextToken()
{
    try
    {
        _lastMatch.Clear();
        _stringDfaMatcher.Match(_buffer, _lastMatch);
        _nfaMatcher.Match(_buffer, _lastMatch);
        _regExpMatcher.Match(_buffer, _lastMatch);

        bool matched = _lastMatch.Length > 0;
        if (!matched && _buffer.Peek(0) < 0)
        {
            return null;
        }

        // Capture the position before reading moves the buffer on.
        int line = _buffer.LineNumber;
        int column = _buffer.ColumnNumber;
        if (matched)
        {
            string image = _buffer.Read(_lastMatch.Length);
            return NewToken(_lastMatch.Pattern, image, line, column);
        }
        throw new ParseException(
            ParseException.ErrorType.UNEXPECTED_CHAR,
            _buffer.Read(1),
            line,
            column);
    }
    catch (IOException e)
    {
        throw new ParseException(ParseException.ErrorType.IO,
                                 e.Message,
                                 -1,
                                 -1);
    }
}
// Factory for token objects; subclasses may override it to create
// their own token type.
protected virtual Token NewToken(TokenPattern pattern,
                                 string image,
                                 int line,
                                 int column)
    => new Token(pattern, image, line, column);
// Returns the concatenated descriptions of the string, NFA and
// regexp matchers, in that order.
public override string ToString()
{
    var text = new StringBuilder();
    text.Append(_stringDfaMatcher)
        .Append(_nfaMatcher)
        .Append(_regExpMatcher);
    return text.ToString();
}
}
/**
 * Base class of the token matchers. It keeps the list of patterns
 * added to the matcher together with the case-insensitivity flag.
 */
internal abstract class TokenMatcher
{
    // Patterns in the order they were added.
    protected TokenPattern[] Patterns = new TokenPattern[0];

    // True when matching ignores character case.
    protected bool IgnoreCase = false;

    protected TokenMatcher(bool ignoreCase)
    {
        IgnoreCase = ignoreCase;
    }

    // Matches at the current buffer position and reports the outcome
    // through the match object.
    public abstract void Match(ReaderBuffer buffer, TokenMatch match);

    // Returns the first added pattern with the given id, or null.
    public TokenPattern GetPattern(int id)
    {
        foreach (TokenPattern candidate in Patterns)
        {
            if (candidate.Id == id)
            {
                return candidate;
            }
        }
        return null;
    }

    // Appends the pattern to the pattern list.
    public virtual void AddPattern(TokenPattern pattern)
    {
        int slot = Patterns.Length;
        Array.Resize(ref Patterns, slot + 1);
        Patterns[slot] = pattern;
    }

    // Lists all patterns, each followed by an empty line.
    public override string ToString()
    {
        var text = new StringBuilder();
        foreach (TokenPattern entry in Patterns)
        {
            text.Append(entry).Append("\n\n");
        }
        return text.ToString();
    }
}
/**
 * Token matcher for exact string patterns, backed by a
 * TokenStringDFA.
 */
internal class StringDFAMatcher : TokenMatcher
{
    private readonly TokenStringDFA _automaton = new TokenStringDFA();

    public StringDFAMatcher(bool ignoreCase) : base(ignoreCase)
    {
    }

    // Adds the pattern text to the automaton, then records the pattern.
    public override void AddPattern(TokenPattern pattern)
    {
        _automaton.AddMatch(pattern.Pattern, IgnoreCase, pattern);
        base.AddPattern(pattern);
    }

    // Reports the pattern found by the automaton, if any, using the
    // length of its pattern text as the match length.
    public override void Match(ReaderBuffer buffer, TokenMatch match)
    {
        TokenPattern found = _automaton.Match(buffer, IgnoreCase);
        if (found == null)
        {
            return;
        }
        match.Update(found.Pattern.Length, found);
    }
}
/**
 * Token matcher backed by a TokenNFA; it accepts both string and
 * regular expression patterns.
 */
internal class NFAMatcher : TokenMatcher
{
    private readonly TokenNFA _automaton = new TokenNFA();

    public NFAMatcher(bool ignoreCase) : base(ignoreCase)
    {
    }

    // Adds the pattern to the automaton as text or as a regular
    // expression, depending on its type, then records the pattern.
    public override void AddPattern(TokenPattern pattern)
    {
        if (pattern.Type != TokenPattern.PatternType.STRING)
        {
            _automaton.AddRegExpMatch(pattern.Pattern, IgnoreCase, pattern);
        }
        else
        {
            _automaton.AddTextMatch(pattern.Pattern, IgnoreCase, pattern);
        }
        base.AddPattern(pattern);
    }

    // Delegates matching to the automaton.
    public override void Match(ReaderBuffer buffer, TokenMatch match)
        => _automaton.Match(buffer, match);
}
/**
 * Token matcher that tries every regular expression one by one. Each
 * pattern gets a handler, stored at the same index as the pattern in
 * the inherited pattern list.
 */
internal class RegExpMatcher : TokenMatcher
{
    private REHandler[] _regExps = new REHandler[0];

    public RegExpMatcher(bool ignoreCase) : base(ignoreCase)
    {
    }

    // Creates a handler for the pattern: the Grammatica regexp
    // implementation when it accepts the expression, otherwise the
    // .NET Regex class. The choice is noted in the pattern's debug info.
    public override void AddPattern(TokenPattern pattern)
    {
        REHandler handler;
        try
        {
            handler = new GrammaticaRE(pattern.Pattern, IgnoreCase);
            pattern.DebugInfo = "Grammatica regexp\n" + handler;
        }
        catch (Exception)
        {
            // Deliberate fallback to the system implementation.
            handler = new SystemRE(pattern.Pattern, IgnoreCase);
            pattern.DebugInfo = "native .NET regexp";
        }
        int slot = _regExps.Length;
        Array.Resize(ref _regExps, slot + 1);
        _regExps[slot] = handler;
        base.AddPattern(pattern);
    }

    // Runs every handler and reports each non-empty match.
    public override void Match(ReaderBuffer buffer, TokenMatch match)
    {
        for (int idx = 0; idx < _regExps.Length; idx++)
        {
            int matched = _regExps[idx].Match(buffer);
            if (matched <= 0)
            {
                continue;
            }
            match.Update(matched, Patterns[idx]);
        }
    }
}
// Common interface of the regular expression back-ends used by
// RegExpMatcher.
internal abstract class REHandler
{
// Tries to match at the buffer position. The implementations in this
// file return the length of the match, or 0 when there is none.
public abstract int Match(ReaderBuffer buffer);
}
/**
 * Regular expression handler based on the Grammatica RegExp class.
 */
internal class GrammaticaRE : REHandler
{
    private readonly RegExp _regExp;

    // Created on the first match and reused (reset) afterwards.
    private Matcher _matcher = null;

    public GrammaticaRE(string regex, bool ignoreCase)
    {
        _regExp = new RegExp(regex, ignoreCase);
    }

    // Returns the length of the match at the beginning of the matcher's
    // input, or 0 when the expression does not match there.
    public override int Match(ReaderBuffer buffer)
    {
        if (_matcher != null)
        {
            _matcher.Reset(buffer);
        }
        else
        {
            _matcher = _regExp.Matcher(buffer);
        }
        if (!_matcher.MatchFromBeginning())
        {
            return 0;
        }
        return _matcher.Length();
    }
}
/**
 * Regular expression handler based on the .NET Regex class.
 */
internal class SystemRE : REHandler
{
    private readonly Regex _reg;

    public SystemRE(string regex, bool ignoreCase)
    {
        RegexOptions options = ignoreCase
            ? RegexOptions.IgnoreCase
            : RegexOptions.None;
        _reg = new Regex(regex, options);
    }

    // Returns the length of a match that starts exactly at the buffer
    // position, or 0 when there is none.
    public override int Match(ReaderBuffer buffer)
    {
        // Workaround: .NET has no flag telling that the end of the
        // input string was reached, so a large look-ahead is requested
        // from the buffer before matching.
        buffer.Peek(1024 * 16);

        // The search cannot be limited to the start position, so the
        // index of the match is verified afterwards instead.
        var found = _reg.Match(buffer.ToString(), buffer.Position);
        bool atPosition = found.Success && found.Index == buffer.Position;
        return atPosition ? found.Length : 0;
    }
}
}