// Source listing: Flee/Parsing/TokenStringDFA.cs - 215 lines, 5.6 KiB, C#

using System;
using System.Text;
namespace Flee.Parsing
{
/**
 * A deterministic finite state automaton for matching exact strings.
 * The first character of a match string is dispatched through a
 * 128-entry lookup table when its code is below 128, or through a
 * shared non-ASCII start state otherwise. Every following character
 * is a single-character transition stored in the current state's
 * binary tree. The automaton may be run in an all case-insensitive
 * mode, in which characters are folded to lower case using the
 * invariant culture.
 */
internal class TokenStringDFA
{
    /** Start states for first characters below 128, indexed by character code. */
    private readonly DFAState[] _ascii = new DFAState[128];

    /**
     * Start state used when the first character is 128 or above. In
     * that case the first character itself is stored as a transition
     * in this state's tree.
     */
    private readonly DFAState _nonAscii = new DFAState();

    public TokenStringDFA()
    {
    }

    /**
     * Registers a string to match and the value to report for it.
     *
     * str             - the string to match; must contain at least
     *                   one character
     * caseInsensitive - when true, every character is folded to lower
     *                   case before it is stored; Match must then be
     *                   called with the same flag for mixed-case
     *                   input to be recognized
     * value           - the value stored on the final state
     *
     * Throws ArgumentNullException when str is null and
     * ArgumentException when str is empty.
     */
    public void AddMatch(string str, bool caseInsensitive, TokenPattern value)
    {
        if (str == null)
        {
            throw new ArgumentNullException(nameof(str));
        }
        if (str.Length == 0)
        {
            throw new ArgumentException("The match string must not be empty.", nameof(str));
        }

        DFAState state;
        int start = 0;
        char first = Fold(str[0], caseInsensitive);
        if (first < _ascii.Length)
        {
            state = _ascii[first];
            if (state == null)
            {
                state = new DFAState();
                _ascii[first] = state;
            }
            // The table lookup consumed the first character.
            start = 1;
        }
        else
        {
            // Non-ASCII first characters are handled as ordinary tree
            // transitions from the shared start state.
            state = _nonAscii;
        }

        for (int i = start; i < str.Length; i++)
        {
            // Characters are folded here, so the tree is always called
            // with its own lower-casing switched off.
            char c = Fold(str[i], caseInsensitive);
            DFAState next = state.Tree.Find(c, false);
            if (next == null)
            {
                next = new DFAState();
                state.Tree.Add(c, false, next);
            }
            state = next;
        }
        state.Value = value;
    }

    /**
     * Finds the longest registered string that is a prefix of the
     * characters available from the buffer. Only Peek is called on
     * the buffer. A negative result from Peek stops the scan
     * (presumably end of input - confirm against ReaderBuffer).
     *
     * buffer          - the buffer to peek characters from
     * caseInsensitive - when true, input characters are folded to
     *                   lower case before each lookup
     *
     * Returns the value of the longest match found, or null when no
     * registered string matches.
     */
    public TokenPattern Match(ReaderBuffer buffer, bool caseInsensitive)
    {
        var c = buffer.Peek(0);
        if (c < 0)
        {
            return null;
        }

        TokenPattern result = null;
        DFAState state;
        int pos = 0;
        char first = Fold((char)c, caseInsensitive);
        if (first < _ascii.Length)
        {
            state = _ascii[first];
            if (state == null)
            {
                return null;
            }
            // A one-character string may already be a complete match.
            result = state.Value;
            pos = 1;
        }
        else
        {
            state = _nonAscii;
        }

        while ((c = buffer.Peek(pos)) >= 0)
        {
            state = state.Tree.Find(Fold((char)c, caseInsensitive), false);
            if (state == null)
            {
                break;
            }
            if (state.Value != null)
            {
                // Keep the most recent hit; later hits are longer matches.
                result = state.Value;
            }
            pos++;
        }
        return result;
    }

    /**
     * Returns a textual dump of the automaton: each used ASCII start
     * state followed by its transition tree, then the tree of the
     * non-ASCII start state.
     */
    public override string ToString()
    {
        StringBuilder buffer = new StringBuilder();
        for (int i = 0; i < _ascii.Length; i++)
        {
            DFAState state = _ascii[i];
            if (state == null)
            {
                continue;
            }
            buffer.Append((char)i);
            if (state.Value != null)
            {
                buffer.Append(": ");
                buffer.Append(state.Value);
                buffer.Append("\n");
            }
            state.Tree.PrintTo(buffer, " ");
        }
        _nonAscii.Tree.PrintTo(buffer, "");
        return buffer.ToString();
    }

    /**
     * Folds a character to lower case when case-insensitive mode is
     * on. The invariant culture is used so that the result does not
     * depend on the current thread culture (for example, 'I' always
     * folds to 'i', also under Turkish cultures).
     */
    private static char Fold(char c, bool caseInsensitive)
    {
        return caseInsensitive ? Char.ToLowerInvariant(c) : c;
    }
}
/**
 * A single automaton state: an optional value plus the tree of
 * outgoing character transitions.
 */
internal class DFAState
{
    /** The value assigned to this state, or null when none has been set. */
    internal TokenPattern Value;

    /** The outgoing transitions from this state; created empty. */
    internal TransitionTree Tree;

    public DFAState()
    {
        Tree = new TransitionTree();
    }
}
/**
 * An unbalanced binary search tree mapping single characters to
 * automaton states. Every populated node owns two child nodes that
 * start out as empty placeholders; a node is empty as long as its
 * child links are null. Emptiness is deliberately not encoded in the
 * character value, so the NUL character ('\0') can be stored as a
 * regular transition.
 */
internal class TransitionTree
{
    private char _value = '\0';
    private DFAState _state;
    private TransitionTree _left;
    private TransitionTree _right;

    public TransitionTree()
    {
    }

    /** True while this node is an unused placeholder (children are assigned only by Add). */
    private bool IsEmpty
    {
        get { return _left == null; }
    }

    /**
     * Looks up the state reached by a character.
     *
     * c         - the character to look up
     * lowerCase - when true, the character is folded to lower case
     *             (invariant culture) before the lookup
     *
     * Returns the state stored for the character, or null when the
     * tree holds no transition for it.
     */
    public DFAState Find(char c, bool lowerCase)
    {
        if (lowerCase)
        {
            c = Char.ToLowerInvariant(c);
        }

        // Iterative descent: the tree is not balanced, so this avoids
        // deep recursion when characters were added in sorted order.
        TransitionTree node = this;
        while (!node.IsEmpty)
        {
            if (node._value == c)
            {
                return node._state;
            }
            node = node._value > c ? node._left : node._right;
        }
        return null;
    }

    /**
     * Adds a transition for a character. No check for an existing
     * entry is made: a character equal to one already stored is
     * placed in the right subtree, where Find will not reach it, so
     * callers should call Find first.
     *
     * c         - the character to add
     * lowerCase - when true, the character is folded to lower case
     *             (invariant culture) before it is stored
     * state     - the state reached through the character
     */
    public void Add(char c, bool lowerCase, DFAState state)
    {
        if (lowerCase)
        {
            c = Char.ToLowerInvariant(c);
        }

        // Walk down to the first empty placeholder and populate it.
        TransitionTree node = this;
        while (!node.IsEmpty)
        {
            node = node._value > c ? node._left : node._right;
        }
        node._value = c;
        node._state = state;
        node._left = new TransitionTree();
        node._right = new TransitionTree();
    }

    /**
     * Appends a textual dump of this tree, in ascending character
     * order, to a string builder. Each entry is followed by the dump
     * of its target state's own tree, indented one level deeper.
     *
     * buffer - the builder to append to
     * indent - the indentation written at the start of a new line
     */
    public void PrintTo(StringBuilder buffer, String indent)
    {
        if (IsEmpty)
        {
            return;
        }

        _left.PrintTo(buffer, indent);

        // Indentation is only written at the start of a fresh line.
        if (buffer.Length > 0 && buffer[buffer.Length - 1] == '\n')
        {
            buffer.Append(indent);
        }
        buffer.Append(_value);
        if (_state.Value != null)
        {
            buffer.Append(": ");
            buffer.Append(_state.Value);
            buffer.Append("\n");
        }
        _state.Tree.PrintTo(buffer, indent + " ");

        _right.PrintTo(buffer, indent);
    }
}
}