270 lines
7.0 KiB
C#
270 lines
7.0 KiB
C#
using System;
|
|
using System.Collections;
|
|
using System.IO;
|
|
using System.Text;
|
|
|
|
namespace Flee.Parsing
|
|
{
|
|
/**
|
|
* A regular expression character set element. This element
|
|
* matches a single character inside (or outside) a character set.
|
|
* The character set is user defined and may contain ranges of
|
|
* characters. The set may also be inverted, meaning that only
|
|
* characters not inside the set will be considered to match.
|
|
*/
|
|
internal class CharacterSetElement : Element
|
|
{
|
|
public static CharacterSetElement Dot = new CharacterSetElement(false);
|
|
public static CharacterSetElement Digit = new CharacterSetElement(false);
|
|
public static CharacterSetElement NonDigit = new CharacterSetElement(true);
|
|
public static CharacterSetElement Whitespace = new CharacterSetElement(false);
|
|
public static CharacterSetElement NonWhitespace = new CharacterSetElement(true);
|
|
public static CharacterSetElement Word = new CharacterSetElement(false);
|
|
public static CharacterSetElement NonWord = new CharacterSetElement(true);
|
|
private readonly bool _inverted;
|
|
private readonly ArrayList _contents = new ArrayList();
|
|
|
|
public CharacterSetElement(bool inverted)
|
|
{
|
|
this._inverted = inverted;
|
|
}
|
|
|
|
public void AddCharacter(char c)
|
|
{
|
|
_contents.Add(c);
|
|
}
|
|
|
|
public void AddCharacters(string str)
|
|
{
|
|
for (int i = 0; i < str.Length; i++)
|
|
{
|
|
AddCharacter(str[i]);
|
|
}
|
|
}
|
|
|
|
public void AddCharacters(StringElement elem)
|
|
{
|
|
AddCharacters(elem.GetString());
|
|
}
|
|
|
|
public void AddRange(char min, char max)
|
|
{
|
|
_contents.Add(new Range(min, max));
|
|
}
|
|
|
|
public void AddCharacterSet(CharacterSetElement elem)
|
|
{
|
|
_contents.Add(elem);
|
|
}
|
|
|
|
public override object Clone()
|
|
{
|
|
return this;
|
|
}
|
|
|
|
public override int Match(Matcher m,
|
|
ReaderBuffer buffer,
|
|
int start,
|
|
int skip)
|
|
{
|
|
|
|
int c;
|
|
|
|
if (skip != 0)
|
|
{
|
|
return -1;
|
|
}
|
|
c = buffer.Peek(start);
|
|
if (c < 0)
|
|
{
|
|
m.SetReadEndOfString();
|
|
return -1;
|
|
}
|
|
if (m.IsCaseInsensitive())
|
|
{
|
|
c = (int)Char.ToLower((char)c);
|
|
}
|
|
return InSet((char)c) ? 1 : -1;
|
|
}
|
|
|
|
private bool InSet(char c)
|
|
{
|
|
if (this == Dot)
|
|
{
|
|
return InDotSet(c);
|
|
}
|
|
else if (this == Digit || this == NonDigit)
|
|
{
|
|
return InDigitSet(c) != _inverted;
|
|
}
|
|
else if (this == Whitespace || this == NonWhitespace)
|
|
{
|
|
return InWhitespaceSet(c) != _inverted;
|
|
}
|
|
else if (this == Word || this == NonWord)
|
|
{
|
|
return InWordSet(c) != _inverted;
|
|
}
|
|
else
|
|
{
|
|
return InUserSet(c) != _inverted;
|
|
}
|
|
}
|
|
|
|
private bool InDotSet(char c)
|
|
{
|
|
switch (c)
|
|
{
|
|
case '\n':
|
|
case '\r':
|
|
case '\u0085':
|
|
case '\u2028':
|
|
case '\u2029':
|
|
return false;
|
|
default:
|
|
return true;
|
|
}
|
|
}
|
|
|
|
private bool InDigitSet(char c)
|
|
{
|
|
return '0' <= c && c <= '9';
|
|
}
|
|
|
|
private bool InWhitespaceSet(char c)
|
|
{
|
|
switch (c)
|
|
{
|
|
case ' ':
|
|
case '\t':
|
|
case '\n':
|
|
case '\f':
|
|
case '\r':
|
|
case (char)11:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
private bool InWordSet(char c)
|
|
{
|
|
return ('a' <= c && c <= 'z')
|
|
|| ('A' <= c && c <= 'Z')
|
|
|| ('0' <= c && c <= '9')
|
|
|| c == '_';
|
|
}
|
|
|
|
private bool InUserSet(char value)
|
|
{
|
|
for (int i = 0; i < _contents.Count; i++)
|
|
{
|
|
var obj = _contents[i];
|
|
if (obj is char)
|
|
{
|
|
var c = (char)obj;
|
|
if (c == value)
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
else if (obj is Range)
|
|
{
|
|
var r = (Range)obj;
|
|
if (r.Inside(value))
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
else if (obj is CharacterSetElement)
|
|
{
|
|
var e = (CharacterSetElement)obj;
|
|
if (e.InSet(value))
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
public override void PrintTo(TextWriter output, string indent)
|
|
{
|
|
output.WriteLine(indent + ToString());
|
|
}
|
|
|
|
public override string ToString()
|
|
{
|
|
// Handle predefined character sets
|
|
if (this == Dot)
|
|
{
|
|
return ".";
|
|
}
|
|
else if (this == Digit)
|
|
{
|
|
return "\\d";
|
|
}
|
|
else if (this == NonDigit)
|
|
{
|
|
return "\\D";
|
|
}
|
|
else if (this == Whitespace)
|
|
{
|
|
return "\\s";
|
|
}
|
|
else if (this == NonWhitespace)
|
|
{
|
|
return "\\S";
|
|
}
|
|
else if (this == Word)
|
|
{
|
|
return "\\w";
|
|
}
|
|
else if (this == NonWord)
|
|
{
|
|
return "\\W";
|
|
}
|
|
|
|
// Handle user-defined character sets
|
|
var buffer = new StringBuilder();
|
|
if (_inverted)
|
|
{
|
|
buffer.Append("^[");
|
|
}
|
|
else
|
|
{
|
|
buffer.Append("[");
|
|
}
|
|
for (int i = 0; i < _contents.Count; i++)
|
|
{
|
|
buffer.Append(_contents[i]);
|
|
}
|
|
buffer.Append("]");
|
|
|
|
return buffer.ToString();
|
|
}
|
|
|
|
private class Range
|
|
{
|
|
private readonly char _min;
|
|
private readonly char _max;
|
|
|
|
public Range(char min, char max)
|
|
{
|
|
this._min = min;
|
|
this._max = max;
|
|
}
|
|
|
|
public bool Inside(char c)
|
|
{
|
|
return _min <= c && c <= _max;
|
|
}
|
|
|
|
public override string ToString()
|
|
{
|
|
return _min + "-" + _max;
|
|
}
|
|
}
|
|
}
|
|
}
|