public abstract class RegexLexer extends Object
| Modifier and Type | Class and Description |
|---|---|
protected static interface |
RegexLexer.ErrorHandler |
static class |
RegexLexer.ParseGroupNameResult |
protected static class |
RegexLexer.ParseGroupNameResultState |
| Modifier and Type | Field and Description |
|---|---|
protected Map<String,Integer> |
namedCaptureGroups |
protected String |
pattern
The source of the input pattern.
|
protected int |
position
The index of the next character in
RegexLexer.pattern to be parsed. |
RegexSource |
source |
| Constructor and Description |
|---|
RegexLexer(RegexSource source) |
| Modifier and Type | Method and Description |
|---|---|
protected void |
advance() |
protected void |
advance(int len) |
protected boolean |
atEnd() |
protected abstract void |
caseFold(CodePointSetAccumulator charClass)
Case folds a given character class.
|
protected Token |
charClass(int codePoint) |
protected char |
consumeChar() |
protected boolean |
consumingLookahead(Predicate<Character> predicate,
int length) |
protected boolean |
consumingLookahead(String match) |
protected int |
count(Predicate<Character> predicate) |
protected int |
count(Predicate<Character> predicate,
int fromIndex,
int toIndex) |
protected int |
countDecimalDigits() |
protected int |
countFrom(Predicate<Character> predicate,
int fromIndex) |
protected int |
countUpTo(Predicate<Character> predicate,
int max) |
protected char |
curChar() |
protected abstract boolean |
featureEnabledAZPositionAssertions()
Returns
true if \A and \Z position assertions are supported. |
protected abstract boolean |
featureEnabledBoundedQuantifierEmptyMin()
Returns
true if empty minimum values in bounded quantifiers (e.g. {,1}) are allowed and treated as zero. |
protected abstract boolean |
featureEnabledCharClassFirstBracketIsLiteral()
Returns
true if the first character in a character class must be interpreted as part
of the character set, even if it is the closing bracket ']'. |
protected abstract boolean |
featureEnabledForwardReferences()
Returns
true if forward references are allowed. |
protected abstract boolean |
featureEnabledGroupComments()
Returns
true if group comments (e.g. (# ... )) are supported. |
protected abstract boolean |
featureEnabledIgnoreCase()
Returns
true if ignore-case mode is currently enabled. |
protected abstract boolean |
featureEnabledLineComments()
Returns
true if line comments (e.g. # ... ) are supported. |
protected abstract boolean |
featureEnabledOctalEscapes()
Returns
true if octal escapes (e.g. \012) are supported. |
protected abstract boolean |
featureEnabledUnicodePropertyEscapes()
Returns
true if unicode property escapes (e.g. \p{...}) are supported. |
protected boolean |
findChars(char... chars) |
protected int |
finishSurrogatePair(char c) |
protected abstract CodePointSet |
getDotCodePointSet()
Returns the code point set represented by the dot operator.
|
protected abstract CodePointSet |
getIdContinue()
Returns the set of all codepoints a group identifier may continue with.
|
protected abstract CodePointSet |
getIdStart()
Returns the set of all codepoints a group identifier may begin with.
|
protected int |
getLastAtomPosition() |
int |
getLastTokenPosition()
Returns the last token's position in the pattern string.
|
protected abstract int |
getMaxBackReferenceDigits()
Returns the maximum number of digits to parse when parsing a back-reference.
|
Map<String,Integer> |
getNamedCaptureGroups() |
protected abstract CodePointSet |
getPredefinedCharClass(char c)
Returns the CodePointSet associated with the given predefined character class (e.g. \d).
|
protected abstract RegexSyntaxException |
handleBoundedQuantifierOutOfOrder()
Handle
{2,1}. |
protected abstract Token |
handleBoundedQuantifierSyntaxError()
Handle syntax errors in bounded quantifiers (missing }, non-digit characters).
|
protected abstract RegexSyntaxException |
handleCCRangeOutOfOrder(int startPos)
Handle out of order character class range elements, e.g. [b-a].
|
protected abstract void |
handleCCRangeWithPredefCharClass(int startPos)
Handle non-codepoint character class range elements, e.g. [\w-a].
|
protected abstract RegexSyntaxException |
handleEmptyGroupName()
Handle empty group name in group references.
|
protected abstract RegexSyntaxException |
handleGroupRedefinition(String name,
int newId,
int oldId) |
protected abstract void |
handleIncompleteEscapeX()
Handle incomplete hex escapes, e.g. \x1.
|
protected abstract void |
handleInvalidBackReference(int reference)
Handle group references to non-existent groups.
|
protected abstract void |
handleInvalidBackReference(String reference)
Handle group references to non-existent groups.
|
protected abstract RegexSyntaxException |
handleInvalidGroupBeginQ()
Handle groups starting with
(? and invalid next char. |
protected abstract void |
handleOctalOutOfRange()
Handle octal values larger than 255.
|
protected abstract void |
handleUnfinishedEscape()
Handle unfinished escape (e.g. \).
|
protected abstract void |
handleUnfinishedGroupComment()
Handle unfinished group comment
(#...). |
protected abstract RegexSyntaxException |
handleUnfinishedGroupQ()
Handle unfinished group with question mark
(?. |
protected abstract RegexSyntaxException |
handleUnmatchedLeftBracket()
Handle unmatched
[. |
protected abstract void |
handleUnmatchedRightBrace()
Handle unmatched }.
|
protected abstract void |
handleUnmatchedRightBracket()
Handle unmatched
]. |
protected boolean |
hasNamedCaptureGroups()
Checks whether this regular expression contains any named capture groups.
|
boolean |
hasNext() |
protected static boolean |
isDecimalDigit(int c) |
protected boolean |
isEscaped() |
protected static boolean |
isHexDigit(int c) |
protected static boolean |
isOctalDigit(int c) |
protected boolean |
lookahead(Predicate<Character> predicate,
int length) |
protected boolean |
lookahead(String match) |
protected boolean |
lookbehind(char c) |
Token |
next() |
int |
numberOfCaptureGroupsSoFar() |
protected abstract int |
parseCodePointInGroupName()
Parse the next codepoint in a group name and return it.
|
protected abstract Token |
parseCustomEscape(char c)
Parse any escape sequence starting with
\ and the argument c. |
protected abstract int |
parseCustomEscapeChar(char c,
boolean inCharClass)
Parse an escape character sequence (inside character class, or other escapes have already
been tried) starting with
\ and the argument c. |
protected abstract int |
parseCustomEscapeCharFallback(int c,
boolean inCharClass)
Parse an escape character sequence (inside character class, or other escapes have already
been tried) starting with
\ and the code point c. This method is called after
all other means of parsing the escape sequence have been exhausted. |
protected abstract Token |
parseCustomGroupBeginQ(char charAfterQuestionMark)
Parse group starting with
(?. |
protected abstract Token |
parseGroupLt()
Parse group starting with
(<. |
protected RegexLexer.ParseGroupNameResult |
parseGroupName(char terminator)
Parse a
GroupName, i.e. <RegExpIdentifierName>, assuming that the opening < bracket was already read. |
protected int |
parseIntSaturated(int firstDigit,
int length,
int returnOnOverflow) |
protected int |
parseOctal(int firstDigit) |
protected void |
registerNamedCaptureGroup(String name) |
protected void |
retreat() |
RegexSyntaxException |
syntaxError(String msg) |
int |
totalNumberOfCaptureGroups() |
public final RegexSource source
protected final String pattern
protected int position
The index of the next character in RegexLexer.pattern to be parsed.
public RegexLexer(RegexSource source)
protected abstract boolean featureEnabledIgnoreCase()
Returns true if ignore-case mode is currently enabled.
protected abstract boolean featureEnabledAZPositionAssertions()
Returns true if \A and \Z position assertions are supported.
protected abstract boolean featureEnabledBoundedQuantifierEmptyMin()
Returns true if empty minimum values in bounded quantifiers (e.g. {,1}) are allowed and treated as zero.
protected abstract boolean featureEnabledCharClassFirstBracketIsLiteral()
Returns true if the first character in a character class must be interpreted as part of the character set, even if it is the closing bracket ']'.
protected abstract boolean featureEnabledForwardReferences()
Returns true if forward references are allowed.
protected abstract boolean featureEnabledGroupComments()
Returns true if group comments (e.g. (# ... )) are supported.
protected abstract boolean featureEnabledLineComments()
Returns true if line comments (e.g. # ... ) are supported.
protected abstract boolean featureEnabledOctalEscapes()
Returns true if octal escapes (e.g. \012) are supported.
protected abstract boolean featureEnabledUnicodePropertyEscapes()
Returns true if unicode property escapes (e.g. \p{...}) are supported.
protected abstract void caseFold(CodePointSetAccumulator charClass)
protected abstract CodePointSet getDotCodePointSet()
protected abstract CodePointSet getIdStart()
protected abstract CodePointSet getIdContinue()
protected abstract int getMaxBackReferenceDigits()
protected abstract CodePointSet getPredefinedCharClass(char c)
Returns the CodePointSet associated with the given predefined character class (e.g. \d).
Note that the CodePointSet returned by this function has already been case-folded and negated.
protected abstract RegexSyntaxException handleBoundedQuantifierOutOfOrder()
Handle {2,1}.
protected abstract Token handleBoundedQuantifierSyntaxError()
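protected abstract RegexSyntaxException handleCCRangeOutOfOrder(int startPos)
Handle out of order character class range elements, e.g. [b-a].
protected abstract void handleCCRangeWithPredefCharClass(int startPos)
Handle non-codepoint character class range elements, e.g. [\w-a].
protected abstract RegexSyntaxException handleEmptyGroupName()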
protected abstract RegexSyntaxException handleGroupRedefinition(String name, int newId, int oldId)
protected abstract void handleIncompleteEscapeX()
Handle incomplete hex escapes, e.g. \x1.
protected abstract void handleInvalidBackReference(int reference)
protected abstract void handleInvalidBackReference(String reference)
protected abstract RegexSyntaxException handleInvalidGroupBeginQ()
Handle groups starting with (? and invalid next char.
protected abstract void handleOctalOutOfRange()
protected abstract void handleUnfinishedEscape()
Handle unfinished escape (e.g. \).
protected abstract void handleUnfinishedGroupComment()
Handle unfinished group comment (#...).
protected abstract RegexSyntaxException handleUnfinishedGroupQ()
Handle unfinished group with question mark (?.
protected abstract void handleUnmatchedRightBrace()
protected abstract RegexSyntaxException handleUnmatchedLeftBracket()
Handle unmatched [.
protected abstract void handleUnmatchedRightBracket()
Handle unmatched ].
protected abstract int parseCodePointInGroupName()
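throws RegexSyntaxException
Throws: RegexSyntaxException
protected abstract Token parseCustomEscape(char c)
Parse any escape sequence starting with \ and the argument c.
protected abstract int parseCustomEscapeChar(char c,
boolean inCharClass)
Parse an escape character sequence (inside character class, or other escapes have already been tried) starting with \ and the argument c.
protected abstract int parseCustomEscapeCharFallback(int c,
boolean inCharClass)
Parse an escape character sequence (inside character class, or other escapes have already been tried) starting with \ and the code point c. This method is called after all other means of parsing the escape sequence have been exhausted.
protected abstract Token parseCustomGroupBeginQ(char charAfterQuestionMark)
Parse group starting with (?.
protected abstract Token parseGroupLt()
Parse group starting with (<.
protected boolean findChars(char... chars)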
protected void advance()
protected void retreat()
public boolean hasNext()
public Token next() throws RegexSyntaxException
Throws: RegexSyntaxException
public int getLastTokenPosition()
protected int getLastAtomPosition()
protected char curChar()
protected char consumeChar()
protected void advance(int len)
protected boolean lookahead(String match)
protected boolean consumingLookahead(String match)
protected boolean lookbehind(char c)
protected boolean isEscaped()
protected boolean atEnd()
public int totalNumberOfCaptureGroups()
throws RegexSyntaxException
Throws: RegexSyntaxException
public int numberOfCaptureGroupsSoFar()
public Map<String,Integer> getNamedCaptureGroups() throws RegexSyntaxException
Throws: RegexSyntaxException
protected boolean hasNamedCaptureGroups()
throws RegexSyntaxException
This method is a way to check whether we are parsing the goal symbol Pattern[~U, +N] or Pattern[~U, ~N] (see the ECMAScript RegExp grammar).
Throws: RegexSyntaxException
protected void registerNamedCaptureGroup(String name)
protected Token charClass(int codePoint)
protected RegexLexer.ParseGroupNameResult parseGroupName(char terminator) throws RegexSyntaxException
Parse a GroupName, i.e. <RegExpIdentifierName>, assuming that the opening < bracket was already read.
Returns: RegExpIdentifierName
Throws: RegexSyntaxException
protected int parseIntSaturated(int firstDigit,
int length,
int returnOnOverflow)
protected int countDecimalDigits()
protected int finishSurrogatePair(char c)
protected int parseOctal(int firstDigit)
public RegexSyntaxException syntaxError(String msg)
protected static boolean isDecimalDigit(int c)
protected static boolean isOctalDigit(int c)
protected static boolean isHexDigit(int c)