Class LexingCommon

java.lang.Object
org.jruby.lexer.LexingCommon
Direct Known Subclasses:
org.jruby.ext.ripper.RubyLexer, org.jruby.lexer.yacc.RubyLexer

public abstract class LexingCommon extends Object
Code and constants common to both ripper and main parser.
  • Field Details

    • EXPR_BEG

      public static final int EXPR_BEG
      See Also:
    • EXPR_END

      public static final int EXPR_END
      See Also:
    • EXPR_ENDARG

      public static final int EXPR_ENDARG
      See Also:
    • EXPR_ENDFN

      public static final int EXPR_ENDFN
      See Also:
    • EXPR_ARG

      public static final int EXPR_ARG
      See Also:
    • EXPR_CMDARG

      public static final int EXPR_CMDARG
      See Also:
    • EXPR_MID

      public static final int EXPR_MID
      See Also:
    • EXPR_FNAME

      public static final int EXPR_FNAME
      See Also:
    • EXPR_DOT

      public static final int EXPR_DOT
      See Also:
    • EXPR_CLASS

      public static final int EXPR_CLASS
      See Also:
    • EXPR_LABEL

      public static final int EXPR_LABEL
      See Also:
    • EXPR_LABELED

      public static final int EXPR_LABELED
      See Also:
    • EXPR_FITEM

      public static final int EXPR_FITEM
      See Also:
    • EXPR_VALUE

      public static final int EXPR_VALUE
      See Also:
    • EXPR_BEG_ANY

      public static final int EXPR_BEG_ANY
      See Also:
    • EXPR_ARG_ANY

      public static final int EXPR_ARG_ANY
      See Also:
    • EXPR_END_ANY

      public static final int EXPR_END_ANY
      See Also:
    • braceNest

      protected int braceNest
    • commandStart

      public boolean commandStart
    • conditionState

      protected StackState conditionState
    • cmdArgumentState

      protected StackState cmdArgumentState
    • __end__seen

      protected boolean __end__seen
    • eofp

      public boolean eofp
    • has_shebang

      protected boolean has_shebang
    • heredoc_end

      protected int heredoc_end
    • heredoc_indent

      protected int heredoc_indent
    • heredoc_line_indent

      protected int heredoc_line_indent
    • last_cr_line

      protected int last_cr_line
    • last_state

      protected int last_state
    • lexb

      public ByteList lexb
    • lex_lastline

      public ByteList lex_lastline
    • lex_nextline

      protected ByteList lex_nextline
    • lex_p

      public int lex_p
    • lex_pbeg

      protected int lex_pbeg
    • lex_pend

      public int lex_pend
    • lex_state

      protected int lex_state
    • line_count

      protected int line_count
    • line_offset

      protected int line_offset
    • parenNest

      protected int parenNest
    • ruby_sourceline

      protected int ruby_sourceline
    • src

      protected LexerSource src
    • token

      protected int token
    • tokenSeen

      protected boolean tokenSeen
    • tokline

      public int tokline
    • tokp

      public int tokp
    • yaccValue

      protected Object yaccValue
    • start

      public long start
    • end

      public long end
    • AND_KEYWORD

      public static final ByteList AND_KEYWORD
    • BACKTICK

      public static final ByteList BACKTICK
    • EQ_EQ_EQ

      public static final ByteList EQ_EQ_EQ
    • EQ_EQ

      public static final ByteList EQ_EQ
    • EQ_TILDE

      public static final ByteList EQ_TILDE
    • EQ_GT

      public static final ByteList EQ_GT
    • EQ

      public static final ByteList EQ
    • AMPERSAND_AMPERSAND

      public static final ByteList AMPERSAND_AMPERSAND
    • AMPERSAND

      public static final ByteList AMPERSAND
    • AMPERSAND_DOT

      public static final ByteList AMPERSAND_DOT
    • BANG

      public static final ByteList BANG
    • BANG_EQ

      public static final ByteList BANG_EQ
    • BANG_TILDE

      public static final ByteList BANG_TILDE
    • CARET

      public static final ByteList CARET
    • COLON_COLON

      public static final ByteList COLON_COLON
    • COLON

      public static final ByteList COLON
    • COMMA

      public static final ByteList COMMA
    • DOT_DOT_DOT

      public static final ByteList DOT_DOT_DOT
    • DOT_DOT

      public static final ByteList DOT_DOT
    • DOT

      public static final ByteList DOT
    • GT_EQ

      public static final ByteList GT_EQ
    • GT_GT

      public static final ByteList GT_GT
    • GT

      public static final ByteList GT
    • LBRACKET_RBRACKET_EQ

      public static final ByteList LBRACKET_RBRACKET_EQ
    • LBRACKET_RBRACKET

      public static final ByteList LBRACKET_RBRACKET
    • LBRACKET

      public static final ByteList LBRACKET
    • LCURLY

      public static final ByteList LCURLY
    • LT_EQ_RT

      public static final ByteList LT_EQ_RT
    • LT_EQ

      public static final ByteList LT_EQ
    • LT_LT

      public static final ByteList LT_LT
    • LT

      public static final ByteList LT
    • MINUS_AT

      public static final ByteList MINUS_AT
    • MINUS

      public static final ByteList MINUS
    • MINUS_GT

      public static final ByteList MINUS_GT
    • NIL

      public static final ByteList NIL
    • PERCENT

      public static final ByteList PERCENT
    • OR_OR

      public static final ByteList OR_OR
    • OR

      public static final ByteList OR
    • OR_KEYWORD

      public static final ByteList OR_KEYWORD
    • PLUS_AT

      public static final ByteList PLUS_AT
    • PLUS

      public static final ByteList PLUS
    • QUESTION

      public static final ByteList QUESTION
    • RBRACKET

      public static final ByteList RBRACKET
    • RCURLY

      public static final ByteList RCURLY
    • RPAREN

      public static final ByteList RPAREN
    • Q

      public static final ByteList Q
    • SLASH

      public static final ByteList SLASH
    • STAR

      public static final ByteList STAR
    • STAR_STAR

      public static final ByteList STAR_STAR
    • TILDE

      public static final ByteList TILDE
    • QQ

      public static final ByteList QQ
    • SEMICOLON

      public static final ByteList SEMICOLON
    • BACKSLASH

      public static final ByteList BACKSLASH
    • CALL

      public static final ByteList CALL
    • DOLLAR_BANG

      public static final ByteList DOLLAR_BANG
    • DOLLAR_UNDERSCORE

      public static final ByteList DOLLAR_UNDERSCORE
    • DOLLAR_DOT

      public static final ByteList DOLLAR_DOT
    • KWNOREST

      public static final ByteList KWNOREST
    • TAB_WIDTH

      public static final int TAB_WIDTH
      See Also:
    • STR_FUNC_ESCAPE

      public static final int STR_FUNC_ESCAPE
      See Also:
    • STR_FUNC_EXPAND

      public static final int STR_FUNC_EXPAND
      See Also:
    • STR_FUNC_REGEXP

      public static final int STR_FUNC_REGEXP
      See Also:
    • STR_FUNC_QWORDS

      public static final int STR_FUNC_QWORDS
      See Also:
    • STR_FUNC_SYMBOL

      public static final int STR_FUNC_SYMBOL
      See Also:
    • STR_FUNC_INDENT

      public static final int STR_FUNC_INDENT
      See Also:
    • STR_FUNC_LABEL

      public static final int STR_FUNC_LABEL
      See Also:
    • STR_FUNC_LIST

      public static final int STR_FUNC_LIST
      See Also:
    • STR_FUNC_TERM

      public static final int STR_FUNC_TERM
      See Also:
    • str_label

      public static final int str_label
      See Also:
    • str_squote

      public static final int str_squote
      See Also:
    • str_dquote

      public static final int str_dquote
      See Also:
    • str_xquote

      public static final int str_xquote
      See Also:
    • str_regexp

      public static final int str_regexp
      See Also:
    • str_sword

      public static final int str_sword
      See Also:
    • str_dword

      public static final int str_dword
      See Also:
    • str_ssym

      public static final int str_ssym
      See Also:
    • str_dsym

      public static final int str_dsym
      See Also:
    • EOF

      public static final int EOF
      See Also:
    • END_MARKER

      public static ByteList END_MARKER
    • BEGIN_DOC_MARKER

      public static ByteList BEGIN_DOC_MARKER
    • END_DOC_MARKER

      public static ByteList END_DOC_MARKER
    • CODING

      public static ByteList CODING
    • UTF8_ENCODING

      public static final org.jcodings.Encoding UTF8_ENCODING
    • USASCII_ENCODING

      public static final org.jcodings.Encoding USASCII_ENCODING
    • ASCII8BIT_ENCODING

      public static final org.jcodings.Encoding ASCII8BIT_ENCODING
    • SUFFIX_R

      public static final int SUFFIX_R
      See Also:
    • SUFFIX_I

      public static final int SUFFIX_I
      See Also:
    • SUFFIX_ALL

      public static final int SUFFIX_ALL
      See Also:
  • Constructor Details

  • Method Details

    • column

      public int column()
    • set_yylval_id

      protected void set_yylval_id(ByteList id)
    • set_yylval_name

      protected void set_yylval_name(ByteList name)
    • id

      public ByteList id()
    • updateTokenPosition

      protected void updateTokenPosition()
    • updateStartPosition

      protected void updateStartPosition(int column)
    • compile_error_pos

      public void compile_error_pos(String message)
    • comment_at_top

      protected boolean comment_at_top()
    • getRubySourceline

      public int getRubySourceline()
    • setRubySourceline

      public void setRubySourceline(int line)
    • createTokenByteList

      public ByteList createTokenByteList()
    • createTokenByteList

      public ByteList createTokenByteList(int start)
    • createTokenString

      public String createTokenString(int start)
    • createAsEncodedString

      public String createAsEncodedString(byte[] bytes, int start, int length)
    • createTokenString

      public String createTokenString()
    • dedent_string

      public static int dedent_string(ByteList string, int width)
    • flush

      public void flush()
    • getLexContext

      public LexContext getLexContext()
    • setLexContext

      public void setLexContext(LexContext context)
    • getBraceNest

      public int getBraceNest()
    • getCmdArgumentState

      public StackState getCmdArgumentState()
    • getConditionState

      public StackState getConditionState()
    • getCurrentArg

      public ByteList getCurrentArg()
    • getCurrentLine

      public String getCurrentLine()
    • getEncoding

      public org.jcodings.Encoding getEncoding()
    • getFile

      public String getFile()
    • getHeredocIndent

      public int getHeredocIndent()
    • getHeredocLineIndent

      public int getHeredocLineIndent()
    • getLeftParenBegin

      public int getLeftParenBegin()
    • isLambdaBeginning

      protected boolean isLambdaBeginning()
    • getLineOffset

      public int getLineOffset()
    • getState

      public int getState()
    • getTokenCR

      public int getTokenCR()
    • getParenNest

      public int getParenNest()
    • incrementParenNest

      public int incrementParenNest()
    • isEndSeen

      public boolean isEndSeen()
    • isLookingAtEOL

      public boolean isLookingAtEOL()
    • isASCII

      public boolean isASCII()
    • isASCII

      public static boolean isASCII(int c)
    • peekVariableName

      public int peekVariableName(int tSTRING_DVAR, int tSTRING_DBEG) throws IOException
      Throws:
      IOException
    • isGlobalCharPunct

      public boolean isGlobalCharPunct(int c)
    • isIdentifierChar

      public static boolean isIdentifierChar(int c)
      Is this a valid character for an identifier?
      Parameters:
      c - is character to be compared
      Returns:
      whether c is a valid identifier character or not (MRI: is_identchar)
    • lex_goto_eol

      public void lex_goto_eol()
    • lineno

      public int lineno()
    • magicCommentEncoding

      protected void magicCommentEncoding(ByteList encoding)
    • newtok

      public void newtok(boolean unreadOnce)
    • numberLiteralSuffix

      protected int numberLiteralSuffix(int mask)
    • parser_prepare

      public void parser_prepare()
    • p

      public int p(int offset)
    • peek

      public boolean peek(int c)
    • peek

      protected boolean peek(int c, int n)
    • precise_mbclen

      public int precise_mbclen()
    • printState

      public void printState()
    • pushback

      public void pushback(int c)
    • reset

      public void reset()
    • resetStacks

      public void resetStacks()
    • scanOct

      protected char scanOct(int count) throws IOException
      Throws:
      IOException
    • setCurrentArg

      public void setCurrentArg(ByteList current_arg)
    • setCurrentEncoding

      public void setCurrentEncoding(org.jcodings.Encoding encoding)
    • setEncoding

      public void setEncoding(org.jcodings.Encoding encoding)
    • set_file_encoding

      protected void set_file_encoding(int str, int send)
    • setHeredocLineIndent

      public void setHeredocLineIndent(int heredoc_line_indent)
    • setHeredocIndent

      public void setHeredocIndent(int heredoc_indent)
    • setBraceNest

      public void setBraceNest(int nest)
    • setLeftParenBegin

      public void setLeftParenBegin(int value)
    • setSource

      public void setSource(LexerSource source)
      Allow the parser to set the source for its lexer.
      Parameters:
      source - where the lexer gets raw data
    • setState

      public void setState(int state)
    • setValue

      public void setValue(Object yaccValue)
    • strncmp

      protected boolean strncmp(ByteList one, ByteList two, int length)
    • tokAdd

      public void tokAdd(int first_byte, ByteList buffer)
    • tokCopy

      public void tokCopy(int length, ByteList buffer)
    • tokadd_ident

      public boolean tokadd_ident(int c)
    • tokadd_mbchar

      public boolean tokadd_mbchar(int first_byte)
      This differs from MRI in a few ways. First, this version does not append the value to a separate token buffer. It is for use when we know we will not be omitting or including any non-syntactical characters. Use tokadd_mbchar(int, ByteList) if the string differs from the actual source. Secondly, this returns a boolean instead of the first byte passed. MRI only used the return value as a success/failure code to return EOF. Because this version does not use a separate token buffer, we just increment lex_p. When we reach the end of the token, it will get the bytes directly from the source.
    • tokadd_mbchar

      public boolean tokadd_mbchar(int first_byte, ByteList buffer)
    • tokaddmbc

      public void tokaddmbc(int codepoint, ByteList buffer)
      This looks deceptively like tokadd_mbchar(int, ByteList), but it differs in that it uses the ByteList's encoding and the first parameter is a full codepoint, not the first byte of a multi-byte character sequence.
    • token

      public int token()
      Last token read from the lexer at the end of a call to yylex()
      Returns:
      last token read
    • update_heredoc_indent

      public boolean update_heredoc_indent(int c)
    • validateFormalIdentifier

      public void validateFormalIdentifier(ByteList identifier)
    • validateFormalIdentifier

      @Deprecated public void validateFormalIdentifier(String identifier)
      Deprecated.
    • value

      public Object value()
      Value of last token (if it is a token which has a value).
      Returns:
      value of last value-laden token
    • warn_balanced

      protected int warn_balanced(int c, boolean spaceSeen, int token, String op, String syn)
    • was_bol

      public boolean was_bol()
    • whole_match_p

      public boolean whole_match_p(ByteList eos, boolean indent)
    • ambiguousOperator

      protected abstract void ambiguousOperator(String op, String syn)
    • compile_error

      public abstract void compile_error(String message)
    • parse_error

      public abstract void parse_error(String message)
    • nextc

      public abstract int nextc()
    • setCompileOptionFlag

      protected abstract void setCompileOptionFlag(String name, ByteList value)
    • setEncoding

      protected abstract void setEncoding(ByteList name)
    • setTokenInfo

      protected abstract void setTokenInfo(String name, ByteList value)
    • tokenize_ident

      public abstract int tokenize_ident(int result)
    • isHexChar

      public static boolean isHexChar(int c)
      Parameters:
      c - the character to test
      Returns:
      true if character is a hex value (0-9a-f)
    • IS_lex_state

      public static boolean IS_lex_state(int state, int mask)
    • IS_lex_state_all

      protected boolean IS_lex_state_all(int state, int mask)
    • ISSPACE

      protected static boolean ISSPACE(int c)
    • IS_ARG

      protected boolean IS_ARG()
    • IS_END

      protected boolean IS_END()
    • IS_BEG

      protected boolean IS_BEG()
    • IS_SPCARG

      protected boolean IS_SPCARG(int c, boolean spaceSeen)
    • IS_LABEL_POSSIBLE

      protected boolean IS_LABEL_POSSIBLE(boolean commandState)
    • IS_LABEL_SUFFIX

      public boolean IS_LABEL_SUFFIX()
    • IS_AFTER_OPERATOR

      protected boolean IS_AFTER_OPERATOR()
    • isNext_identchar

      protected boolean isNext_identchar() throws IOException
      Throws:
      IOException
    • isOctChar

      public static boolean isOctChar(int c)
      Parameters:
      c - the character to test
      Returns:
      true if character is an octal value (0-7)
    • isSpace

      public static boolean isSpace(int c)
    • magicCommentMarker

      public static int magicCommentMarker(ByteList str, int begin)
    • parser_magic_comment

      public boolean parser_magic_comment(ByteList magicLine)
    • onMagicComment

      protected boolean onMagicComment(String name, ByteList value)
    • parseRegexpFlags

      protected abstract RegexpOptions parseRegexpFlags() throws IOException
      Throws:
      IOException
    • parseRegexpFlags

      protected RegexpOptions parseRegexpFlags(StringBuilder unknownFlags) throws IOException
      Throws:
      IOException
    • checkRegexpFragment

      public void checkRegexpFragment(Ruby runtime, ByteList value, RegexpOptions options)
    • checkRegexpSyntax

      public void checkRegexpSyntax(Ruby runtime, ByteList value, RegexpOptions options)
    • mismatchedRegexpEncodingError

      protected abstract void mismatchedRegexpEncodingError(org.jcodings.Encoding optionEncoding, org.jcodings.Encoding encoding)
    • setRegexpEncoding

      public void setRegexpEncoding(Ruby runtime, ByteList value, RegexpOptions options)
    • optionsEncodingChar

      protected char optionsEncodingChar(org.jcodings.Encoding optionEncoding)
    • scanHex

      protected int scanHex(int count, boolean strict, String errorMessage)
      Read up to count hexadecimal digits. If strict is true, then exactly count hex digits must be present. If no digits can be read, a syntax exception will be thrown.
    • readEscape

      public int readEscape() throws IOException
      Throws:
      IOException
    • scanHexLiteral

      protected char scanHexLiteral(ByteList buffer, int count, boolean strict, String errorMessage)
      Read up to count hexadecimal digits and store those digits in the supplied token buffer. If strict is true, then exactly count hex digits must be present. If no digits can be read, a syntax exception will be thrown. This will also return the codepoint as a value so codepoint ranges can be checked.
    • tokHex

      protected int tokHex(int count, String errorMessage)
    • readUTF8EscapeIntoBuffer

      protected void readUTF8EscapeIntoBuffer(int codepoint, ByteList buffer, boolean stringLiteral, boolean[] encodingDetermined) throws IOException
      Throws:
      IOException
    • readUTFEscape

      public int readUTFEscape(ByteList buffer, boolean stringLiteral, boolean[] encodingDetermined) throws IOException
      Throws:
      IOException
    • readUTFEscapeRegexpLiteral

      public void readUTFEscapeRegexpLiteral(ByteList buffer)