109 changes: 60 additions & 49 deletions jlexer/lexer.go
@@ -19,21 +19,21 @@ import (
"github.com/josharian/intern"
)

// tokenKind determines type of a token.
type tokenKind byte
// TokenKind determines type of a token.
type TokenKind byte

const (
tokenUndef tokenKind = iota // No token.
tokenDelim // Delimiter: one of '{', '}', '[' or ']'.
tokenString // A string literal, e.g. "abc\u1234"
tokenNumber // Number literal, e.g. 1.5e5
tokenBool // Boolean literal: true or false.
tokenNull // null keyword.
TokenUndef TokenKind = iota // No token.
TokenDelim // Delimiter: one of '{', '}', '[' or ']'.
TokenString // A string literal, e.g. "abc\u1234"
TokenNumber // Number literal, e.g. 1.5e5
TokenBool // Boolean literal: true or false.
TokenNull // null keyword.
)

// token describes a single token: type, position in the input and value.
type token struct {
kind tokenKind // Type of a token.
kind TokenKind // Type of a token.

boolValue bool // Value if a boolean literal token.
byteValueCloned bool // true if byteValue was allocated and does not refer to original json body
@@ -59,7 +59,7 @@ type Lexer struct {

// FetchToken scans the input for the next token.
func (r *Lexer) FetchToken() {
r.token.kind = tokenUndef
r.token.kind = TokenUndef
r.start = r.pos

// Check if r.Data has r.pos element
@@ -90,7 +90,7 @@ func (r *Lexer) FetchToken() {
r.errSyntax()
}

r.token.kind = tokenString
r.token.kind = TokenString
r.fetchString()
return

@@ -99,7 +99,7 @@ func (r *Lexer) FetchToken() {
r.errSyntax()
}
r.firstElement = true
r.token.kind = tokenDelim
r.token.kind = TokenDelim
r.token.delimValue = r.Data[r.pos]
r.pos++
return
@@ -109,7 +109,7 @@ func (r *Lexer) FetchToken() {
r.errSyntax()
}
r.wantSep = 0
r.token.kind = tokenDelim
r.token.kind = TokenDelim
r.token.delimValue = r.Data[r.pos]
r.pos++
return
@@ -118,7 +118,7 @@ func (r *Lexer) FetchToken() {
if r.wantSep != 0 {
r.errSyntax()
}
r.token.kind = tokenNumber
r.token.kind = TokenNumber
r.fetchNumber()
return

@@ -127,7 +127,7 @@ func (r *Lexer) FetchToken() {
r.errSyntax()
}

r.token.kind = tokenNull
r.token.kind = TokenNull
r.fetchNull()
return

@@ -136,7 +136,7 @@ func (r *Lexer) FetchToken() {
r.errSyntax()
}

r.token.kind = tokenBool
r.token.kind = TokenBool
r.token.boolValue = true
r.fetchTrue()
return
@@ -146,7 +146,7 @@ func (r *Lexer) FetchToken() {
r.errSyntax()
}

r.token.kind = tokenBool
r.token.kind = TokenBool
r.token.boolValue = false
r.fetchFalse()
return
@@ -391,7 +391,7 @@ func (r *Lexer) fetchString() {

// scanToken scans the next token if no token is currently available in the lexer.
func (r *Lexer) scanToken() {
if r.token.kind != tokenUndef || r.fatalError != nil {
if r.token.kind != TokenUndef || r.fatalError != nil {
return
}

@@ -400,7 +400,7 @@

// consume resets the current token to allow scanning the next one.
func (r *Lexer) consume() {
r.token.kind = tokenUndef
r.token.kind = TokenUndef
r.token.delimValue = 0
}

@@ -442,10 +442,10 @@ func (r *Lexer) errInvalidToken(expected string) {
switch expected {
case "[":
r.token.delimValue = ']'
r.token.kind = tokenDelim
r.token.kind = TokenDelim
case "{":
r.token.delimValue = '}'
r.token.kind = tokenDelim
r.token.kind = TokenDelim
}
r.addNonfatalError(&LexerError{
Reason: fmt.Sprintf("expected %s", expected),
@@ -474,7 +474,7 @@ func (r *Lexer) GetPos() int {

// Delim consumes a token and verifies that it is the given delimiter.
func (r *Lexer) Delim(c byte) {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}

@@ -488,34 +488,34 @@ func (r *Lexer) Delim(c byte) {

// IsDelim returns true if there was no scanning error and next token is the given delimiter.
func (r *Lexer) IsDelim(c byte) bool {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
return !r.Ok() || r.token.delimValue == c
}

// Null verifies that the next token is null and consumes it.
func (r *Lexer) Null() {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
if !r.Ok() || r.token.kind != tokenNull {
if !r.Ok() || r.token.kind != TokenNull {
r.errInvalidToken("null")
}
r.consume()
}

// IsNull returns true if the next token is a null keyword.
func (r *Lexer) IsNull() bool {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
return r.Ok() && r.token.kind == tokenNull
return r.Ok() && r.token.kind == TokenNull
}

// Skip skips a single token.
func (r *Lexer) Skip() {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
r.consume()
@@ -611,10 +611,10 @@ func (r *Lexer) Consumed() {
}

func (r *Lexer) unsafeString(skipUnescape bool) (string, []byte) {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
if !r.Ok() || r.token.kind != tokenString {
if !r.Ok() || r.token.kind != TokenString {
r.errInvalidToken("string")
return "", nil
}
@@ -654,10 +654,10 @@ func (r *Lexer) UnsafeFieldName(skipUnescape bool) string {

// String reads a string literal.
func (r *Lexer) String() string {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
if !r.Ok() || r.token.kind != tokenString {
if !r.Ok() || r.token.kind != TokenString {
r.errInvalidToken("string")
return ""
}
@@ -677,10 +677,10 @@ func (r *Lexer) String() string {

// StringIntern reads a string literal, and performs string interning on it.
func (r *Lexer) StringIntern() string {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
if !r.Ok() || r.token.kind != tokenString {
if !r.Ok() || r.token.kind != TokenString {
r.errInvalidToken("string")
return ""
}
@@ -695,10 +695,10 @@ func (r *Lexer) StringIntern() string {

// Bytes reads a string literal and base64 decodes it into a byte slice.
func (r *Lexer) Bytes() []byte {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
if !r.Ok() || r.token.kind != tokenString {
if !r.Ok() || r.token.kind != TokenString {
r.errInvalidToken("string")
return nil
}
@@ -717,10 +717,10 @@ func (r *Lexer) Bytes() []byte {

// Bool reads a true or false boolean keyword.
func (r *Lexer) Bool() bool {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
if !r.Ok() || r.token.kind != tokenBool {
if !r.Ok() || r.token.kind != TokenBool {
r.errInvalidToken("bool")
return false
}
@@ -730,10 +730,10 @@ func (r *Lexer) Bool() bool {
}

func (r *Lexer) number() string {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
if !r.Ok() || r.token.kind != tokenNumber {
if !r.Ok() || r.token.kind != TokenNumber {
r.errInvalidToken("number")
return ""
}
@@ -1137,7 +1137,7 @@ func (r *Lexer) GetNonFatalErrors() []*LexerError {
// JsonNumber fetches and json.Number from 'encoding/json' package.
// Both int, float or string, contains them are valid values
func (r *Lexer) JsonNumber() json.Number {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
if !r.Ok() {
@@ -1146,11 +1146,11 @@ func (r *Lexer) JsonNumber() json.Number {
}

switch r.token.kind {
case tokenString:
case TokenString:
return json.Number(r.String())
case tokenNumber:
case TokenNumber:
return json.Number(r.Raw())
case tokenNull:
case TokenNull:
r.Null()
return json.Number("")
default:
@@ -1161,21 +1161,21 @@ func (r *Lexer) Interface() interface{} {

// Interface fetches an interface{} analogous to the 'encoding/json' package.
func (r *Lexer) Interface() interface{} {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}

if !r.Ok() {
return nil
}
switch r.token.kind {
case tokenString:
case TokenString:
return r.String()
case tokenNumber:
case TokenNumber:
return r.Float64()
case tokenBool:
case TokenBool:
return r.Bool()
case tokenNull:
case TokenNull:
r.Null()
return nil
}
@@ -1228,3 +1228,14 @@ func (r *Lexer) WantColon() {
r.wantSep = ':'
r.firstElement = false
}

// CurrentToken returns current token kind if there were no errors and TokenUndef otherwise
func (r *Lexer) CurrentToken() TokenKind {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
if !r.Ok() {
return TokenUndef
}
return r.token.kind
}
13 changes: 13 additions & 0 deletions jlexer/lexer_test.go
@@ -368,3 +368,16 @@ func TestFetchStringUnterminatedString(t *testing.T) {
}
}
}

func TestCurrentToken(t *testing.T) {
data := []byte(`{"foo"`)
tokens := []TokenKind{TokenDelim, TokenString, TokenUndef}
l := Lexer{Data: data}
for _, want := range tokens {
got := l.CurrentToken()
if got != want {
t.Errorf("CurrentToken() = %v; want %v (err %s)", got, want, l.Error())
}
l.Skip()
}
}
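
Usage note (not part of the PR): the sketch below shows how a caller might combine the newly exported CurrentToken() and the TokenKind constants to peek at the next token before deciding how to decode it. It assumes the upstream github.com/mailru/easyjson/jlexer import path; the helper name and input data are hypothetical.

package main

import (
	"fmt"

	"github.com/mailru/easyjson/jlexer"
)

// peekAndDecode is an illustrative sketch: it dispatches on the next token's
// kind via CurrentToken and only then consumes the token.
func peekAndDecode(l *jlexer.Lexer) string {
	switch l.CurrentToken() {
	case jlexer.TokenString:
		return fmt.Sprintf("string %q", l.String())
	case jlexer.TokenNumber:
		return fmt.Sprintf("number %v", l.Float64())
	case jlexer.TokenBool:
		return fmt.Sprintf("bool %v", l.Bool())
	case jlexer.TokenNull:
		l.Null()
		return "null"
	case jlexer.TokenDelim:
		l.SkipRecursive() // consume a whole nested object or array
		return "composite value"
	default: // TokenUndef: end of input or a fatal lexer error
		return fmt.Sprintf("no token (err: %v)", l.Error())
	}
}

func main() {
	l := jlexer.Lexer{Data: []byte(`[1, "two", true, null, {"k": 3}]`)}
	l.Delim('[')
	for !l.IsDelim(']') {
		fmt.Println(peekAndDecode(&l))
		l.WantComma()
	}
	l.Delim(']')
}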