package jmespath

import (
	"bytes"
	"encoding/json"
	"fmt"
	"strconv"
	"strings"
	"unicode/utf8"
)
// token is a single lexical token produced by the Lexer.
type token struct {
	tokenType tokType // The category of the token (e.g. tDot, tStar).
	value     string  // The literal (or decoded) text of the token.
	position  int     // Byte offset of the token within the expression.
	length    int     // Length of the token's value.
}

// tokType identifies the category of a lexed token.
type tokType int

// eof is the sentinel rune returned by next() once the end of the
// expression has been reached.
const eof = -1
// Lexer contains information about the expression being tokenized.
type Lexer struct {
	expression string       // The expression provided by the user.
	currentPos int          // The current byte position in the string.
	lastWidth  int          // The width (in bytes) of the last rune read by next().
	buf        bytes.Buffer // Internal buffer used for building up values.
}
// SyntaxError is the main error used whenever a lexing or parsing error occurs.
type SyntaxError struct {
	msg        string // Error message displayed to user
	Expression string // Expression that generated a SyntaxError
	Offset     int    // The location in the string where the error occurred
}
func (e SyntaxError) Error() string {
Packit 63bb0d
	// In the future, it would be good to underline the specific
Packit 63bb0d
	// location where the error occurred.
Packit 63bb0d
	return "SyntaxError: " + e.msg
Packit 63bb0d
}
Packit 63bb0d
Packit 63bb0d
// HighlightLocation will show where the syntax error occurred.
Packit 63bb0d
// It will place a "^" character on a line below the expression
Packit 63bb0d
// at the point where the syntax error occurred.
Packit 63bb0d
func (e SyntaxError) HighlightLocation() string {
Packit 63bb0d
	return e.Expression + "\n" + strings.Repeat(" ", e.Offset) + "^"
Packit 63bb0d
}
Packit 63bb0d
Packit 63bb0d
//go:generate stringer -type=tokType
Packit 63bb0d
const (
Packit 63bb0d
	tUnknown tokType = iota
Packit 63bb0d
	tStar
Packit 63bb0d
	tDot
Packit 63bb0d
	tFilter
Packit 63bb0d
	tFlatten
Packit 63bb0d
	tLparen
Packit 63bb0d
	tRparen
Packit 63bb0d
	tLbracket
Packit 63bb0d
	tRbracket
Packit 63bb0d
	tLbrace
Packit 63bb0d
	tRbrace
Packit 63bb0d
	tOr
Packit 63bb0d
	tPipe
Packit 63bb0d
	tNumber
Packit 63bb0d
	tUnquotedIdentifier
Packit 63bb0d
	tQuotedIdentifier
Packit 63bb0d
	tComma
Packit 63bb0d
	tColon
Packit 63bb0d
	tLT
Packit 63bb0d
	tLTE
Packit 63bb0d
	tGT
Packit 63bb0d
	tGTE
Packit 63bb0d
	tEQ
Packit 63bb0d
	tNE
Packit 63bb0d
	tJSONLiteral
Packit 63bb0d
	tStringLiteral
Packit 63bb0d
	tCurrent
Packit 63bb0d
	tExpref
Packit 63bb0d
	tAnd
Packit 63bb0d
	tNot
Packit 63bb0d
	tEOF
Packit 63bb0d
)
Packit 63bb0d
Packit 63bb0d
var basicTokens = map[rune]tokType{
Packit 63bb0d
	'.': tDot,
Packit 63bb0d
	'*': tStar,
Packit 63bb0d
	',': tComma,
Packit 63bb0d
	':': tColon,
Packit 63bb0d
	'{': tLbrace,
Packit 63bb0d
	'}': tRbrace,
Packit 63bb0d
	']': tRbracket, // tLbracket not included because it could be "[]"
Packit 63bb0d
	'(': tLparen,
Packit 63bb0d
	')': tRparen,
Packit 63bb0d
	'@': tCurrent,
Packit 63bb0d
}
Packit 63bb0d
Packit 63bb0d
// Bit mask for [a-zA-Z_] shifted down 64 bits to fit in a single uint64.
// When using this bitmask just be sure to shift the rune down 64 bits
// before checking against identifierStartBits.
const identifierStartBits uint64 = 576460745995190270

// Bit mask for [0-9a-zA-Z_], 128 bits -> 2 uint64s.
// Index with rune/64 and test bit rune%64 (valid only for runes 0-127).
var identifierTrailingBits = [2]uint64{287948901175001088, 576460745995190270}

// whiteSpace is the set of runes skipped between tokens.
var whiteSpace = map[rune]bool{
	' ': true, '\t': true, '\n': true, '\r': true,
}
func (t token) String() string {
Packit 63bb0d
	return fmt.Sprintf("Token{%+v, %s, %d, %d}",
Packit 63bb0d
		t.tokenType, t.value, t.position, t.length)
Packit 63bb0d
}
Packit 63bb0d
Packit 63bb0d
// NewLexer creates a new JMESPath lexer.
Packit 63bb0d
func NewLexer() *Lexer {
Packit 63bb0d
	lexer := Lexer{}
Packit 63bb0d
	return &lexer
Packit 63bb0d
}
Packit 63bb0d
Packit 63bb0d
func (lexer *Lexer) next() rune {
Packit 63bb0d
	if lexer.currentPos >= len(lexer.expression) {
Packit 63bb0d
		lexer.lastWidth = 0
Packit 63bb0d
		return eof
Packit 63bb0d
	}
Packit 63bb0d
	r, w := utf8.DecodeRuneInString(lexer.expression[lexer.currentPos:])
Packit 63bb0d
	lexer.lastWidth = w
Packit 63bb0d
	lexer.currentPos += w
Packit 63bb0d
	return r
Packit 63bb0d
}
Packit 63bb0d
Packit 63bb0d
func (lexer *Lexer) back() {
Packit 63bb0d
	lexer.currentPos -= lexer.lastWidth
Packit 63bb0d
}
Packit 63bb0d
Packit 63bb0d
func (lexer *Lexer) peek() rune {
Packit 63bb0d
	t := lexer.next()
Packit 63bb0d
	lexer.back()
Packit 63bb0d
	return t
Packit 63bb0d
}
Packit 63bb0d
Packit 63bb0d
// tokenize takes an expression and returns corresponding tokens.
Packit 63bb0d
func (lexer *Lexer) tokenize(expression string) ([]token, error) {
Packit 63bb0d
	var tokens []token
Packit 63bb0d
	lexer.expression = expression
Packit 63bb0d
	lexer.currentPos = 0
Packit 63bb0d
	lexer.lastWidth = 0
Packit 63bb0d
loop:
Packit 63bb0d
	for {
Packit 63bb0d
		r := lexer.next()
Packit 63bb0d
		if identifierStartBits&(1<<(uint64(r)-64)) > 0 {
Packit 63bb0d
			t := lexer.consumeUnquotedIdentifier()
Packit 63bb0d
			tokens = append(tokens, t)
Packit 63bb0d
		} else if val, ok := basicTokens[r]; ok {
Packit 63bb0d
			// Basic single char token.
Packit 63bb0d
			t := token{
Packit 63bb0d
				tokenType: val,
Packit 63bb0d
				value:     string(r),
Packit 63bb0d
				position:  lexer.currentPos - lexer.lastWidth,
Packit 63bb0d
				length:    1,
Packit 63bb0d
			}
Packit 63bb0d
			tokens = append(tokens, t)
Packit 63bb0d
		} else if r == '-' || (r >= '0' && r <= '9') {
Packit 63bb0d
			t := lexer.consumeNumber()
Packit 63bb0d
			tokens = append(tokens, t)
Packit 63bb0d
		} else if r == '[' {
Packit 63bb0d
			t := lexer.consumeLBracket()
Packit 63bb0d
			tokens = append(tokens, t)
Packit 63bb0d
		} else if r == '"' {
Packit 63bb0d
			t, err := lexer.consumeQuotedIdentifier()
Packit 63bb0d
			if err != nil {
Packit 63bb0d
				return tokens, err
Packit 63bb0d
			}
Packit 63bb0d
			tokens = append(tokens, t)
Packit 63bb0d
		} else if r == '\'' {
Packit 63bb0d
			t, err := lexer.consumeRawStringLiteral()
Packit 63bb0d
			if err != nil {
Packit 63bb0d
				return tokens, err
Packit 63bb0d
			}
Packit 63bb0d
			tokens = append(tokens, t)
Packit 63bb0d
		} else if r == '`' {
Packit 63bb0d
			t, err := lexer.consumeLiteral()
Packit 63bb0d
			if err != nil {
Packit 63bb0d
				return tokens, err
Packit 63bb0d
			}
Packit 63bb0d
			tokens = append(tokens, t)
Packit 63bb0d
		} else if r == '|' {
Packit 63bb0d
			t := lexer.matchOrElse(r, '|', tOr, tPipe)
Packit 63bb0d
			tokens = append(tokens, t)
Packit 63bb0d
		} else if r == '<' {
Packit 63bb0d
			t := lexer.matchOrElse(r, '=', tLTE, tLT)
Packit 63bb0d
			tokens = append(tokens, t)
Packit 63bb0d
		} else if r == '>' {
Packit 63bb0d
			t := lexer.matchOrElse(r, '=', tGTE, tGT)
Packit 63bb0d
			tokens = append(tokens, t)
Packit 63bb0d
		} else if r == '!' {
Packit 63bb0d
			t := lexer.matchOrElse(r, '=', tNE, tNot)
Packit 63bb0d
			tokens = append(tokens, t)
Packit 63bb0d
		} else if r == '=' {
Packit 63bb0d
			t := lexer.matchOrElse(r, '=', tEQ, tUnknown)
Packit 63bb0d
			tokens = append(tokens, t)
Packit 63bb0d
		} else if r == '&' {
Packit 63bb0d
			t := lexer.matchOrElse(r, '&', tAnd, tExpref)
Packit 63bb0d
			tokens = append(tokens, t)
Packit 63bb0d
		} else if r == eof {
Packit 63bb0d
			break loop
Packit 63bb0d
		} else if _, ok := whiteSpace[r]; ok {
Packit 63bb0d
			// Ignore whitespace
Packit 63bb0d
		} else {
Packit 63bb0d
			return tokens, lexer.syntaxError(fmt.Sprintf("Unknown char: %s", strconv.QuoteRuneToASCII(r)))
Packit 63bb0d
		}
Packit 63bb0d
	}
Packit 63bb0d
	tokens = append(tokens, token{tEOF, "", len(lexer.expression), 0})
Packit 63bb0d
	return tokens, nil
Packit 63bb0d
}
Packit 63bb0d
Packit 63bb0d
// Consume characters until the ending rune "r" is reached.
Packit 63bb0d
// If the end of the expression is reached before seeing the
Packit 63bb0d
// terminating rune "r", then an error is returned.
Packit 63bb0d
// If no error occurs then the matching substring is returned.
Packit 63bb0d
// The returned string will not include the ending rune.
Packit 63bb0d
func (lexer *Lexer) consumeUntil(end rune) (string, error) {
Packit 63bb0d
	start := lexer.currentPos
Packit 63bb0d
	current := lexer.next()
Packit 63bb0d
	for current != end && current != eof {
Packit 63bb0d
		if current == '\\' && lexer.peek() != eof {
Packit 63bb0d
			lexer.next()
Packit 63bb0d
		}
Packit 63bb0d
		current = lexer.next()
Packit 63bb0d
	}
Packit 63bb0d
	if lexer.lastWidth == 0 {
Packit 63bb0d
		// Then we hit an EOF so we never reached the closing
Packit 63bb0d
		// delimiter.
Packit 63bb0d
		return "", SyntaxError{
Packit 63bb0d
			msg:        "Unclosed delimiter: " + string(end),
Packit 63bb0d
			Expression: lexer.expression,
Packit 63bb0d
			Offset:     len(lexer.expression),
Packit 63bb0d
		}
Packit 63bb0d
	}
Packit 63bb0d
	return lexer.expression[start : lexer.currentPos-lexer.lastWidth], nil
Packit 63bb0d
}
Packit 63bb0d
Packit 63bb0d
func (lexer *Lexer) consumeLiteral() (token, error) {
Packit 63bb0d
	start := lexer.currentPos
Packit 63bb0d
	value, err := lexer.consumeUntil('`')
Packit 63bb0d
	if err != nil {
Packit 63bb0d
		return token{}, err
Packit 63bb0d
	}
Packit 63bb0d
	value = strings.Replace(value, "\\`", "`", -1)
Packit 63bb0d
	return token{
Packit 63bb0d
		tokenType: tJSONLiteral,
Packit 63bb0d
		value:     value,
Packit 63bb0d
		position:  start,
Packit 63bb0d
		length:    len(value),
Packit 63bb0d
	}, nil
Packit 63bb0d
}
Packit 63bb0d
Packit 63bb0d
func (lexer *Lexer) consumeRawStringLiteral() (token, error) {
Packit 63bb0d
	start := lexer.currentPos
Packit 63bb0d
	currentIndex := start
Packit 63bb0d
	current := lexer.next()
Packit 63bb0d
	for current != '\'' && lexer.peek() != eof {
Packit 63bb0d
		if current == '\\' && lexer.peek() == '\'' {
Packit 63bb0d
			chunk := lexer.expression[currentIndex : lexer.currentPos-1]
Packit 63bb0d
			lexer.buf.WriteString(chunk)
Packit 63bb0d
			lexer.buf.WriteString("'")
Packit 63bb0d
			lexer.next()
Packit 63bb0d
			currentIndex = lexer.currentPos
Packit 63bb0d
		}
Packit 63bb0d
		current = lexer.next()
Packit 63bb0d
	}
Packit 63bb0d
	if lexer.lastWidth == 0 {
Packit 63bb0d
		// Then we hit an EOF so we never reached the closing
Packit 63bb0d
		// delimiter.
Packit 63bb0d
		return token{}, SyntaxError{
Packit 63bb0d
			msg:        "Unclosed delimiter: '",
Packit 63bb0d
			Expression: lexer.expression,
Packit 63bb0d
			Offset:     len(lexer.expression),
Packit 63bb0d
		}
Packit 63bb0d
	}
Packit 63bb0d
	if currentIndex < lexer.currentPos {
Packit 63bb0d
		lexer.buf.WriteString(lexer.expression[currentIndex : lexer.currentPos-1])
Packit 63bb0d
	}
Packit 63bb0d
	value := lexer.buf.String()
Packit 63bb0d
	// Reset the buffer so it can reused again.
Packit 63bb0d
	lexer.buf.Reset()
Packit 63bb0d
	return token{
Packit 63bb0d
		tokenType: tStringLiteral,
Packit 63bb0d
		value:     value,
Packit 63bb0d
		position:  start,
Packit 63bb0d
		length:    len(value),
Packit 63bb0d
	}, nil
Packit 63bb0d
}
Packit 63bb0d
Packit 63bb0d
func (lexer *Lexer) syntaxError(msg string) SyntaxError {
Packit 63bb0d
	return SyntaxError{
Packit 63bb0d
		msg:        msg,
Packit 63bb0d
		Expression: lexer.expression,
Packit 63bb0d
		Offset:     lexer.currentPos - 1,
Packit 63bb0d
	}
Packit 63bb0d
}
Packit 63bb0d
Packit 63bb0d
// Checks for a two char token, otherwise matches a single character
Packit 63bb0d
// token. This is used whenever a two char token overlaps a single
Packit 63bb0d
// char token, e.g. "||" -> tPipe, "|" -> tOr.
Packit 63bb0d
func (lexer *Lexer) matchOrElse(first rune, second rune, matchedType tokType, singleCharType tokType) token {
Packit 63bb0d
	start := lexer.currentPos - lexer.lastWidth
Packit 63bb0d
	nextRune := lexer.next()
Packit 63bb0d
	var t token
Packit 63bb0d
	if nextRune == second {
Packit 63bb0d
		t = token{
Packit 63bb0d
			tokenType: matchedType,
Packit 63bb0d
			value:     string(first) + string(second),
Packit 63bb0d
			position:  start,
Packit 63bb0d
			length:    2,
Packit 63bb0d
		}
Packit 63bb0d
	} else {
Packit 63bb0d
		lexer.back()
Packit 63bb0d
		t = token{
Packit 63bb0d
			tokenType: singleCharType,
Packit 63bb0d
			value:     string(first),
Packit 63bb0d
			position:  start,
Packit 63bb0d
			length:    1,
Packit 63bb0d
		}
Packit 63bb0d
	}
Packit 63bb0d
	return t
Packit 63bb0d
}
Packit 63bb0d
Packit 63bb0d
func (lexer *Lexer) consumeLBracket() token {
Packit 63bb0d
	// There's three options here:
Packit 63bb0d
	// 1. A filter expression "[?"
Packit 63bb0d
	// 2. A flatten operator "[]"
Packit 63bb0d
	// 3. A bare rbracket "["
Packit 63bb0d
	start := lexer.currentPos - lexer.lastWidth
Packit 63bb0d
	nextRune := lexer.next()
Packit 63bb0d
	var t token
Packit 63bb0d
	if nextRune == '?' {
Packit 63bb0d
		t = token{
Packit 63bb0d
			tokenType: tFilter,
Packit 63bb0d
			value:     "[?",
Packit 63bb0d
			position:  start,
Packit 63bb0d
			length:    2,
Packit 63bb0d
		}
Packit 63bb0d
	} else if nextRune == ']' {
Packit 63bb0d
		t = token{
Packit 63bb0d
			tokenType: tFlatten,
Packit 63bb0d
			value:     "[]",
Packit 63bb0d
			position:  start,
Packit 63bb0d
			length:    2,
Packit 63bb0d
		}
Packit 63bb0d
	} else {
Packit 63bb0d
		t = token{
Packit 63bb0d
			tokenType: tLbracket,
Packit 63bb0d
			value:     "[",
Packit 63bb0d
			position:  start,
Packit 63bb0d
			length:    1,
Packit 63bb0d
		}
Packit 63bb0d
		lexer.back()
Packit 63bb0d
	}
Packit 63bb0d
	return t
Packit 63bb0d
}
Packit 63bb0d
Packit 63bb0d
func (lexer *Lexer) consumeQuotedIdentifier() (token, error) {
Packit 63bb0d
	start := lexer.currentPos
Packit 63bb0d
	value, err := lexer.consumeUntil('"')
Packit 63bb0d
	if err != nil {
Packit 63bb0d
		return token{}, err
Packit 63bb0d
	}
Packit 63bb0d
	var decoded string
Packit 63bb0d
	asJSON := []byte("\"" + value + "\"")
Packit 63bb0d
	if err := json.Unmarshal([]byte(asJSON), &decoded); err != nil {
Packit 63bb0d
		return token{}, err
Packit 63bb0d
	}
Packit 63bb0d
	return token{
Packit 63bb0d
		tokenType: tQuotedIdentifier,
Packit 63bb0d
		value:     decoded,
Packit 63bb0d
		position:  start - 1,
Packit 63bb0d
		length:    len(decoded),
Packit 63bb0d
	}, nil
Packit 63bb0d
}
Packit 63bb0d
Packit 63bb0d
func (lexer *Lexer) consumeUnquotedIdentifier() token {
Packit 63bb0d
	// Consume runes until we reach the end of an unquoted
Packit 63bb0d
	// identifier.
Packit 63bb0d
	start := lexer.currentPos - lexer.lastWidth
Packit 63bb0d
	for {
Packit 63bb0d
		r := lexer.next()
Packit 63bb0d
		if r < 0 || r > 128 || identifierTrailingBits[uint64(r)/64]&(1<<(uint64(r)%64)) == 0 {
Packit 63bb0d
			lexer.back()
Packit 63bb0d
			break
Packit 63bb0d
		}
Packit 63bb0d
	}
Packit 63bb0d
	value := lexer.expression[start:lexer.currentPos]
Packit 63bb0d
	return token{
Packit 63bb0d
		tokenType: tUnquotedIdentifier,
Packit 63bb0d
		value:     value,
Packit 63bb0d
		position:  start,
Packit 63bb0d
		length:    lexer.currentPos - start,
Packit 63bb0d
	}
Packit 63bb0d
}
Packit 63bb0d
Packit 63bb0d
func (lexer *Lexer) consumeNumber() token {
Packit 63bb0d
	// Consume runes until we reach something that's not a number.
Packit 63bb0d
	start := lexer.currentPos - lexer.lastWidth
Packit 63bb0d
	for {
Packit 63bb0d
		r := lexer.next()
Packit 63bb0d
		if r < '0' || r > '9' {
Packit 63bb0d
			lexer.back()
Packit 63bb0d
			break
Packit 63bb0d
		}
Packit 63bb0d
	}
Packit 63bb0d
	value := lexer.expression[start:lexer.currentPos]
Packit 63bb0d
	return token{
Packit 63bb0d
		tokenType: tNumber,
Packit 63bb0d
		value:     value,
Packit 63bb0d
		position:  start,
Packit 63bb0d
		length:    lexer.currentPos - start,
Packit 63bb0d
	}
Packit 63bb0d
}