forked from cerc-io/ipld-eth-server
369 lines
9.3 KiB
Go
369 lines
9.3 KiB
Go
|
package json
|
||
|
|
||
|
// License note: the string and numeric parsers here borrow
|
||
|
// heavily from the golang stdlib json parser scanner.
|
||
|
// That code is originally Copyright 2010 The Go Authors,
|
||
|
// and is governed by a BSD-style license.
|
||
|
|
||
|
import (
|
||
|
"fmt"
|
||
|
"io"
|
||
|
"strconv"
|
||
|
"unicode"
|
||
|
"unicode/utf16"
|
||
|
"unicode/utf8"
|
||
|
|
||
|
"github.com/polydawn/refmt/tok"
|
||
|
)
|
||
|
|
||
|
func (d *Decoder) decodeString() (string, error) {
|
||
|
// First quote has already been eaten.
|
||
|
// Start tracking the byte slice; real string starts here.
|
||
|
d.r.Track()
|
||
|
// Scan until scanner tells us end of string.
|
||
|
for step := strscan_normal; step != nil; {
|
||
|
majorByte, err := d.r.Readn1()
|
||
|
if err != nil {
|
||
|
return "", err
|
||
|
}
|
||
|
step, err = step(majorByte)
|
||
|
if err != nil {
|
||
|
return "", err
|
||
|
}
|
||
|
}
|
||
|
// Unread one. The scan loop consumed the trailing quote already,
|
||
|
// which we don't want to pass onto the parser.
|
||
|
d.r.Unreadn1()
|
||
|
// Parse!
|
||
|
s, ok := parseString(d.r.StopTrack())
|
||
|
if !ok {
|
||
|
//return string(s), fmt.Errorf("string parse misc fail")
|
||
|
}
|
||
|
// Swallow the trailing quote again.
|
||
|
d.r.Readn1()
|
||
|
return string(s), nil
|
||
|
}
|
||
|
|
||
|
// strscanStep is one state of the string scanner.
//
// Scan steps are looped over the stream to find how long the string is:
// each step is handed one byte and returns the step to use for the next byte.
// A nil step func is returned to indicate the string is done
// (a nil step is also what accompanies a non-nil error).
// Actually parsing the string is done by 'parseString()'.
type strscanStep func(c byte) (strscanStep, error)
|
||
|
|
||
|
// The default string scanning step state. Starts here.
|
||
|
func strscan_normal(c byte) (strscanStep, error) {
|
||
|
if c == '"' { // done!
|
||
|
return nil, nil
|
||
|
}
|
||
|
if c == '\\' {
|
||
|
return strscan_esc, nil
|
||
|
}
|
||
|
if c < 0x20 { // Unprintable bytes are invalid in a json string.
|
||
|
return nil, fmt.Errorf("invalid unprintable byte in string literal: 0x%x", c)
|
||
|
}
|
||
|
return strscan_normal, nil
|
||
|
}
|
||
|
|
||
|
// "esc" is the state after reading `"\` during a quoted string.
|
||
|
func strscan_esc(c byte) (strscanStep, error) {
|
||
|
switch c {
|
||
|
case 'b', 'f', 'n', 'r', 't', '\\', '/', '"':
|
||
|
return strscan_normal, nil
|
||
|
case 'u':
|
||
|
return strscan_escU, nil
|
||
|
}
|
||
|
return nil, fmt.Errorf("invalid byte in string escape sequence: 0x%x", c)
|
||
|
}
|
||
|
|
||
|
// "escU" is the state after reading `"\u` during a quoted string.
|
||
|
func strscan_escU(c byte) (strscanStep, error) {
|
||
|
if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
|
||
|
return strscan_escU1, nil
|
||
|
}
|
||
|
return nil, fmt.Errorf("invalid byte in \\u hexadecimal character escape: 0x%x", c)
|
||
|
}
|
||
|
|
||
|
// "escU1" is the state after reading `"\u1` during a quoted string.
|
||
|
func strscan_escU1(c byte) (strscanStep, error) {
|
||
|
if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
|
||
|
return strscan_escU12, nil
|
||
|
}
|
||
|
return nil, fmt.Errorf("invalid byte in \\u hexadecimal character escape: 0x%x", c)
|
||
|
}
|
||
|
|
||
|
// "escU12" is the state after reading `"\u12` during a quoted string.
|
||
|
func strscan_escU12(c byte) (strscanStep, error) {
|
||
|
if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
|
||
|
return strscan_escU123, nil
|
||
|
}
|
||
|
return nil, fmt.Errorf("invalid byte in \\u hexadecimal character escape: 0x%x", c)
|
||
|
}
|
||
|
|
||
|
// "escU123" is the state after reading `"\u123` during a quoted string.
|
||
|
func strscan_escU123(c byte) (strscanStep, error) {
|
||
|
if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
|
||
|
return strscan_normal, nil
|
||
|
}
|
||
|
return nil, fmt.Errorf("invalid byte in \\u hexadecimal character escape: 0x%x", c)
|
||
|
}
|
||
|
|
||
|
// Convert a json string byte sequence that is a complete string (quotes from
|
||
|
// the outside dropped) bytes ready to be flipped into a go string.
|
||
|
func parseString(s []byte) (t []byte, ok bool) {
|
||
|
// Check for unusual characters. If there are none,
|
||
|
// then no unquoting is needed, so return a slice of the
|
||
|
// original bytes.
|
||
|
r := 0
|
||
|
for r < len(s) {
|
||
|
c := s[r]
|
||
|
if c == '\\' || c == '"' || c < ' ' {
|
||
|
break
|
||
|
}
|
||
|
if c < utf8.RuneSelf {
|
||
|
r++
|
||
|
continue
|
||
|
}
|
||
|
rr, size := utf8.DecodeRune(s[r:])
|
||
|
if rr == utf8.RuneError && size == 1 {
|
||
|
break
|
||
|
}
|
||
|
r += size
|
||
|
}
|
||
|
if r == len(s) {
|
||
|
return s, true
|
||
|
}
|
||
|
|
||
|
b := make([]byte, len(s)+2*utf8.UTFMax)
|
||
|
w := copy(b, s[0:r])
|
||
|
for r < len(s) {
|
||
|
// Out of room? Can only happen if s is full of
|
||
|
// malformed UTF-8 and we're replacing each
|
||
|
// byte with RuneError.
|
||
|
if w >= len(b)-2*utf8.UTFMax {
|
||
|
nb := make([]byte, (len(b)+utf8.UTFMax)*2)
|
||
|
copy(nb, b[0:w])
|
||
|
b = nb
|
||
|
}
|
||
|
switch c := s[r]; {
|
||
|
case c == '\\':
|
||
|
r++
|
||
|
if r >= len(s) {
|
||
|
return
|
||
|
}
|
||
|
switch s[r] {
|
||
|
default:
|
||
|
return
|
||
|
case '"', '\\', '/', '\'':
|
||
|
b[w] = s[r]
|
||
|
r++
|
||
|
w++
|
||
|
case 'b':
|
||
|
b[w] = '\b'
|
||
|
r++
|
||
|
w++
|
||
|
case 'f':
|
||
|
b[w] = '\f'
|
||
|
r++
|
||
|
w++
|
||
|
case 'n':
|
||
|
b[w] = '\n'
|
||
|
r++
|
||
|
w++
|
||
|
case 'r':
|
||
|
b[w] = '\r'
|
||
|
r++
|
||
|
w++
|
||
|
case 't':
|
||
|
b[w] = '\t'
|
||
|
r++
|
||
|
w++
|
||
|
case 'u':
|
||
|
r--
|
||
|
rr := getu4(s[r:])
|
||
|
if rr < 0 {
|
||
|
return
|
||
|
}
|
||
|
r += 6
|
||
|
if utf16.IsSurrogate(rr) {
|
||
|
rr1 := getu4(s[r:])
|
||
|
if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
|
||
|
// A valid pair; consume.
|
||
|
r += 6
|
||
|
w += utf8.EncodeRune(b[w:], dec)
|
||
|
break
|
||
|
}
|
||
|
// Invalid surrogate; fall back to replacement rune.
|
||
|
rr = unicode.ReplacementChar
|
||
|
}
|
||
|
w += utf8.EncodeRune(b[w:], rr)
|
||
|
}
|
||
|
|
||
|
// Quote, control characters are invalid.
|
||
|
case c == '"', c < ' ':
|
||
|
return
|
||
|
|
||
|
// ASCII
|
||
|
case c < utf8.RuneSelf:
|
||
|
b[w] = c
|
||
|
r++
|
||
|
w++
|
||
|
|
||
|
// Coerce to well-formed UTF-8.
|
||
|
default:
|
||
|
rr, size := utf8.DecodeRune(s[r:])
|
||
|
r += size
|
||
|
w += utf8.EncodeRune(b[w:], rr)
|
||
|
}
|
||
|
}
|
||
|
return b[0:w], true
|
||
|
}
|
||
|
|
||
|
// getu4 decodes a \uXXXX escape from the beginning of s and returns the
// value of its four hex digits.
//
// It returns -1 if s is shorter than six bytes, does not start with `\u`,
// or the four bytes after the prefix are not valid hexadecimal.
func getu4(s []byte) rune {
	const escapeLen = 6 // len(`\uXXXX`)
	if len(s) < escapeLen {
		return -1
	}
	if s[0] != '\\' || s[1] != 'u' {
		return -1
	}
	value, err := strconv.ParseUint(string(s[2:escapeLen]), 16, 64)
	if err != nil {
		return -1
	}
	return rune(value)
}
|
||
|
|
||
|
// Returns *either* an int or a float -- json is ambigous.
|
||
|
// An int is preferred if possible.
|
||
|
func (d *Decoder) decodeNumber(majorByte byte) (tok.TokenType, int64, float64, error) {
|
||
|
// First byte has already been eaten.
|
||
|
// Easiest to unread1, so we can use track, then swallow it again.
|
||
|
d.r.Unreadn1()
|
||
|
d.r.Track()
|
||
|
d.r.Readn1()
|
||
|
// Scan until scanner tells us end of numeric.
|
||
|
// Pick the first scanner stepfunc based on the leading byte.
|
||
|
var step numscanStep
|
||
|
switch majorByte {
|
||
|
case '-':
|
||
|
step = numscan_neg
|
||
|
case '0':
|
||
|
step = numscan_0
|
||
|
case '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
||
|
step = numscan_1
|
||
|
default:
|
||
|
panic("unreachable")
|
||
|
}
|
||
|
for {
|
||
|
b, err := d.r.Readn1()
|
||
|
if err == io.EOF {
|
||
|
break
|
||
|
}
|
||
|
if err != nil {
|
||
|
return 0, 0, 0, err
|
||
|
}
|
||
|
step, err = step(b)
|
||
|
if step == nil {
|
||
|
// Unread one. The scan loop consumed one char beyond the end
|
||
|
// (this is necessary in json!),
|
||
|
// which the next part of the decoder will need elsewhere.
|
||
|
d.r.Unreadn1()
|
||
|
break
|
||
|
}
|
||
|
if err != nil {
|
||
|
return 0, 0, 0, err
|
||
|
}
|
||
|
}
|
||
|
// Parse!
|
||
|
// *This is not a fast parse*.
|
||
|
// Try int first; if it fails try float; if that fails return the float error.
|
||
|
s := string(d.r.StopTrack())
|
||
|
if i, err := strconv.ParseInt(s, 10, 64); err == nil {
|
||
|
return tok.TInt, i, 0, nil
|
||
|
}
|
||
|
f, err := strconv.ParseFloat(s, 64)
|
||
|
return tok.TFloat64, 0, f, err
|
||
|
}
|
||
|
|
||
|
// numscanStep is one state of the number scanner.
//
// Scan steps are looped over the stream to find how long the number is:
// each step is handed one byte and returns the step to use for the next byte.
// A nil step func is returned to indicate the number is done
// (a nil step is also what accompanies a non-nil error).
// Actually parsing the number is done in 'decodeNumber()',
// using strconv.ParseInt and strconv.ParseFloat.
type numscanStep func(c byte) (numscanStep, error)
|
||
|
|
||
|
// numscan_neg is the state after reading `-` during a number.
|
||
|
func numscan_neg(c byte) (numscanStep, error) {
|
||
|
if c == '0' {
|
||
|
return numscan_0, nil
|
||
|
}
|
||
|
if '1' <= c && c <= '9' {
|
||
|
return numscan_1, nil
|
||
|
}
|
||
|
return nil, fmt.Errorf("invalid byte in numeric literal: 0x%x", c)
|
||
|
}
|
||
|
|
||
|
// numscan_1 is the state after reading a non-zero integer during a number,
|
||
|
// such as after reading `1` or `100` but not `0`.
|
||
|
func numscan_1(c byte) (numscanStep, error) {
|
||
|
if '0' <= c && c <= '9' {
|
||
|
return numscan_1, nil
|
||
|
}
|
||
|
return numscan_0(c)
|
||
|
}
|
||
|
|
||
|
// numscan_0 is the state after reading `0` during a number.
|
||
|
func numscan_0(c byte) (numscanStep, error) {
|
||
|
if c == '.' {
|
||
|
return numscan_dot, nil
|
||
|
}
|
||
|
if c == 'e' || c == 'E' {
|
||
|
return numscan_e, nil
|
||
|
}
|
||
|
return nil, nil
|
||
|
}
|
||
|
|
||
|
// numscan_dot is the state after reading the integer and decimal point in a number,
|
||
|
// such as after reading `1.`.
|
||
|
func numscan_dot(c byte) (numscanStep, error) {
|
||
|
if '0' <= c && c <= '9' {
|
||
|
return numscan_dot0, nil
|
||
|
}
|
||
|
return nil, fmt.Errorf("invalid byte after decimal in numeric literal: 0x%x", c)
|
||
|
}
|
||
|
|
||
|
// numscan_dot0 is the state after reading the integer, decimal point, and subsequent
|
||
|
// digits of a number, such as after reading `3.14`.
|
||
|
func numscan_dot0(c byte) (numscanStep, error) {
|
||
|
if '0' <= c && c <= '9' {
|
||
|
return numscan_dot0, nil
|
||
|
}
|
||
|
if c == 'e' || c == 'E' {
|
||
|
return numscan_e, nil
|
||
|
}
|
||
|
return nil, nil
|
||
|
}
|
||
|
|
||
|
// numscan_e is the state after reading the mantissa and e in a number,
|
||
|
// such as after reading `314e` or `0.314e`.
|
||
|
func numscan_e(c byte) (numscanStep, error) {
|
||
|
if c == '+' || c == '-' {
|
||
|
return numscan_eSign, nil
|
||
|
}
|
||
|
return numscan_eSign(c)
|
||
|
}
|
||
|
|
||
|
// numscan_eSign is the state after reading the mantissa, e, and sign in a number,
|
||
|
// such as after reading `314e-` or `0.314e+`.
|
||
|
func numscan_eSign(c byte) (numscanStep, error) {
|
||
|
if '0' <= c && c <= '9' {
|
||
|
return numscan_e0, nil
|
||
|
}
|
||
|
return nil, fmt.Errorf("invalid byte in exponent of numeric literal: 0x%x", c)
|
||
|
}
|
||
|
|
||
|
// numscan_e0 is the state after reading the mantissa, e, optional sign,
|
||
|
// and at least one digit of the exponent in a number,
|
||
|
// such as after reading `314e-2` or `0.314e+1` or `3.14e0`.
|
||
|
func numscan_e0(c byte) (numscanStep, error) {
|
||
|
if '0' <= c && c <= '9' {
|
||
|
return numscan_e0, nil
|
||
|
}
|
||
|
return nil, nil
|
||
|
}
|