289b30715d
This commit converts the dependency management from Godeps to the vendor folder, and also switches the tool from godep to trash. Since the upstream tool lacks a few features that have been proposed via PRs, use github.com/karalabe/trash until those PRs are merged (if they ever are). You can update dependencies via trash --update. All dependencies have been updated to their latest version. Parts of the build system are reworked to drop old notions of Godeps and to change the invocation of the go vet command so that it doesn't run against the vendor folder, as that would just blow up during vetting. The conversion drops OpenCL (and hence GPU mining support) from ethash and our codebase. The short reasoning is that there's no one to maintain it, and having the OpenCL libs in our deps messes up builds, because go install ./... tries to build them and fails with unsatisfied link errors for the C OpenCL deps. golang.org/x/net/context is not vendored in. We expect it to be fetched by the user (i.e. using go get). To keep ci.go builds reproducible, the package is "vendored" in build/_vendor.
359 lines
7.4 KiB
Go
359 lines
7.4 KiB
Go
package parser
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"strconv"
|
|
)
|
|
|
|
// _RegExp_parser holds the state of a single pass over a JavaScript regular
// expression pattern while it is being rewritten for Go's "regexp" package.
type _RegExp_parser struct {
	// Input pattern and its length in bytes.
	str    string
	length int

	// Cursor state.
	chr       rune // character currently under the cursor
	chrOffset int  // offset at which chr starts
	offset    int  // offset just past chr (chr may span more than one byte)

	// Diagnostics gathered while scanning.
	errors  []error
	invalid bool // set when the input is not a valid JavaScript RegExp

	// Output: the translated pattern accumulated so far.
	goRegexp *bytes.Buffer
}
|
|
|
|
// TransformRegExp transforms a JavaScript pattern into a Go "regexp" pattern.
|
|
//
|
|
// re2 (Go) cannot do backtracking, so the presence of a lookahead (?=) (?!) or
|
|
// backreference (\1, \2, ...) will cause an error.
|
|
//
|
|
// re2 (Go) has a different definition for \s: [\t\n\f\r ].
|
|
// The JavaScript definition, on the other hand, also includes \v, Unicode "Separator, Space", etc.
|
|
//
|
|
// If the pattern is invalid (not valid even in JavaScript), then this function
|
|
// returns the empty string and an error.
|
|
//
|
|
// If the pattern is valid, but incompatible (contains a lookahead or backreference),
|
|
// then this function returns the transformation (a non-empty string) AND an error.
|
|
func TransformRegExp(pattern string) (string, error) {
|
|
|
|
if pattern == "" {
|
|
return "", nil
|
|
}
|
|
|
|
// TODO If without \, if without (?=, (?!, then another shortcut
|
|
|
|
parser := _RegExp_parser{
|
|
str: pattern,
|
|
length: len(pattern),
|
|
goRegexp: bytes.NewBuffer(make([]byte, 0, 3*len(pattern)/2)),
|
|
}
|
|
parser.read() // Pull in the first character
|
|
parser.scan()
|
|
var err error
|
|
if len(parser.errors) > 0 {
|
|
err = parser.errors[0]
|
|
}
|
|
if parser.invalid {
|
|
return "", err
|
|
}
|
|
|
|
// Might not be re2 compatible, but is still a valid JavaScript RegExp
|
|
return parser.goRegexp.String(), err
|
|
}
|
|
|
|
func (self *_RegExp_parser) scan() {
|
|
for self.chr != -1 {
|
|
switch self.chr {
|
|
case '\\':
|
|
self.read()
|
|
self.scanEscape(false)
|
|
case '(':
|
|
self.pass()
|
|
self.scanGroup()
|
|
case '[':
|
|
self.pass()
|
|
self.scanBracket()
|
|
case ')':
|
|
self.error(-1, "Unmatched ')'")
|
|
self.invalid = true
|
|
self.pass()
|
|
default:
|
|
self.pass()
|
|
}
|
|
}
|
|
}
|
|
|
|
// (...)
|
|
func (self *_RegExp_parser) scanGroup() {
|
|
str := self.str[self.chrOffset:]
|
|
if len(str) > 1 { // A possibility of (?= or (?!
|
|
if str[0] == '?' {
|
|
if str[1] == '=' || str[1] == '!' {
|
|
self.error(-1, "re2: Invalid (%s) <lookahead>", self.str[self.chrOffset:self.chrOffset+2])
|
|
}
|
|
}
|
|
}
|
|
for self.chr != -1 && self.chr != ')' {
|
|
switch self.chr {
|
|
case '\\':
|
|
self.read()
|
|
self.scanEscape(false)
|
|
case '(':
|
|
self.pass()
|
|
self.scanGroup()
|
|
case '[':
|
|
self.pass()
|
|
self.scanBracket()
|
|
default:
|
|
self.pass()
|
|
continue
|
|
}
|
|
}
|
|
if self.chr != ')' {
|
|
self.error(-1, "Unterminated group")
|
|
self.invalid = true
|
|
return
|
|
}
|
|
self.pass()
|
|
}
|
|
|
|
// [...]
|
|
func (self *_RegExp_parser) scanBracket() {
|
|
for self.chr != -1 {
|
|
if self.chr == ']' {
|
|
break
|
|
} else if self.chr == '\\' {
|
|
self.read()
|
|
self.scanEscape(true)
|
|
continue
|
|
}
|
|
self.pass()
|
|
}
|
|
if self.chr != ']' {
|
|
self.error(-1, "Unterminated character class")
|
|
self.invalid = true
|
|
return
|
|
}
|
|
self.pass()
|
|
}
|
|
|
|
// \...
|
|
func (self *_RegExp_parser) scanEscape(inClass bool) {
|
|
offset := self.chrOffset
|
|
|
|
var length, base uint32
|
|
switch self.chr {
|
|
|
|
case '0', '1', '2', '3', '4', '5', '6', '7':
|
|
var value int64
|
|
size := 0
|
|
for {
|
|
digit := int64(digitValue(self.chr))
|
|
if digit >= 8 {
|
|
// Not a valid digit
|
|
break
|
|
}
|
|
value = value*8 + digit
|
|
self.read()
|
|
size += 1
|
|
}
|
|
if size == 1 { // The number of characters read
|
|
_, err := self.goRegexp.Write([]byte{'\\', byte(value) + '0'})
|
|
if err != nil {
|
|
self.errors = append(self.errors, err)
|
|
}
|
|
if value != 0 {
|
|
// An invalid backreference
|
|
self.error(-1, "re2: Invalid \\%d <backreference>", value)
|
|
}
|
|
return
|
|
}
|
|
tmp := []byte{'\\', 'x', '0', 0}
|
|
if value >= 16 {
|
|
tmp = tmp[0:2]
|
|
} else {
|
|
tmp = tmp[0:3]
|
|
}
|
|
tmp = strconv.AppendInt(tmp, value, 16)
|
|
_, err := self.goRegexp.Write(tmp)
|
|
if err != nil {
|
|
self.errors = append(self.errors, err)
|
|
}
|
|
return
|
|
|
|
case '8', '9':
|
|
size := 0
|
|
for {
|
|
digit := digitValue(self.chr)
|
|
if digit >= 10 {
|
|
// Not a valid digit
|
|
break
|
|
}
|
|
self.read()
|
|
size += 1
|
|
}
|
|
err := self.goRegexp.WriteByte('\\')
|
|
if err != nil {
|
|
self.errors = append(self.errors, err)
|
|
}
|
|
_, err = self.goRegexp.WriteString(self.str[offset:self.chrOffset])
|
|
if err != nil {
|
|
self.errors = append(self.errors, err)
|
|
}
|
|
self.error(-1, "re2: Invalid \\%s <backreference>", self.str[offset:self.chrOffset])
|
|
return
|
|
|
|
case 'x':
|
|
self.read()
|
|
length, base = 2, 16
|
|
|
|
case 'u':
|
|
self.read()
|
|
length, base = 4, 16
|
|
|
|
case 'b':
|
|
if inClass {
|
|
_, err := self.goRegexp.Write([]byte{'\\', 'x', '0', '8'})
|
|
if err != nil {
|
|
self.errors = append(self.errors, err)
|
|
}
|
|
self.read()
|
|
return
|
|
}
|
|
fallthrough
|
|
|
|
case 'B':
|
|
fallthrough
|
|
|
|
case 'd', 'D', 's', 'S', 'w', 'W':
|
|
// This is slightly broken, because ECMAScript
|
|
// includes \v in \s, \S, while re2 does not
|
|
fallthrough
|
|
|
|
case '\\':
|
|
fallthrough
|
|
|
|
case 'f', 'n', 'r', 't', 'v':
|
|
err := self.goRegexp.WriteByte('\\')
|
|
if err != nil {
|
|
self.errors = append(self.errors, err)
|
|
}
|
|
self.pass()
|
|
return
|
|
|
|
case 'c':
|
|
self.read()
|
|
var value int64
|
|
if 'a' <= self.chr && self.chr <= 'z' {
|
|
value = int64(self.chr) - 'a' + 1
|
|
} else if 'A' <= self.chr && self.chr <= 'Z' {
|
|
value = int64(self.chr) - 'A' + 1
|
|
} else {
|
|
err := self.goRegexp.WriteByte('c')
|
|
if err != nil {
|
|
self.errors = append(self.errors, err)
|
|
}
|
|
return
|
|
}
|
|
tmp := []byte{'\\', 'x', '0', 0}
|
|
if value >= 16 {
|
|
tmp = tmp[0:2]
|
|
} else {
|
|
tmp = tmp[0:3]
|
|
}
|
|
tmp = strconv.AppendInt(tmp, value, 16)
|
|
_, err := self.goRegexp.Write(tmp)
|
|
if err != nil {
|
|
self.errors = append(self.errors, err)
|
|
}
|
|
self.read()
|
|
return
|
|
|
|
default:
|
|
// $ is an identifier character, so we have to have
|
|
// a special case for it here
|
|
if self.chr == '$' || !isIdentifierPart(self.chr) {
|
|
// A non-identifier character needs escaping
|
|
err := self.goRegexp.WriteByte('\\')
|
|
if err != nil {
|
|
self.errors = append(self.errors, err)
|
|
}
|
|
} else {
|
|
// Unescape the character for re2
|
|
}
|
|
self.pass()
|
|
return
|
|
}
|
|
|
|
// Otherwise, we're a \u.... or \x...
|
|
valueOffset := self.chrOffset
|
|
|
|
var value uint32
|
|
{
|
|
length := length
|
|
for ; length > 0; length-- {
|
|
digit := uint32(digitValue(self.chr))
|
|
if digit >= base {
|
|
// Not a valid digit
|
|
goto skip
|
|
}
|
|
value = value*base + digit
|
|
self.read()
|
|
}
|
|
}
|
|
|
|
if length == 4 {
|
|
_, err := self.goRegexp.Write([]byte{
|
|
'\\',
|
|
'x',
|
|
'{',
|
|
self.str[valueOffset+0],
|
|
self.str[valueOffset+1],
|
|
self.str[valueOffset+2],
|
|
self.str[valueOffset+3],
|
|
'}',
|
|
})
|
|
if err != nil {
|
|
self.errors = append(self.errors, err)
|
|
}
|
|
} else if length == 2 {
|
|
_, err := self.goRegexp.Write([]byte{
|
|
'\\',
|
|
'x',
|
|
self.str[valueOffset+0],
|
|
self.str[valueOffset+1],
|
|
})
|
|
if err != nil {
|
|
self.errors = append(self.errors, err)
|
|
}
|
|
} else {
|
|
// Should never, ever get here...
|
|
self.error(-1, "re2: Illegal branch in scanEscape")
|
|
goto skip
|
|
}
|
|
|
|
return
|
|
|
|
skip:
|
|
_, err := self.goRegexp.WriteString(self.str[offset:self.chrOffset])
|
|
if err != nil {
|
|
self.errors = append(self.errors, err)
|
|
}
|
|
}
|
|
|
|
func (self *_RegExp_parser) pass() {
|
|
if self.chr != -1 {
|
|
_, err := self.goRegexp.WriteRune(self.chr)
|
|
if err != nil {
|
|
self.errors = append(self.errors, err)
|
|
}
|
|
}
|
|
self.read()
|
|
}
|
|
|
|
// TODO Better error reporting, use the offset, etc.
|
|
func (self *_RegExp_parser) error(offset int, msg string, msgValues ...interface{}) error {
|
|
err := fmt.Errorf(msg, msgValues...)
|
|
self.errors = append(self.errors, err)
|
|
return err
|
|
}
|