forked from cerc-io/ipld-eth-server
230 lines
4.9 KiB
Go
230 lines
4.9 KiB
Go
// Copyright 2012 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package present
|
|
|
|
import (
|
|
"errors"
|
|
"regexp"
|
|
"strconv"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// This file is stolen from go/src/cmd/godoc/codewalk.go.
|
|
// It's an evaluator for the file address syntax implemented by acme and sam,
|
|
// but using Go-native regular expressions.
|
|
// To keep things reasonably close, this version uses (?m:re) for all user-provided
|
|
// regular expressions. That is the only change to the code from codewalk.go.
|
|
// See http://plan9.bell-labs.com/sys/doc/sam/sam.html Table II
|
|
// for details on the syntax.
|
|
|
|
// addrToByte evaluates the given address starting at offset start in data.
|
|
// It returns the lo and hi byte offset of the matched region within data.
|
|
func addrToByteRange(addr string, start int, data []byte) (lo, hi int, err error) {
|
|
if addr == "" {
|
|
lo, hi = start, len(data)
|
|
return
|
|
}
|
|
var (
|
|
dir byte
|
|
prevc byte
|
|
charOffset bool
|
|
)
|
|
lo = start
|
|
hi = start
|
|
for addr != "" && err == nil {
|
|
c := addr[0]
|
|
switch c {
|
|
default:
|
|
err = errors.New("invalid address syntax near " + string(c))
|
|
case ',':
|
|
if len(addr) == 1 {
|
|
hi = len(data)
|
|
} else {
|
|
_, hi, err = addrToByteRange(addr[1:], hi, data)
|
|
}
|
|
return
|
|
|
|
case '+', '-':
|
|
if prevc == '+' || prevc == '-' {
|
|
lo, hi, err = addrNumber(data, lo, hi, prevc, 1, charOffset)
|
|
}
|
|
dir = c
|
|
|
|
case '$':
|
|
lo = len(data)
|
|
hi = len(data)
|
|
if len(addr) > 1 {
|
|
dir = '+'
|
|
}
|
|
|
|
case '#':
|
|
charOffset = true
|
|
|
|
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
|
var i int
|
|
for i = 1; i < len(addr); i++ {
|
|
if addr[i] < '0' || addr[i] > '9' {
|
|
break
|
|
}
|
|
}
|
|
var n int
|
|
n, err = strconv.Atoi(addr[0:i])
|
|
if err != nil {
|
|
break
|
|
}
|
|
lo, hi, err = addrNumber(data, lo, hi, dir, n, charOffset)
|
|
dir = 0
|
|
charOffset = false
|
|
prevc = c
|
|
addr = addr[i:]
|
|
continue
|
|
|
|
case '/':
|
|
var i, j int
|
|
Regexp:
|
|
for i = 1; i < len(addr); i++ {
|
|
switch addr[i] {
|
|
case '\\':
|
|
i++
|
|
case '/':
|
|
j = i + 1
|
|
break Regexp
|
|
}
|
|
}
|
|
if j == 0 {
|
|
j = i
|
|
}
|
|
pattern := addr[1:i]
|
|
lo, hi, err = addrRegexp(data, lo, hi, dir, pattern)
|
|
prevc = c
|
|
addr = addr[j:]
|
|
continue
|
|
}
|
|
prevc = c
|
|
addr = addr[1:]
|
|
}
|
|
|
|
if err == nil && dir != 0 {
|
|
lo, hi, err = addrNumber(data, lo, hi, dir, 1, charOffset)
|
|
}
|
|
if err != nil {
|
|
return 0, 0, err
|
|
}
|
|
return lo, hi, nil
|
|
}
|
|
|
|
// addrNumber applies the given dir, n, and charOffset to the address lo, hi.
// dir is '+' or '-', n is the count, and charOffset is true if the syntax
// used was #n. Applying +n (or +#n) means to advance n lines
// (or characters) after hi. Applying -n (or -#n) means to back up n lines
// (or characters) before lo.
// A dir of 0 denotes an absolute address: lo and hi are reset to 0 and the
// count is then applied forward, exactly as for '+'.
// The return value is the new lo, hi. If the requested position lies outside
// data, or dir is none of 0, '+', '-', an "address out of range" error is
// returned.
func addrNumber(data []byte, lo, hi int, dir byte, n int, charOffset bool) (int, int, error) {
	switch dir {
	case 0:
		lo = 0
		hi = 0
		fallthrough

	case '+':
		if charOffset {
			// Step forward one rune at a time from hi.
			pos := hi
			for ; n > 0 && pos < len(data); n-- {
				_, size := utf8.DecodeRune(data[pos:])
				pos += size
			}
			if n == 0 {
				return pos, pos, nil
			}
			break // ran off the end of data
		}
		// find next beginning of line
		if hi > 0 {
			for hi < len(data) && data[hi-1] != '\n' {
				hi++
			}
		}
		lo = hi
		if n == 0 {
			return lo, hi, nil
		}
		// Count newlines; the n-th line spans from just after the
		// (n-1)-th newline through the n-th newline inclusive.
		for ; hi < len(data); hi++ {
			if data[hi] != '\n' {
				continue
			}
			switch n--; n {
			case 1:
				lo = hi + 1
			case 0:
				return lo, hi + 1, nil
			}
		}

	case '-':
		if charOffset {
			// Step backward one rune at a time from lo. Decoding the rune
			// that ends at pos never reads data[pos] itself, so this is safe
			// when lo == len(data), and it always lands on the first byte of
			// a rune rather than on a UTF-8 continuation byte.
			pos := lo
			for ; n > 0 && pos > 0; n-- {
				_, size := utf8.DecodeLastRune(data[:pos])
				pos -= size
			}
			if n == 0 {
				return pos, pos, nil
			}
			break // ran off the beginning of data
		}
		// find earlier beginning of line
		for lo > 0 && data[lo-1] != '\n' {
			lo--
		}
		hi = lo
		if n == 0 {
			return lo, hi, nil
		}
		// Walk backward over line beginnings (offset 0 counts as one).
		for ; lo >= 0; lo-- {
			if lo > 0 && data[lo-1] != '\n' {
				continue
			}
			switch n--; n {
			case 1:
				hi = lo
			case 0:
				return lo, hi, nil
			}
		}
	}

	return 0, 0, errors.New("address out of range")
}
|
|
|
|
// addrRegexp looks for the first match of pattern in data and returns the
// byte offsets of its start and end.
//
// The search begins at hi. If nothing matches from there on and hi is past
// the start of data, the search wraps around and is retried from offset 0.
// A dir of '-' requests a backward search, which is not implemented and
// yields an error. lo is accepted for symmetry with addrNumber but is not
// consulted.
//
// An error is returned if pattern does not compile, if a backward search is
// requested, or if pattern matches nowhere.
func addrRegexp(data []byte, lo, hi int, dir byte, pattern string) (int, int, error) {
	// Multi-line mode makes ^ and $ match at line boundaries, as they do
	// in sam and acme.
	expr, compileErr := regexp.Compile("(?m:" + pattern + ")")
	if compileErr != nil {
		return 0, 0, compileErr
	}

	// A reverse search could be built on top of repeated forward searches,
	// but that seems like overkill.
	if dir == '-' {
		return 0, 0, errors.New("reverse search not implemented")
	}

	loc := expr.FindIndex(data[hi:])
	switch {
	case len(loc) > 0:
		// Offsets are relative to data[hi:]; rebase them onto data.
		return loc[0] + hi, loc[1] + hi, nil
	case hi > 0:
		// No match. Wrap to beginning of data.
		if loc = expr.FindIndex(data); len(loc) > 0 {
			return loc[0], loc[1], nil
		}
	}
	return 0, 0, errors.New("no match for " + pattern)
}
|