resolves git conflicts from #3896 (credit to @belak, in case github doesn't keep original author during squash) Co-Authored-By: Matti Ranta <techknowlogick@gitea.io>
		
			
				
	
	
		
			194 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
			
		
		
	
	
			194 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
// Package shlex provides a simple lexical analysis like Unix shell.
 | 
						|
package shlex
 | 
						|
 | 
						|
import (
 | 
						|
	"bufio"
 | 
						|
	"errors"
 | 
						|
	"io"
 | 
						|
	"strings"
 | 
						|
	"unicode"
 | 
						|
)
 | 
						|
 | 
						|
var (
	// ErrNoClosing is returned when input ends while a quoted token
	// is still missing its closing quotation mark.
	ErrNoClosing = errors.New("No closing quotation")
	// ErrNoEscaped is returned when input ends with an escape
	// character that has no character following it to escape.
	ErrNoEscaped = errors.New("No escaped character")
)
 | 
						|
 | 
						|
// Tokenizer is the interface that classifies a token according to
// words, whitespaces, quotations, escapes and escaped quotations.
type Tokenizer interface {
	// IsWord reports whether r may appear inside an unquoted word.
	IsWord(rune) bool
	// IsWhitespace reports whether r separates tokens.
	IsWhitespace(rune) bool
	// IsQuote reports whether r opens and closes a quoted section.
	IsQuote(rune) bool
	// IsEscape reports whether r is the escape character.
	IsEscape(rune) bool
	// IsEscapedQuote reports whether r is a quote character inside
	// which escape sequences are honored (see Lexer.readToken).
	IsEscapedQuote(rune) bool
}
 | 
						|
 | 
						|
// DefaultTokenizer implements a simple tokenizer like Unix shell.
type DefaultTokenizer struct{}

// IsWord reports whether r may appear inside an unquoted word:
// letters, numbers, and the underscore.
func (t *DefaultTokenizer) IsWord(r rune) bool {
	if r == '_' {
		return true
	}
	return unicode.IsLetter(r) || unicode.IsNumber(r)
}

// IsQuote reports whether r is a single or double quotation mark.
func (t *DefaultTokenizer) IsQuote(r rune) bool {
	return r == '\'' || r == '"'
}

// IsWhitespace reports whether r is a Unicode space character.
func (t *DefaultTokenizer) IsWhitespace(r rune) bool {
	return unicode.IsSpace(r)
}

// IsEscape reports whether r is the backslash escape character.
func (t *DefaultTokenizer) IsEscape(r rune) bool {
	return r == '\\'
}

// IsEscapedQuote reports whether r is a double quote, the quote kind
// inside which escapes are recognized.
func (t *DefaultTokenizer) IsEscapedQuote(r rune) bool {
	return r == '"'
}
 | 
						|
 | 
						|
// Lexer represents a lexical analyzer.
type Lexer struct {
	// reader is the buffered input stream tokens are read from.
	reader *bufio.Reader
	// tokenizer classifies each rune (word/whitespace/quote/escape).
	tokenizer Tokenizer
	// posix enables POSIX-style handling: quotes are stripped from
	// tokens and backslash escapes are processed.
	posix bool
	// whitespacesplit makes whitespace the only token separator;
	// non-word runes are then kept inside the current token.
	whitespacesplit bool
}
 | 
						|
 | 
						|
// NewLexer creates a new Lexer reading from io.Reader.  This Lexer
 | 
						|
// has a DefaultTokenizer according to posix and whitespacesplit
 | 
						|
// rules.
 | 
						|
func NewLexer(r io.Reader, posix, whitespacesplit bool) *Lexer {
 | 
						|
	return &Lexer{
 | 
						|
		reader:          bufio.NewReader(r),
 | 
						|
		tokenizer:       &DefaultTokenizer{},
 | 
						|
		posix:           posix,
 | 
						|
		whitespacesplit: whitespacesplit,
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// NewLexerString creates a new Lexer reading from a string.  This
 | 
						|
// Lexer has a DefaultTokenizer according to posix and whitespacesplit
 | 
						|
// rules.
 | 
						|
func NewLexerString(s string, posix, whitespacesplit bool) *Lexer {
 | 
						|
	return NewLexer(strings.NewReader(s), posix, whitespacesplit)
 | 
						|
}
 | 
						|
 | 
						|
// Split splits a string according to posix or non-posix rules.
 | 
						|
func Split(s string, posix bool) ([]string, error) {
 | 
						|
	return NewLexerString(s, posix, true).Split()
 | 
						|
}
 | 
						|
 | 
						|
// SetTokenizer sets a Tokenizer.
 | 
						|
func (l *Lexer) SetTokenizer(t Tokenizer) {
 | 
						|
	l.tokenizer = t
 | 
						|
}
 | 
						|
 | 
						|
func (l *Lexer) Split() ([]string, error) {
 | 
						|
	result := make([]string, 0)
 | 
						|
	for {
 | 
						|
		token, err := l.readToken()
 | 
						|
		if token != "" {
 | 
						|
			result = append(result, token)
 | 
						|
		}
 | 
						|
 | 
						|
		if err == io.EOF {
 | 
						|
			break
 | 
						|
		} else if err != nil {
 | 
						|
			return result, err
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return result, nil
 | 
						|
}
 | 
						|
 | 
						|
// readToken scans and returns the next token from the input.
//
// It is a small state machine driven by the current rune class:
// state is ' ' (whitespace) initially, a quote rune while inside a
// quoted section, the escape rune directly after an escape character,
// and 'a' while accumulating an ordinary word.  escapedstate
// remembers which state to return to after an escape sequence.
// io.EOF is returned alongside the final token; ErrNoClosing and
// ErrNoEscaped report input that ends mid-quote or mid-escape.
func (l *Lexer) readToken() (string, error) {
	t := l.tokenizer
	token := ""
	quoted := false
	state := ' '
	escapedstate := ' '
scanning:
	for {
		next, _, err := l.reader.ReadRune()
		if err != nil {
			// Input ended: report an unterminated quote or escape,
			// otherwise pass the read error (usually io.EOF) through
			// with whatever token has accumulated.
			if t.IsQuote(state) {
				return token, ErrNoClosing
			} else if t.IsEscape(state) {
				return token, ErrNoEscaped
			}
			return token, err
		}

		switch {
		// Between tokens: decide what the next rune starts.
		case t.IsWhitespace(state):
			switch {
			case t.IsWhitespace(next):
				break scanning
			case l.posix && t.IsEscape(next):
				// Escape outside quotes: the escaped rune will be
				// taken literally as part of a word ('a').
				escapedstate = 'a'
				state = next
			case t.IsWord(next):
				token += string(next)
				state = 'a'
			case t.IsQuote(next):
				// In posix mode the quote itself is stripped.
				if !l.posix {
					token += string(next)
				}
				state = next
			default:
				// Punctuation: with whitespacesplit it joins the
				// token like a word rune; otherwise it is a
				// one-rune token on its own.
				token = string(next)
				if l.whitespacesplit {
					state = 'a'
				} else if token != "" || (l.posix && quoted) {
					break scanning
				}
			}
		// Inside a quoted section.
		case t.IsQuote(state):
			quoted = true
			switch {
			case next == state:
				// Matching close quote: non-posix keeps the quote and
				// ends the token; posix drops it and keeps scanning.
				if !l.posix {
					token += string(next)
					break scanning
				} else {
					state = 'a'
				}
			case l.posix && t.IsEscape(next) && t.IsEscapedQuote(state):
				// Escapes are honored only inside "escaped quote"
				// quotes (double quotes for DefaultTokenizer).
				escapedstate = state
				state = next
			default:
				token += string(next)
			}
		// Directly after an escape character.
		case t.IsEscape(state):
			// Inside quotes, a backslash stays literal unless it
			// escapes the quote or another backslash.
			if t.IsQuote(escapedstate) && next != state && next != escapedstate {
				token += string(state)
			}
			token += string(next)
			state = escapedstate
		// Accumulating a word.
		case t.IsWord(state):
			switch {
			case t.IsWhitespace(next):
				if token != "" || (l.posix && quoted) {
					break scanning
				}
			case l.posix && t.IsQuote(next):
				state = next
			case l.posix && t.IsEscape(next):
				escapedstate = 'a'
				state = next
			case t.IsWord(next) || t.IsQuote(next):
				token += string(next)
			default:
				// Punctuation ends the word unless whitespacesplit
				// folds it in; the rune is pushed back so the next
				// call re-reads it.  NOTE(review): the UnreadRune
				// error is ignored; it cannot fail here since the
				// last operation on the reader was a ReadRune.
				if l.whitespacesplit {
					token += string(next)
				} else if token != "" {
					l.reader.UnreadRune()
					break scanning
				}
			}
		}
	}
	return token, nil
}
 |