Add antlr-based grammar and test it.

This commit is contained in:
Christian Parpart 2020-03-11 13:03:57 +01:00 committed by chriseth
parent 27b26a69db
commit 0e66e07e5c
3 changed files with 596 additions and 0 deletions

View File

@ -307,6 +307,18 @@ jobs:
name: Linting Python Scripts
command: ./scripts/pylint_all.py
chk_antlr_grammar:
docker:
- image: buildpack-deps:eoan
steps:
- checkout
- run:
name: Install Java
command: apt -q update && apt install -y openjdk-14-jdk
- run:
name: Run tests
command: ./scripts/test_antlr_grammar.sh
chk_buglist:
docker:
- image: circleci/node
@ -773,6 +785,7 @@ workflows:
- chk_buglist: *workflow_trigger_on_tags
- chk_proofs: *workflow_trigger_on_tags
- chk_pylint: *workflow_trigger_on_tags
- chk_antlr_grammar: *workflow_trigger_on_tags
# build-only
- b_docs: *workflow_trigger_on_tags

482
docs/Solidity.g4 Normal file
View File

@ -0,0 +1,482 @@
// Copyright 2020 Gonçalo Sá <goncalo.sa@consensys.net>
// Copyright 2016-2019 Federico Bond <federicobond@gmail.com>
// Licensed under the MIT license. See LICENSE file in the project root for details.
// This grammar is much less strict than what Solidity currently parses
// to allow this to pass with older versions of Solidity.
grammar Solidity;
sourceUnit
: (pragmaDirective | importDirective | structDefinition | enumDefinition | contractDefinition)* EOF ;
pragmaDirective
: 'pragma' pragmaName pragmaValue ';' ;
pragmaName
: identifier ;
pragmaValue
: version | expression ;
version
: versionConstraint versionConstraint? ;
versionConstraint
: versionOperator? VersionLiteral ;
versionOperator
: '^' | '~' | '>=' | '>' | '<' | '<=' | '=' ;
importDirective
: 'import' StringLiteralFragment ('as' identifier)? ';'
| 'import' ('*' | identifier) ('as' identifier)? 'from' StringLiteralFragment ';'
| 'import' '{' importDeclaration ( ',' importDeclaration )* '}' 'from' StringLiteralFragment ';' ;
importDeclaration
: identifier ('as' identifier)? ;
contractDefinition
: 'abstract'? ( 'contract' | 'interface' | 'library' ) identifier
( 'is' inheritanceSpecifier (',' inheritanceSpecifier )* )?
'{' contractPart* '}' ;
inheritanceSpecifier
: userDefinedTypeName ( '(' expressionList? ')' )? ;
contractPart
: stateVariableDeclaration
| usingForDeclaration
| structDefinition
| modifierDefinition
| functionDefinition
| eventDefinition
| enumDefinition ;
stateVariableDeclaration
: typeName
( PublicKeyword | InternalKeyword | PrivateKeyword | ConstantKeyword | ImmutableKeyword | overrideSpecifier )*
identifier ('=' expression)? ';' ;
overrideSpecifier : 'override' ( '(' userDefinedTypeName (',' userDefinedTypeName)* ')' )? ;
usingForDeclaration
: 'using' identifier 'for' ('*' | typeName) ';' ;
structDefinition
: 'struct' identifier
'{' ( variableDeclaration ';' (variableDeclaration ';')* )? '}' ;
modifierDefinition
: 'modifier' identifier parameterList? ( VirtualKeyword | overrideSpecifier )* block ;
functionDefinition
: functionDescriptor parameterList modifierList returnParameters? ( ';' | block ) ;
functionDescriptor
: 'function' ( identifier | ReceiveKeyword | FallbackKeyword )?
| ConstructorKeyword
| FallbackKeyword
| ReceiveKeyword ;
returnParameters
: 'returns' parameterList ;
modifierList
: ( modifierInvocation | stateMutability | ExternalKeyword
| PublicKeyword | InternalKeyword | PrivateKeyword | VirtualKeyword | overrideSpecifier )* ;
modifierInvocation
: identifier ( '(' expressionList? ')' )? ;
eventDefinition
: 'event' identifier eventParameterList AnonymousKeyword? ';' ;
enumDefinition
: 'enum' identifier '{' enumValue? (',' enumValue)* '}' ;
enumValue
: identifier ;
parameterList
: '(' ( parameter (',' parameter)* )? ')' ;
parameter
: typeName storageLocation? identifier? ;
eventParameterList
: '(' ( eventParameter (',' eventParameter)* )? ')' ;
eventParameter
: typeName IndexedKeyword? identifier? ;
variableDeclaration
: typeName storageLocation? identifier ;
typeName
: elementaryTypeName
| userDefinedTypeName
| mapping
| typeName '[' expression? ']'
| functionTypeName ;
userDefinedTypeName
: identifier ( '.' identifier )* ;
mapping
: 'mapping' '(' (elementaryTypeName | userDefinedTypeName) '=>' typeName ')' ;
functionTypeName
: 'function' parameterList modifierList returnParameters? ;
storageLocation
: 'memory' | 'storage' | 'calldata';
stateMutability
: PureKeyword | ConstantKeyword | ViewKeyword | PayableKeyword ;
block
: '{' statement* '}' ;
statement
: ifStatement
| tryStatement
| whileStatement
| forStatement
| block
| inlineAssemblyStatement
| doWhileStatement
| continueStatement
| breakStatement
| returnStatement
| throwStatement
| emitStatement
| simpleStatement ;
expressionStatement
: expression ';' ;
ifStatement
: 'if' '(' expression ')' statement ( 'else' statement )? ;
tryStatement : 'try' expression returnParameters? block catchClause+ ;
// In reality catch clauses still are not processed as below
// the identifier can only be a set string: "Error". But plans
// of the Solidity team include possible expansion so we'll
// leave this as is, befitting with the Solidity docs.
catchClause : 'catch' ( identifier? parameterList )? block ;
whileStatement
: 'while' '(' expression ')' statement ;
forStatement
: 'for' '(' ( simpleStatement | ';' ) ( expressionStatement | ';' ) expression? ')' statement ;
simpleStatement
: ( variableDeclarationStatement | expressionStatement ) ;
inlineAssemblyStatement
: 'assembly' StringLiteralFragment? assemblyBlock ;
doWhileStatement
: 'do' statement 'while' '(' expression ')' ';' ;
continueStatement
: 'continue' ';' ;
breakStatement
: 'break' ';' ;
returnStatement
: 'return' expression? ';' ;
// throw is no longer supported by latest Solidity.
throwStatement
: 'throw' ';' ;
emitStatement
: 'emit' functionCall ';' ;
// 'var' is no longer supported by latest Solidity.
variableDeclarationStatement
: ( 'var' identifierList | variableDeclaration | '(' variableDeclarationList ')' ) ( '=' expression )? ';';
variableDeclarationList
: variableDeclaration? (',' variableDeclaration? )* ;
identifierList
: '(' ( identifier? ',' )* identifier? ')' ;
elementaryTypeName
: 'address' PayableKeyword? | 'bool' | 'string' | 'var' | Int | Uint | 'byte' | Byte | Fixed | Ufixed ;
Int
: 'int' | 'int8' | 'int16' | 'int24' | 'int32' | 'int40' | 'int48' | 'int56' | 'int64' | 'int72' | 'int80' | 'int88' | 'int96' | 'int104' | 'int112' | 'int120' | 'int128' | 'int136' | 'int144' | 'int152' | 'int160' | 'int168' | 'int176' | 'int184' | 'int192' | 'int200' | 'int208' | 'int216' | 'int224' | 'int232' | 'int240' | 'int248' | 'int256' ;
Uint
: 'uint' | 'uint8' | 'uint16' | 'uint24' | 'uint32' | 'uint40' | 'uint48' | 'uint56' | 'uint64' | 'uint72' | 'uint80' | 'uint88' | 'uint96' | 'uint104' | 'uint112' | 'uint120' | 'uint128' | 'uint136' | 'uint144' | 'uint152' | 'uint160' | 'uint168' | 'uint176' | 'uint184' | 'uint192' | 'uint200' | 'uint208' | 'uint216' | 'uint224' | 'uint232' | 'uint240' | 'uint248' | 'uint256' ;
Byte
: 'bytes' | 'bytes1' | 'bytes2' | 'bytes3' | 'bytes4' | 'bytes5' | 'bytes6' | 'bytes7' | 'bytes8' | 'bytes9' | 'bytes10' | 'bytes11' | 'bytes12' | 'bytes13' | 'bytes14' | 'bytes15' | 'bytes16' | 'bytes17' | 'bytes18' | 'bytes19' | 'bytes20' | 'bytes21' | 'bytes22' | 'bytes23' | 'bytes24' | 'bytes25' | 'bytes26' | 'bytes27' | 'bytes28' | 'bytes29' | 'bytes30' | 'bytes31' | 'bytes32' ;
Fixed
: 'fixed' | ( 'fixed' [0-9]+ 'x' [0-9]+ ) ;
Ufixed
: 'ufixed' | ( 'ufixed' [0-9]+ 'x' [0-9]+ ) ;
expression
: expression ('++' | '--')
| 'new' typeName
| expression '[' expression? ']'
| expression '[' expression? ':' expression? ']'
| expression '.' identifier
| expression '{' nameValueList '}'
| expression '(' functionCallArguments ')'
| PayableKeyword '(' expression ')'
| '(' expression ')'
| ('++' | '--') expression
| ('+' | '-') expression
| ('after' | 'delete') expression
| '!' expression
| '~' expression
| expression '**' expression
| expression ('*' | '/' | '%') expression
| expression ('+' | '-') expression
| expression ('<<' | '>>') expression
| expression '&' expression
| expression '^' expression
| expression '|' expression
| expression ('<' | '>' | '<=' | '>=') expression
| expression ('==' | '!=') expression
| expression '&&' expression
| expression '||' expression
| expression '?' expression ':' expression
| expression ('=' | '|=' | '^=' | '&=' | '<<=' | '>>=' | '+=' | '-=' | '*=' | '/=' | '%=') expression
| primaryExpression ;
primaryExpression
: BooleanLiteral
| numberLiteral
| hexLiteral
| stringLiteral
| identifier ('[' ']')?
| TypeKeyword
| tupleExpression
| typeNameExpression ('[' ']')? ;
expressionList
: expression (',' expression)* ;
nameValueList
: nameValue (',' nameValue)* ','? ;
nameValue
: identifier ':' expression ;
functionCallArguments
: '{' nameValueList? '}'
| expressionList? ;
functionCall
: expression '(' functionCallArguments ')' ;
tupleExpression
: '(' ( expression? ( ',' expression? )* ) ')'
| '[' ( expression ( ',' expression )* )? ']' ;
typeNameExpression
: elementaryTypeName
| userDefinedTypeName ;
assemblyItem
: identifier
| assemblyBlock
| assemblyExpression
| assemblyLocalDefinition
| assemblyAssignment
| assemblyStackAssignment
| labelDefinition
| assemblySwitch
| assemblyFunctionDefinition
| assemblyFor
| assemblyIf
| BreakKeyword
| ContinueKeyword
| LeaveKeyword
| subAssembly
| numberLiteral
| stringLiteral
| hexLiteral ;
assemblyBlock
: '{' assemblyItem* '}' ;
assemblyExpression
: assemblyCall | assemblyLiteral ;
assemblyCall
: ( 'return' | 'address' | 'byte' | identifier ) ( '(' assemblyExpression? ( ',' assemblyExpression )* ')' )? ;
assemblyLocalDefinition
: 'let' assemblyIdentifierList ( ':=' assemblyExpression )? ;
assemblyAssignment
: assemblyIdentifierList ':=' assemblyExpression ;
assemblyIdentifierList
: identifier ( ',' identifier )* ;
assemblyStackAssignment
: '=:' identifier ;
labelDefinition
: identifier ':' ;
assemblySwitch
: 'switch' assemblyExpression assemblyCase* ;
assemblyCase
: 'case' assemblyLiteral assemblyType? assemblyBlock
| 'default' assemblyBlock ;
assemblyFunctionDefinition
: 'function' identifier '(' assemblyTypedVariableList? ')'
assemblyFunctionReturns? assemblyBlock ;
assemblyFunctionReturns
: ( '-' '>' assemblyTypedVariableList ) ;
assemblyFor
: 'for' assemblyBlock assemblyExpression assemblyBlock assemblyBlock ;
assemblyIf
: 'if' assemblyExpression assemblyBlock ;
assemblyLiteral
: ( stringLiteral | DecimalNumber | HexNumber | hexLiteral | BooleanLiteral ) assemblyType? ;
assemblyTypedVariableList
: identifier assemblyType? ( ',' assemblyTypedVariableList )? ;
assemblyType
: ':' identifier ;
subAssembly
: 'assembly' identifier assemblyBlock ;
numberLiteral
: (DecimalNumber | HexNumber) NumberUnit? ;
identifier
: ('from' | 'calldata' | 'address' | Identifier) ;
BooleanLiteral
: 'true' | 'false' ;
DecimalNumber
: ( DecimalDigits | (DecimalDigits? '.' DecimalDigits) ) ( [eE] '-'? DecimalDigits )? ;
fragment
DecimalDigits
: [0-9] ( '_'? [0-9] )* ;
HexNumber
: '0' [xX] HexDigits ;
fragment
HexDigits
: HexCharacter ( '_'? HexCharacter )* ;
NumberUnit
: 'wei' | 'szabo' | 'finney' | 'ether'
| 'seconds' | 'minutes' | 'hours' | 'days' | 'weeks' | 'years' ;
HexLiteralFragment
: 'hex' (('"' HexDigits? '"') | ('\'' HexDigits? '\'')) ;
hexLiteral : HexLiteralFragment+ ;
fragment
HexPair
: HexCharacter HexCharacter ;
fragment
HexCharacter
: [0-9A-Fa-f] ;
ReservedKeyword
: 'after'
| 'case'
| 'default'
| 'final'
| 'in'
| 'inline'
| 'let'
| 'match'
| 'null'
| 'of'
| 'relocatable'
| 'static'
| 'switch'
| 'typeof' ;
AnonymousKeyword : 'anonymous' ;
BreakKeyword : 'break' ;
ConstantKeyword : 'constant' ;
ImmutableKeyword : 'immutable' ;
ContinueKeyword : 'continue' ;
LeaveKeyword : 'leave' ;
ExternalKeyword : 'external' ;
IndexedKeyword : 'indexed' ;
InternalKeyword : 'internal' ;
PayableKeyword : 'payable' ;
PrivateKeyword : 'private' ;
PublicKeyword : 'public' ;
VirtualKeyword : 'virtual' ;
PureKeyword : 'pure' ;
TypeKeyword : 'type' ;
ViewKeyword : 'view' ;
ConstructorKeyword : 'constructor' ;
FallbackKeyword : 'fallback' ;
ReceiveKeyword : 'receive' ;
Identifier
: IdentifierStart IdentifierPart* ;
fragment
IdentifierStart
: [a-zA-Z$_] ;
fragment
IdentifierPart
: [a-zA-Z0-9$_] ;
stringLiteral
: StringLiteralFragment+ ;
StringLiteralFragment
: '"' DoubleQuotedStringCharacter* '"'
| '\'' SingleQuotedStringCharacter* '\'' ;
fragment
DoubleQuotedStringCharacter
: ~["\r\n\\] | ('\\' .) ;
fragment
SingleQuotedStringCharacter
: ~['\r\n\\] | ('\\' .) ;
VersionLiteral
: [0-9]+ '.' [0-9]+ ('.' [0-9]+)? ;
WS
: [ \t\r\n\u000C]+ -> skip ;
COMMENT
: '/*' .*? '*/' -> channel(HIDDEN) ;
LINE_COMMENT
: '//' ~[\r\n]* -> channel(HIDDEN) ;

101
scripts/test_antlr_grammar.sh Executable file
View File

@ -0,0 +1,101 @@
#!/usr/bin/env bash
set -e
ROOT_DIR="$(dirname "$0")"/..
WORKDIR="${ROOT_DIR}/build/antlr"
ANTLR_JAR="${ROOT_DIR}/build/deps/antlr4.jar"
ANTLR_JAR_URI="https://www.antlr.org/download/antlr-4.7.2-complete.jar"
GRAMMAR_FILE="$(readlink -f "${ROOT_DIR}/docs/Solidity.g4")"
SGR_RESET="\033[0m"
SGR_BOLD="\033[1m"
SGR_GREEN="\033[32m"
SGR_RED="\033[31m"
SGR_BLUE="\033[34m"
vt_cursor_up() { echo -ne "\033[A"; }
vt_cursor_begin_of_line() { echo -ne "\r"; }
download_antlr4()
{
if [[ ! -e "$ANTLR_JAR" ]]
then
curl -o "${ANTLR_JAR}" "${ANTLR_JAR_URI}"
fi
}
prepare_workdir()
{
mkdir -p "${ROOT_DIR}/build/deps"
mkdir -p "${WORKDIR}"
mkdir -p "${WORKDIR}/src"
mkdir -p "${WORKDIR}/target"
}
prepare_workdir
download_antlr4
if [[ ! -f "${WORKDIR}/target/SolidityParser.class" ]] || \
[ "${GRAMMAR_FILE}" -nt "${WORKDIR}/target/SolidityParser.class" ]
then
echo "Creating parser"
# Create lexer/parser from grammar
java -jar "${ANTLR_JAR}" "${GRAMMAR_FILE}" -o "${WORKDIR}/src/"
# Compile lexer/parser sources
javac -classpath "${ANTLR_JAR}" "${WORKDIR}/src/"*.java -d "${WORKDIR}/target/"
fi
# Run tests
failed_count=0
test_file()
{
local SOL_FILE
SOL_FILE="$(readlink -m "${1}")"
local cur=${2}
local max=${3}
echo -e "${SGR_BLUE}[${cur}/${max}] Testing ${SOL_FILE}${SGR_RESET} ..."
local output
output=$(
java \
-classpath "${ANTLR_JAR}:${WORKDIR}/target/" \
"org.antlr.v4.gui.TestRig" \
Solidity \
sourceUnit <"${SOL_FILE}" 2>&1
)
vt_cursor_up
vt_cursor_begin_of_line
if [[ "${output}" == "" ]]
then
echo -e "${SGR_BLUE}[${cur}/${max}] Testing ${SOL_FILE}${SGR_RESET} ${SGR_BOLD}${SGR_GREEN}OK${SGR_RESET}"
else
echo -e "${SGR_BLUE}[${cur}/${max}] Testing ${SOL_FILE}${SGR_RESET} ${SGR_BOLD}${SGR_RED}FAILED${SGR_RESET}"
echo "${output}"
failed_count=$((failed_count + 1))
exit 1
fi
}
# we only want to use files that do not contain errors or multi-source files.
SOL_FILES=()
while IFS='' read -r line
do
SOL_FILES+=("$line")
done < <(
grep -riL -E \
"^\/\/ (Syntax|Type|Parser|Declaration)Error|^==== Source:" \
"${ROOT_DIR}/test/libsolidity/syntaxTests" \
"${ROOT_DIR}/test/libsolidity/semanticTests" \
)
test_count=0
for SOL_FILE in "${SOL_FILES[@]}"
do
test_count=$((test_count + 1))
test_file "${SOL_FILE}" ${test_count} ${#SOL_FILES[*]}
done
echo "Summary: ${failed_count} of ${#SOL_FILES[*]} sources failed."
exit ${failed_count}