// solidity/libyul/AsmParser.cpp
// 2023-09-05 11:50:14 +02:00
//
// 718 lines
// 20 KiB
// C++

/*
This file is part of solidity.
solidity is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
solidity is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with solidity. If not, see <http://www.gnu.org/licenses/>.
*/
// SPDX-License-Identifier: GPL-3.0
/**
* @author Christian <c@ethdev.com>
* @date 2016
* Solidity inline assembly parser.
*/
#include <libyul/AST.h>
#include <libyul/AsmParser.h>
#include <libyul/Exceptions.h>
#include <liblangutil/ErrorReporter.h>
#include <liblangutil/Exceptions.h>
#include <liblangutil/Scanner.h>
#include <libsolutil/Common.h>
#include <libsolutil/Visitor.h>
#include <range/v3/view/subrange.hpp>
#include <boost/algorithm/string.hpp>
#include <algorithm>
#include <regex>
using namespace solidity;
using namespace solidity::util;
using namespace solidity::langutil;
using namespace solidity::yul;
namespace
{

/// Strictly converts a decimal string to an int.
///
/// Only an optional leading '-' followed by decimal digits is accepted, and the
/// whole string has to be consumed by the conversion.
///
/// @param _value the text to convert.
/// @returns the parsed value, or std::nullopt if @a _value is empty, contains
///          anything besides "-?digits" or does not fit into an int.
std::optional<int> toInt(std::string const& _value)
{
	// std::stoi on its own skips leading whitespace and accepts a leading '+',
	// so the first character is validated up front.
	if (_value.empty())
		return std::nullopt;
	char const first = _value.front();
	if (first != '-' && !('0' <= first && first <= '9'))
		return std::nullopt;

	try
	{
		std::size_t consumed = 0;
		int const result = std::stoi(_value, &consumed);
		// std::stoi stops at the first non-digit ("12abc" -> 12); treat that as a failure.
		if (consumed != _value.size())
			return std::nullopt;
		return result;
	}
	catch (...)
	{
		// No digits at all (e.g. "-") or value out of range for int.
		return std::nullopt;
	}
}

}
/// Creates the debug data for a node starting at the current position.
/// Where the locations come from is selected by m_useSourceLocationFrom:
///  - Scanner: the current scanner location is passed for both locations,
///  - LocationOverride: m_locationOverride is passed for both locations,
///  - Comments: the current scanner location is passed together with the location
///    and the AST ID most recently extracted from a comment.
std::shared_ptr<DebugData const> Parser::createDebugData() const
{
	switch (m_useSourceLocationFrom)
	{
	case UseSourceLocationFrom::Comments:
		return DebugData::create(ParserBase::currentLocation(), m_locationFromComment, m_astIDFromComment);
	case UseSourceLocationFrom::LocationOverride:
		return DebugData::create(m_locationOverride, m_locationOverride);
	case UseSourceLocationFrom::Scanner:
		return DebugData::create(ParserBase::currentLocation(), ParserBase::currentLocation());
	}
	// All enumerators are handled above.
	solAssert(false, "");
}
/// Replaces @a _debugData by a copy whose location end is taken from @a _location.
///  - Scanner mode: the end of both the native and the origin location is updated.
///  - Comments mode: only the end of the native location is updated.
///  - LocationOverride mode: nothing is changed.
/// @param _debugData debug data to be replaced; must not be null.
/// @param _location location whose end is copied.
void Parser::updateLocationEndFrom(
	std::shared_ptr<DebugData const>& _debugData,
	SourceLocation const& _location
) const
{
	solAssert(_debugData, "");

	switch (m_useSourceLocationFrom)
	{
	case UseSourceLocationFrom::LocationOverride:
		// Ignore the update. The location we're overriding with is not supposed to change
		return;
	case UseSourceLocationFrom::Scanner:
	case UseSourceLocationFrom::Comments:
	{
		// The pointee is const, so a modified copy is created and swapped in.
		DebugData adjusted = *_debugData;
		adjusted.nativeLocation.end = _location.end;
		if (m_useSourceLocationFrom == UseSourceLocationFrom::Scanner)
			adjusted.originLocation.end = _location.end;
		_debugData = std::make_shared<DebugData const>(std::move(adjusted));
		return;
	}
	}
}
/// Parses the complete content of @a _charStream as a single block.
/// A new scanner is created for the stream, the block is parsed via parseInline()
/// and afterwards the end of the stream is expected.
/// @returns the result of parseInline(), which is null if a fatal error was caught there.
std::unique_ptr<Block> Parser::parse(CharStream& _charStream)
{
	m_scanner = std::make_shared<Scanner>(_charStream);
	auto parsedBlock = parseInline(m_scanner);
	// Nothing may follow the block.
	expectToken(Token::EOS);
	return parsedBlock;
}
/// Parses one block from @a _scanner.
/// The scanner is switched to Yul mode for the duration of the call and switched back
/// to its previous kind on exit. The recursion depth counter is reset first.
/// @returns the parsed block, or null if a FatalError was thrown while parsing
///          (in which case at least one error must have been reported).
std::unique_ptr<Block> Parser::parseInline(std::shared_ptr<Scanner> const& _scanner)
{
	m_recursionDepth = 0;

	auto const kindToRestore = _scanner->scannerKind();
	_scanner->setScannerMode(ScannerKind::Yul);
	ScopeGuard restoreScannerKind([&]{ _scanner->setScannerMode(kindToRestore); });

	std::unique_ptr<Block> result;
	try
	{
		m_scanner = _scanner;
		// In comment mode the debug data for the very first token has to be fetched explicitly.
		if (m_useSourceLocationFrom == UseSourceLocationFrom::Comments)
			fetchDebugDataFromComment();
		result = std::make_unique<Block>(parseBlock());
	}
	catch (FatalError const&)
	{
		yulAssert(!m_errorReporter.errors().empty(), "Fatal error detected, but no error is reported.");
	}
	return result;
}
/// Advances via ParserBase::advance() and, if source locations are taken from comments,
/// re-reads the debug data from the comment that is current afterwards.
/// @returns the token returned by ParserBase::advance().
langutil::Token Parser::advance()
{
	auto const result = ParserBase::advance();
	if (m_useSourceLocationFrom != UseSourceLocationFrom::Comments)
		return result;
	fetchDebugDataFromComment();
	return result;
}
void Parser::fetchDebugDataFromComment()
{
solAssert(m_sourceNames.has_value(), "");
static std::regex const tagRegex = std::regex(
R"~~((?:^|\s+)(@[a-zA-Z0-9\-_]+)(?:\s+|$))~~", // tag, e.g: @src
std::regex_constants::ECMAScript | std::regex_constants::optimize
);
std::string_view commentLiteral = m_scanner->currentCommentLiteral();
std::match_results<std::string_view::const_iterator> match;
langutil::SourceLocation originLocation = m_locationFromComment;
// Empty for each new node.
std::optional<int> astID;
while (regex_search(commentLiteral.cbegin(), commentLiteral.cend(), match, tagRegex))
{
solAssert(match.size() == 2, "");
commentLiteral = commentLiteral.substr(static_cast<size_t>(match.position() + match.length()));
if (match[1] == "@src")
{
if (auto parseResult = parseSrcComment(commentLiteral, m_scanner->currentCommentLocation()))
tie(commentLiteral, originLocation) = *parseResult;
else
break;
}
else if (match[1] == "@ast-id")
{
if (auto parseResult = parseASTIDComment(commentLiteral, m_scanner->currentCommentLocation()))
tie(commentLiteral, astID) = *parseResult;
else
break;
}
else
// Ignore unrecognized tags.
continue;
}
m_locationFromComment = originLocation;
m_astIDFromComment = astID;
}
namespace
{

/// Checks whether a code snippet, as matched by the fourth capture group of the
/// regular expression in Parser::parseSrcComment, has a proper closing quote.
///
/// @param _snippet the snippet text including its opening quote.
/// @returns true iff @a _snippet consists of at least an opening and a closing quote
///          and the closing quote is not escaped by a backslash.
bool isTerminatedCodeSnippet(std::string const& _snippet)
{
	// A lone opening quote is not terminated, even though it "ends with a quote".
	if (_snippet.size() < 2 || _snippet.front() != '"' || _snippet.back() != '"')
		return false;

	// Count the backslashes directly in front of the last quote (excluding the opening quote).
	// Backslashes pair up from left to right, so the quote is escaped iff the count is odd.
	std::size_t backslashes = 0;
	for (std::size_t i = _snippet.size() - 1; i > 1 && _snippet[i - 1] == '\\'; --i)
		++backslashes;
	return backslashes % 2 == 0;
}

}

/// Parses the arguments of an "@src" tag, i.e. "<index>:<start>:<end>" optionally followed
/// by a quoted code snippet.
///
/// @param _arguments the comment text directly following the tag.
/// @param _commentLocation location used for all errors reported here.
/// @returns std::nullopt (after reporting error 8387) if the location specification cannot
///          be matched. Otherwise a pair of the text following the matched arguments and
///          the resulting source location. The location is default-constructed if
///           - the code snippet is not terminated (error 1544),
///           - one of the numbers cannot be converted to int (error 6367), or
///           - the source index is not contained in m_sourceNames (error 2674).
///          A source index of -1 yields a location without a source name.
std::optional<std::pair<std::string_view, SourceLocation>> Parser::parseSrcComment(
	std::string_view const _arguments,
	langutil::SourceLocation const& _commentLocation
)
{
	static std::regex const argsRegex = std::regex(
		R"~~(^(-1|\d+):(-1|\d+):(-1|\d+)(?:\s+|$))~~" // index and location, e.g.: 1:234:-1
		R"~~(("(?:[^"\\]|\\.)*"?)?)~~", // optional code snippet, e.g.: "string memory s = \"abc\";..."
		std::regex_constants::ECMAScript | std::regex_constants::optimize
	);

	std::match_results<std::string_view::const_iterator> match;
	if (!regex_search(_arguments.cbegin(), _arguments.cend(), match, argsRegex))
	{
		m_errorReporter.syntaxError(
			8387_error,
			_commentLocation,
			"Invalid values in source location mapping. Could not parse location specification."
		);
		return std::nullopt;
	}

	solAssert(match.size() == 5, "");
	std::string_view tail = _arguments.substr(static_cast<size_t>(match.position() + match.length()));

	// The regular expression also matches snippets without a closing quote so that
	// a dedicated error can be reported for them.
	if (match[4].matched && !isTerminatedCodeSnippet(match[4].str()))
	{
		m_errorReporter.syntaxError(
			1544_error,
			_commentLocation,
			"Invalid code snippet in source location mapping. Quote is not terminated."
		);
		return {{tail, SourceLocation{}}};
	}

	std::optional<int> const sourceIndex = toInt(match[1].str());
	std::optional<int> const start = toInt(match[2].str());
	std::optional<int> const end = toInt(match[3].str());

	if (!sourceIndex.has_value() || !start.has_value() || !end.has_value())
		m_errorReporter.syntaxError(
			6367_error,
			_commentLocation,
			"Invalid value in source location mapping. "
			"Expected non-negative integer values or -1 for source index and location."
		);
	else if (sourceIndex == -1)
		return {{tail, SourceLocation{start.value(), end.value(), nullptr}}};
	else if (!(sourceIndex >= 0 && m_sourceNames->count(static_cast<unsigned>(sourceIndex.value()))))
		m_errorReporter.syntaxError(
			2674_error,
			_commentLocation,
			"Invalid source mapping. Source index not defined via @use-src."
		);
	else
	{
		std::shared_ptr<std::string const> sourceName = m_sourceNames->at(static_cast<unsigned>(sourceIndex.value()));
		solAssert(sourceName, "");
		return {{tail, SourceLocation{start.value(), end.value(), std::move(sourceName)}}};
	}

	// An error was reported above; continue after the arguments with an empty location.
	return {{tail, SourceLocation{}}};
}
/// Parses the argument of an "@ast-id" tag, which has to be a sequence of decimal digits.
///
/// @param _arguments the comment text directly following the tag.
/// @param _commentLocation location used for the error reported here.
/// @returns std::nullopt if @a _arguments does not start with digits followed by whitespace
///          or the end of the text. Otherwise a pair of the text following the argument and
///          the AST ID; the ID is empty if the digits cannot be converted to a non-negative int.
///          Error 1749 is reported whenever no valid ID could be obtained.
std::optional<std::pair<std::string_view, std::optional<int>>> Parser::parseASTIDComment(
	std::string_view _arguments,
	langutil::SourceLocation const& _commentLocation
)
{
	static std::regex const argRegex = std::regex(
		R"~~(^(\d+)(?:\s|$))~~",
		std::regex_constants::ECMAScript | std::regex_constants::optimize
	);

	std::match_results<std::string_view::const_iterator> match;
	if (!regex_search(_arguments.cbegin(), _arguments.cend(), match, argRegex))
	{
		m_errorReporter.syntaxError(1749_error, _commentLocation, "Invalid argument for @ast-id.");
		return std::nullopt;
	}

	solAssert(match.size() == 2, "");
	// Hand back only the text that has not been consumed, like parseSrcComment does.
	std::string_view const tail = _arguments.substr(static_cast<size_t>(match.position() + match.length()));

	// The conversion fails if the number does not fit into an int.
	std::optional<int> astID = toInt(match[1].str());
	if (!astID || *astID < 0)
	{
		m_errorReporter.syntaxError(1749_error, _commentLocation, "Invalid argument for @ast-id.");
		astID = std::nullopt;
	}

	return {{tail, astID}};
}
/// Parses "{ <statement>* }".
/// The end of the block's location is set to the location of the closing brace.
Block Parser::parseBlock()
{
	RecursionGuard recursionGuard(*this);
	Block result = createWithLocation<Block>();
	expectToken(Token::LBrace);
	while (true)
	{
		if (currentToken() == Token::RBrace)
			break;
		result.statements.emplace_back(parseStatement());
	}
	// The location has to be read before the closing brace is consumed.
	updateLocationEndFrom(result.debugData, currentLocation());
	advance();
	return result;
}
/// Parses a single statement.
/// Statements introduced by a keyword or an opening brace are dispatched in the first switch.
/// Everything else has to start with a literal or identifier and is then either
/// a function call used as a statement or an assignment.
/// Unexpected input is reported via fatalParserError().
Statement Parser::parseStatement()
{
RecursionGuard recursionGuard(*this);
switch (currentToken())
{
case Token::Let:
return parseVariableDeclaration();
case Token::Function:
return parseFunctionDefinition();
case Token::LBrace:
return parseBlock();
case Token::If:
{
// "if" <expression> <block>
If _if = createWithLocation<If>();
advance();
_if.condition = std::make_unique<Expression>(parseExpression());
_if.body = parseBlock();
updateLocationEndFrom(_if.debugData, nativeLocationOf(_if.body));
return Statement{std::move(_if)};
}
case Token::Switch:
{
// "switch" <expression> followed by any number of "case"s and at most one "default",
// which has to come last. At least one of them is required.
Switch _switch = createWithLocation<Switch>();
advance();
_switch.expression = std::make_unique<Expression>(parseExpression());
while (currentToken() == Token::Case)
_switch.cases.emplace_back(parseCase());
if (currentToken() == Token::Default)
_switch.cases.emplace_back(parseCase());
// Anything case-like after the default case is an error.
if (currentToken() == Token::Default)
fatalParserError(6931_error, "Only one default case allowed.");
else if (currentToken() == Token::Case)
fatalParserError(4904_error, "Case not allowed after default case.");
if (_switch.cases.empty())
fatalParserError(2418_error, "Switch statement without any cases.");
updateLocationEndFrom(_switch.debugData, nativeLocationOf(_switch.cases.back().body));
return Statement{std::move(_switch)};
}
case Token::For:
return parseForLoop();
case Token::Break:
{
// The node is created before advancing so that it gets the location of the keyword.
Statement stmt{createWithLocation<Break>()};
checkBreakContinuePosition("break");
advance();
return stmt;
}
case Token::Continue:
{
Statement stmt{createWithLocation<Continue>()};
checkBreakContinuePosition("continue");
advance();
return stmt;
}
case Token::Leave:
{
Statement stmt{createWithLocation<Leave>()};
// Reported as a non-fatal error, parsing continues.
if (!m_insideFunction)
m_errorReporter.syntaxError(8149_error, currentLocation(), "Keyword \"leave\" can only be used inside a function.");
advance();
return stmt;
}
default:
break;
}
// Options left:
// Expression/FunctionCall
// Assignment
std::variant<Literal, Identifier> elementary(parseLiteralOrIdentifier());
switch (currentToken())
{
case Token::LParen:
{
// Function call used as a statement. It shares its debug data with the call expression.
Expression expr = parseCall(std::move(elementary));
return ExpressionStatement{debugDataOf(expr), std::move(expr)};
}
case Token::Comma:
case Token::AssemblyAssign:
{
// <identifier> ("," <identifier>)* ":=" <expression>
Assignment assignment;
assignment.debugData = debugDataOf(elementary);
while (true)
{
// Each element on the left-hand side has to be an identifier that is not a builtin.
if (!std::holds_alternative<Identifier>(elementary))
{
auto const token = currentToken() == Token::Comma ? "," : ":=";
fatalParserError(
2856_error,
std::string("Variable name must precede \"") +
token +
"\"" +
(currentToken() == Token::Comma ? " in multiple assignment." : " in assignment.")
);
}
auto const& identifier = std::get<Identifier>(elementary);
if (m_dialect.builtin(identifier.name))
fatalParserError(6272_error, "Cannot assign to builtin function \"" + identifier.name.str() + "\".");
assignment.variableNames.emplace_back(identifier);
if (currentToken() != Token::Comma)
break;
expectToken(Token::Comma);
elementary = parseLiteralOrIdentifier();
}
expectToken(Token::AssemblyAssign);
assignment.value = std::make_unique<Expression>(parseExpression());
updateLocationEndFrom(assignment.debugData, nativeLocationOf(*assignment.value));
return Statement{std::move(assignment)};
}
default:
fatalParserError(6913_error, "Call or assignment expected.");
break;
}
// Every path above either returns or reports a fatal error.
yulAssert(false, "");
return {};
}
/// Parses one branch of a switch statement: either "default <block>" or "case <literal> <block>".
/// For the default branch the value of the returned case is left unset.
/// Must only be called if the current token is "case" or "default".
Case Parser::parseCase()
{
	RecursionGuard recursionGuard(*this);
	Case result = createWithLocation<Case>();

	switch (currentToken())
	{
	case Token::Default:
		advance();
		break;
	case Token::Case:
	{
		advance();
		std::variant<Literal, Identifier> caseValue = parseLiteralOrIdentifier();
		if (!std::holds_alternative<Literal>(caseValue))
			fatalParserError(4805_error, "Literal expected.");
		result.value = std::make_unique<Literal>(std::get<Literal>(std::move(caseValue)));
		break;
	}
	default:
		yulAssert(false, "Case or default case expected.");
	}

	result.body = parseBlock();
	updateLocationEndFrom(result.debugData, nativeLocationOf(result.body));
	return result;
}
/// Parses "for <block> <expression> <block> <block>" (init block, condition, post block, body).
/// While each part is parsed, m_currentForLoopComponent tells which part that is;
/// it is reset to None for the condition and restored to its previous value at the end.
ForLoop Parser::parseForLoop()
{
	RecursionGuard recursionGuard(*this);

	// Remember the component of an enclosing loop (if any) to restore it afterwards.
	ForLoopComponent const enclosingComponent = m_currentForLoopComponent;

	ForLoop loop = createWithLocation<ForLoop>();
	expectToken(Token::For);

	m_currentForLoopComponent = ForLoopComponent::ForLoopPre;
	loop.pre = parseBlock();

	m_currentForLoopComponent = ForLoopComponent::None;
	loop.condition = std::make_unique<Expression>(parseExpression());

	m_currentForLoopComponent = ForLoopComponent::ForLoopPost;
	loop.post = parseBlock();

	m_currentForLoopComponent = ForLoopComponent::ForLoopBody;
	loop.body = parseBlock();
	updateLocationEndFrom(loop.debugData, nativeLocationOf(loop.body));

	m_currentForLoopComponent = enclosingComponent;
	return loop;
}
/// Parses an expression: a literal, an identifier or a function call.
/// An identifier directly followed by "(" is parsed as a call. An identifier that names
/// a builtin function of the dialect but is not called is a fatal error.
Expression Parser::parseExpression()
{
	RecursionGuard recursionGuard(*this);
	std::variant<Literal, Identifier> operation = parseLiteralOrIdentifier();

	if (Literal* literal = std::get_if<Literal>(&operation))
		return std::move(*literal);

	Identifier& identifier = std::get<Identifier>(operation);
	if (currentToken() == Token::LParen)
		return parseCall(std::move(operation));
	if (m_dialect.builtin(identifier.name))
		fatalParserError(
			7104_error,
			nativeLocationOf(identifier),
			"Builtin function \"" + identifier.name.str() + "\" must be called."
		);
	return std::move(identifier);
}
std::variant<Literal, Identifier> Parser::parseLiteralOrIdentifier()
{
RecursionGuard recursionGuard(*this);
switch (currentToken())
{
case Token::Identifier:
{
Identifier identifier{createDebugData(), YulString{currentLiteral()}};
advance();
return identifier;
}
case Token::StringLiteral:
case Token::HexStringLiteral:
case Token::Number:
case Token::TrueLiteral:
case Token::FalseLiteral:
{
LiteralKind kind = LiteralKind::Number;
switch (currentToken())
{
case Token::StringLiteral:
case Token::HexStringLiteral:
kind = LiteralKind::String;
break;
case Token::Number:
if (!isValidNumberLiteral(currentLiteral()))
fatalParserError(4828_error, "Invalid number literal.");
kind = LiteralKind::Number;
break;
case Token::TrueLiteral:
case Token::FalseLiteral:
kind = LiteralKind::Boolean;
break;
default:
break;
}
Literal literal{
createDebugData(),
kind,
YulString{currentLiteral()},
kind == LiteralKind::Boolean ? m_dialect.boolType : m_dialect.defaultType
};
advance();
if (currentToken() == Token::Colon)
{
expectToken(Token::Colon);
updateLocationEndFrom(literal.debugData, currentLocation());
literal.type = expectAsmIdentifier();
}
return literal;
}
case Token::Illegal:
fatalParserError(1465_error, "Illegal token: " + to_string(m_scanner->currentError()));
break;
default:
fatalParserError(1856_error, "Literal or identifier expected.");
}
return {};
}
/// Parses "let <typed name> (, <typed name>)* [:= <expression>]".
/// The declaration's location ends at the initial value if there is one and
/// at the last declared variable otherwise.
VariableDeclaration Parser::parseVariableDeclaration()
{
	RecursionGuard recursionGuard(*this);
	VariableDeclaration declaration = createWithLocation<VariableDeclaration>();
	expectToken(Token::Let);

	// At least one variable, further ones separated by commas.
	declaration.variables.emplace_back(parseTypedName());
	while (currentToken() == Token::Comma)
	{
		expectToken(Token::Comma);
		declaration.variables.emplace_back(parseTypedName());
	}

	if (currentToken() != Token::AssemblyAssign)
	{
		updateLocationEndFrom(declaration.debugData, nativeLocationOf(declaration.variables.back()));
		return declaration;
	}

	expectToken(Token::AssemblyAssign);
	declaration.value = std::make_unique<Expression>(parseExpression());
	updateLocationEndFrom(declaration.debugData, nativeLocationOf(*declaration.value));
	return declaration;
}
/// Parses "function <name> ( <parameters> ) [-> <return variables>] <block>".
/// A definition inside the init block of a for loop is reported as a (non-fatal) syntax error.
/// While the function is parsed, m_insideFunction is set and the for-loop component is
/// reset to None; both are restored to their previous values afterwards.
FunctionDefinition Parser::parseFunctionDefinition()
{
	RecursionGuard recursionGuard(*this);

	if (m_currentForLoopComponent == ForLoopComponent::ForLoopPre)
		m_errorReporter.syntaxError(
			3441_error,
			currentLocation(),
			"Functions cannot be defined inside a for-loop init block."
		);

	// The function body does not belong to any enclosing for loop.
	ForLoopComponent const enclosingComponent = m_currentForLoopComponent;
	m_currentForLoopComponent = ForLoopComponent::None;

	FunctionDefinition definition = createWithLocation<FunctionDefinition>();
	expectToken(Token::Function);
	definition.name = expectAsmIdentifier();

	expectToken(Token::LParen);
	// NOTE(review): a comma directly followed by ")" ends this loop, i.e. a trailing comma
	// in the parameter list is accepted here, unlike in the return variable list below.
	while (currentToken() != Token::RParen)
	{
		definition.parameters.emplace_back(parseTypedName());
		if (currentToken() != Token::RParen)
			expectToken(Token::Comma);
	}
	expectToken(Token::RParen);

	if (currentToken() == Token::RightArrow)
	{
		expectToken(Token::RightArrow);
		// One or more return variables, the list ends at the opening brace of the body.
		definition.returnVariables.emplace_back(parseTypedName());
		while (currentToken() != Token::LBrace)
		{
			expectToken(Token::Comma);
			definition.returnVariables.emplace_back(parseTypedName());
		}
	}

	bool const wasInsideFunction = m_insideFunction;
	m_insideFunction = true;
	definition.body = parseBlock();
	m_insideFunction = wasInsideFunction;

	updateLocationEndFrom(definition.debugData, nativeLocationOf(definition.body));
	m_currentForLoopComponent = enclosingComponent;
	return definition;
}
/// Parses the argument list "( <expression> (, <expression>)* )" of a function call.
/// @param _initialOp the already parsed element in front of the "("; has to be an identifier,
///        otherwise a fatal error is reported.
/// @returns the call, whose location starts at the function name and ends at the closing parenthesis.
FunctionCall Parser::parseCall(std::variant<Literal, Identifier>&& _initialOp)
{
	RecursionGuard recursionGuard(*this);

	if (!std::holds_alternative<Identifier>(_initialOp))
		fatalParserError(9980_error, "Function name expected.");

	FunctionCall call;
	call.functionName = std::move(std::get<Identifier>(_initialOp));
	call.debugData = call.functionName.debugData;

	expectToken(Token::LParen);
	// Every argument except the first one has to be preceded by a comma.
	for (bool isFirstArgument = true; currentToken() != Token::RParen; isFirstArgument = false)
	{
		if (!isFirstArgument)
			expectToken(Token::Comma);
		call.arguments.emplace_back(parseExpression());
	}
	// Read the location of the closing parenthesis before consuming it.
	updateLocationEndFrom(call.debugData, currentLocation());
	expectToken(Token::RParen);
	return call;
}
/// Parses "<name> [: <type>]".
/// Without an explicit type the dialect's default type is used.
/// With an explicit type the location is extended to the type name.
TypedName Parser::parseTypedName()
{
	RecursionGuard recursionGuard(*this);
	TypedName result = createWithLocation<TypedName>();
	result.name = expectAsmIdentifier();

	if (currentToken() != Token::Colon)
	{
		result.type = m_dialect.defaultType;
		return result;
	}

	expectToken(Token::Colon);
	// The current token is the type name now.
	updateLocationEndFrom(result.debugData, currentLocation());
	result.type = expectAsmIdentifier();
	return result;
}
/// Expects the current token to be an identifier that is not the name of a builtin
/// function of the dialect and consumes it.
/// @returns the name of the identifier.
YulString Parser::expectAsmIdentifier()
{
	YulString identifierName{currentLiteral()};
	bool const isIdentifier = currentToken() == Token::Identifier;
	if (isIdentifier && m_dialect.builtin(identifierName))
		fatalParserError(5568_error, "Cannot use builtin function name \"" + identifierName.str() + "\" as identifier name.");
	// NOTE: We keep the expectation here to ensure the correct source location for the error above.
	expectToken(Token::Identifier);
	return identifierName;
}
/// Reports a (non-fatal) syntax error at the current location if a "break" or "continue"
/// is not located in the body of a for loop.
/// @param _which the keyword to be mentioned in the error message.
void Parser::checkBreakContinuePosition(std::string const& _which)
{
	switch (m_currentForLoopComponent)
	{
	case ForLoopComponent::ForLoopBody:
		// The only valid position.
		return;
	case ForLoopComponent::ForLoopPost:
		m_errorReporter.syntaxError(2461_error, currentLocation(), "Keyword \"" + _which + "\" in for-loop post block is not allowed.");
		return;
	case ForLoopComponent::ForLoopPre:
		m_errorReporter.syntaxError(9615_error, currentLocation(), "Keyword \"" + _which + "\" in for-loop init block is not allowed.");
		return;
	case ForLoopComponent::None:
		m_errorReporter.syntaxError(2592_error, currentLocation(), "Keyword \"" + _which + "\" needs to be inside a for-loop body.");
		return;
	}
}
/// Checks whether @a _literal is an acceptable number literal.
/// @returns false if constructing a u256 from the literal throws. Otherwise true
///          iff the literal starts with "0x" or consists of decimal digits only.
bool Parser::isValidNumberLiteral(std::string const& _literal)
{
	try
	{
		// Only the success of the conversion matters, not its result.
		[[maybe_unused]] auto converted = u256(_literal);
	}
	catch (...)
	{
		return false;
	}

	bool const hasHexPrefix = boost::starts_with(_literal, "0x");
	return hasHexPrefix || _literal.find_first_not_of("0123456789") == std::string::npos;
}