/*
	This file is part of solidity.

	solidity is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	solidity is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with solidity.  If not, see <http://www.gnu.org/licenses/>.
*/
// SPDX-License-Identifier: GPL-3.0
/**
 * @author Christian
 * @date 2016
 * Solidity inline assembly parser.
 */

// NOTE(review): this copy of the file appears to have lost everything that was written
// between angle brackets (the include targets below, and template arguments such as the
// element type of shared_ptr / unique_ptr / optional / variant / make_shared / make_unique /
// holds_alternative / std::get / static_cast / createWithLocation). The code is kept exactly
// as found; restore the missing arguments from the upstream file before compiling.
#include
#include
#include
#include
#include
#include
#include
#include

#include
#include

#include
#include

using namespace std;
using namespace solidity;
using namespace solidity::util;
using namespace solidity::langutil;
using namespace solidity::yul;

namespace
{

/// Builds fresh debug data whose source location is the one stored in @a _debugData
/// (or a default-constructed SourceLocation if @a _debugData is null), with only the
/// `end` field replaced by `_location.end`.
/// The input debug data object itself is not modified; a new object is returned.
[[nodiscard]]
shared_ptr updateLocationEndFrom(
	shared_ptr const& _debugData,
	langutil::SourceLocation const& _location
)
{
	SourceLocation updatedLocation = _debugData ? _debugData->location : langutil::SourceLocation{};
	updatedLocation.end = _location.end;
	return make_shared(updatedLocation);
}

/// Converts @a _value to an integer using stoi.
/// @returns the parsed value, or nullopt if stoi throws for any reason
/// (all exceptions are swallowed on purpose and mapped to "no value").
optional toInt(string const& _value)
{
	try
	{
		return stoi(_value);
	}
	catch (...)
	{
		return nullopt;
	}
}

}

/// Creates the debug data to attach to the AST node currently being parsed.
/// The source of the location depends on m_useSourceLocationFrom:
///  - Scanner: the current location reported by ParserBase,
///  - LocationOverride: the fixed m_locationOverride,
///  - Comments: the debug data last stored in m_debugDataOverride
///    (written by fetchSourceLocationFromComment()).
std::shared_ptr Parser::createDebugData() const
{
	switch (m_useSourceLocationFrom)
	{
		case UseSourceLocationFrom::Scanner:
			return DebugData::create(ParserBase::currentLocation());
		case UseSourceLocationFrom::LocationOverride:
			return DebugData::create(m_locationOverride);
		case UseSourceLocationFrom::Comments:
			return m_debugDataOverride;
	}
	// All enumerators are handled above; reaching this point is an internal error.
	solAssert(false, "");
}

/// Parses a complete stream: creates a scanner over @a _charStream, parses one block
/// via parseInline() and then expects the end-of-stream token.
/// @returns whatever parseInline() returned (which may be null, see there).
unique_ptr Parser::parse(CharStream& _charStream)
{
	m_scanner = make_shared(_charStream);
	unique_ptr block = parseInline(m_scanner);
	expectToken(Token::EOS);
	return block;
}

/// Parses a single block using the given, already positioned scanner.
/// The scanner is put into Yul mode for the duration of the call.
/// @returns the parsed block, or nullptr if a FatalError was caught while parsing.
unique_ptr Parser::parseInline(std::shared_ptr const& _scanner)
{
	m_recursionDepth = 0;

	_scanner->setScannerMode(ScannerKind::Yul);
	// The guard's callback switches the scanner back to Solidity mode
	// (presumably invoked when the guard goes out of scope - confirm against ScopeGuard).
	ScopeGuard resetScanner([&]{ _scanner->setScannerMode(ScannerKind::Solidity); });

	try
	{
		m_scanner = _scanner;
		// Source names are only set when locations are to be read from comments;
		// in that case pick up a location from the comment preceding the first token.
		if (m_sourceNames)
			fetchSourceLocationFromComment();
		return make_unique(parseBlock());
	}
	catch (FatalError const&)
	{
		// A fatal error must always be accompanied by a reported error.
		yulAssert(!m_errorReporter.errors().empty(), "Fatal error detected, but no error is reported.");
	}

	return nullptr;
}

/// Advances to the next token via ParserBase::advance() and, when source locations are
/// taken from comments, refreshes the debug data override from the current comment.
/// @returns the token returned by ParserBase::advance().
langutil::Token Parser::advance()
{
	auto const token = ParserBase::advance();
	if (m_useSourceLocationFrom == UseSourceLocationFrom::Comments)
		fetchSourceLocationFromComment();
	return token;
}

/// Scans the scanner's current comment literal for `@tag` annotations and, for every
/// `@src <index>:<start>:<end> ["snippet"]` annotation found, updates m_debugDataOverride.
/// Unknown tags are skipped. Malformed `@src` arguments or an unterminated snippet
/// report a syntax error and stop processing of the comment.
/// Requires m_sourceNames to be set.
void Parser::fetchSourceLocationFromComment()
{
	solAssert(m_sourceNames.has_value(), "");

	if (m_scanner->currentCommentLiteral().empty())
		return;

	// A tag is '@' followed by letters, digits, '-' or '_', delimited on both sides by
	// whitespace or the start/end of the searched range. Capture group 1 is the tag itself.
	static regex const tagRegex = regex(
		R"~~((?:^|\s+)(@[a-zA-Z0-9\-_]+)(?:\s+|$))~~", // tag, e.g: @src
		regex_constants::ECMAScript | regex_constants::optimize
	);
	// Groups 1-3: source index, start and end (each either -1 or a run of digits).
	// Group 4 (optional): a double-quoted snippet whose closing quote may be missing.
	static regex const srcTagArgsRegex = regex(
		R"~~(^(-1|\d+):(-1|\d+):(-1|\d+)(?:\s+|$))~~" // index and location, e.g.: 1:234:-1
		R"~~(("(?:[^"\\]|\\.)*"?)?)~~", // optional code snippet, e.g.: "string memory s = \"abc\";..."
		regex_constants::ECMAScript | regex_constants::optimize
	);

	string const commentLiteral = m_scanner->currentCommentLiteral();
	SourceLocation const commentLocation = m_scanner->currentCommentLocation();
	smatch tagMatch;
	// Start of the part of the comment that has not been consumed yet.
	string::const_iterator position = commentLiteral.begin();

	while (regex_search(position, commentLiteral.end(), tagMatch, tagRegex))
	{
		// Whole match plus the single capture group.
		solAssert(tagMatch.size() == 2, "");
		// Continue right after the tag (including its trailing whitespace).
		position += tagMatch.position() + tagMatch.length();

		if (tagMatch[1] == "@src")
		{
			smatch srcTagArgsMatch;
			if (!regex_search(position, commentLiteral.end(), srcTagArgsMatch, srcTagArgsRegex))
			{
				m_errorReporter.syntaxError(
					8387_error,
					commentLocation,
					"Invalid values in source location mapping. Could not parse location specification."
				);

				// If the arguments to @src are malformed, we don't know where they end so we can't continue.
				return;
			}

			// Whole match plus the four capture groups.
			solAssert(srcTagArgsMatch.size() == 5, "");
			position += srcTagArgsMatch.position() + srcTagArgsMatch.length();

			// The regex tolerates a missing closing quote so that it can be diagnosed here:
			// the snippet is rejected if it does not end in '"' or if it ends in '\"'.
			if (srcTagArgsMatch[4].matched && (
				!boost::algorithm::ends_with(srcTagArgsMatch[4].str(), "\"") ||
				boost::algorithm::ends_with(srcTagArgsMatch[4].str(), "\\\"")
			))
			{
				m_errorReporter.syntaxError(
					1544_error,
					commentLocation,
					"Invalid code snippet in source location mapping. Quote is not terminated."
				);
				return;
			}

			// toInt() yields no value if stoi throws (e.g. for values that do not fit).
			optional const sourceIndex = toInt(srcTagArgsMatch[1].str());
			optional const start = toInt(srcTagArgsMatch[2].str());
			optional const end = toInt(srcTagArgsMatch[3].str());

			// Reset the override first; it keeps this default value on the error paths below.
			m_debugDataOverride = DebugData::create();
			if (!sourceIndex.has_value() || !start.has_value() || !end.has_value())
				m_errorReporter.syntaxError(
					6367_error,
					commentLocation,
					"Invalid value in source location mapping. "
					"Expected non-negative integer values or -1 for source index and location."
				);
			else if (sourceIndex == -1)
				// Source index -1: keep start/end but without a source name.
				m_debugDataOverride = DebugData::create(SourceLocation{start.value(), end.value(), nullptr});
			else if (!(sourceIndex >= 0 && m_sourceNames->count(static_cast(sourceIndex.value()))))
				m_errorReporter.syntaxError(
					2674_error,
					commentLocation,
					"Invalid source mapping. Source index not defined via @use-src."
				);
			else
			{
				shared_ptr sourceName = m_sourceNames->at(static_cast(sourceIndex.value()));
				solAssert(sourceName, "");
				m_debugDataOverride = DebugData::create(SourceLocation{start.value(), end.value(), move(sourceName)});
			}
		}
		else
			// Ignore unrecognized tags.
			continue;
	}
}

/// Parses `{ statement* }`.
/// In Scanner mode the block's location end is set to the end of the closing brace.
Block Parser::parseBlock()
{
	RecursionGuard recursionGuard(*this);
	Block block = createWithLocation();
	expectToken(Token::LBrace);
	while (currentToken() != Token::RBrace)
		block.statements.emplace_back(parseStatement());
	// currentLocation() still refers to the closing brace here; it is consumed afterwards.
	if (m_useSourceLocationFrom == UseSourceLocationFrom::Scanner)
		block.debugData = updateLocationEndFrom(block.debugData, currentLocation());
	advance();
	return block;
}

/// Parses a single statement, dispatching on the current token.
/// Keyword-introduced statements (let, function, block, if, switch, for, break, continue,
/// leave) are handled first; anything else must be a function call used as a statement
/// or an assignment to one or more identifiers.
Statement Parser::parseStatement()
{
	RecursionGuard recursionGuard(*this);
	switch (currentToken())
	{
	case Token::Let:
		return parseVariableDeclaration();
	case Token::Function:
		return parseFunctionDefinition();
	case Token::LBrace:
		return parseBlock();
	case Token::If:
	{
		If _if = createWithLocation();
		advance();
		_if.condition = make_unique(parseExpression());
		_if.body = parseBlock();
		if (m_useSourceLocationFrom == UseSourceLocationFrom::Scanner)
			_if.debugData = updateLocationEndFrom(_if.debugData, locationOf(_if.body));
		return Statement{move(_if)};
	}
	case Token::Switch:
	{
		Switch _switch = createWithLocation();
		advance();
		_switch.expression = make_unique(parseExpression());
		// Any number of `case` clauses, followed by at most one `default` clause.
		while (currentToken() == Token::Case)
			_switch.cases.emplace_back(parseCase());
		if (currentToken() == Token::Default)
			_switch.cases.emplace_back(parseCase());
		// Anything case-like after the default clause is an error.
		if (currentToken() == Token::Default)
			fatalParserError(6931_error, "Only one default case allowed.");
		else if (currentToken() == Token::Case)
			fatalParserError(4904_error, "Case not allowed after default case.");
		if (_switch.cases.empty())
			fatalParserError(2418_error, "Switch statement without any cases.");
		if (m_useSourceLocationFrom == UseSourceLocationFrom::Scanner)
			_switch.debugData = updateLocationEndFrom(_switch.debugData, locationOf(_switch.cases.back().body));
		return Statement{move(_switch)};
	}
	case Token::For:
		return parseForLoop();
	case Token::Break:
	{
		// The node is created before advancing so that it gets the keyword's location.
		Statement stmt{createWithLocation()};
		checkBreakContinuePosition("break");
		advance();
		return stmt;
	}
	case Token::Continue:
	{
		Statement stmt{createWithLocation()};
		checkBreakContinuePosition("continue");
		advance();
		return stmt;
	}
	case Token::Leave:
	{
		Statement stmt{createWithLocation()};
		// Reported as a (non-fatal) syntax error; parsing continues.
		if (!m_insideFunction)
			m_errorReporter.syntaxError(8149_error, currentLocation(), "Keyword \"leave\" can only be used inside a function.");
		advance();
		return stmt;
	}
	default:
		break;
	}

	// Options left:
	// Expression/FunctionCall
	// Assignment
	variant elementary(parseLiteralOrIdentifier());

	switch (currentToken())
	{
	case Token::LParen:
	{
		Expression expr = parseCall(std::move(elementary));
		return ExpressionStatement{debugDataOf(expr), move(expr)};
	}
	case Token::Comma:
	case Token::AssemblyAssign:
	{
		Assignment assignment;
		assignment.debugData = debugDataOf(elementary);

		// Collect the comma-separated list of assignment targets.
		while (true)
		{
			if (!holds_alternative(elementary))
			{
				auto const token = currentToken() == Token::Comma ? "," : ":=";

				fatalParserError(
					2856_error,
					std::string("Variable name must precede \"") +
					token +
					"\"" +
					(currentToken() == Token::Comma ? " in multiple assignment." : " in assignment.")
				);
			}

			// NOTE(review): relies on fatalParserError() not returning (a FatalError is
			// caught in parseInline()) - confirm against ParserBase.
			auto const& identifier = std::get(elementary);

			if (m_dialect.builtin(identifier.name))
				fatalParserError(6272_error, "Cannot assign to builtin function \"" + identifier.name.str() + "\".");

			assignment.variableNames.emplace_back(identifier);

			if (currentToken() != Token::Comma)
				break;

			expectToken(Token::Comma);

			elementary = parseLiteralOrIdentifier();
		}

		expectToken(Token::AssemblyAssign);

		assignment.value = make_unique(parseExpression());
		if (m_useSourceLocationFrom == UseSourceLocationFrom::Scanner)
			assignment.debugData = updateLocationEndFrom(assignment.debugData, locationOf(*assignment.value));

		return Statement{move(assignment)};
	}
	default:
		fatalParserError(6913_error, "Call or assignment expected.");
		break;
	}

	// Every path above either returns or raises a fatal parser error.
	yulAssert(false, "");
	return {};
}

/// Parses one clause of a switch statement: either `default <block>` or
/// `case <literal> <block>`. Must only be called when the current token is
/// `default` or `case`. For the default clause, `value` is left unset.
Case Parser::parseCase()
{
	RecursionGuard recursionGuard(*this);
	Case _case = createWithLocation();
	if (currentToken() == Token::Default)
		advance();
	else if (currentToken() == Token::Case)
	{
		advance();
		variant literal = parseLiteralOrIdentifier();
		// Identifiers are not allowed as case values.
		if (!holds_alternative(literal))
			fatalParserError(4805_error, "Literal expected.");
		_case.value = make_unique(std::get(std::move(literal)));
	}
	else
		yulAssert(false, "Case or default case expected.");
	_case.body = parseBlock();
	if (m_useSourceLocationFrom == UseSourceLocationFrom::Scanner)
		_case.debugData = updateLocationEndFrom(_case.debugData, locationOf(_case.body));
	return _case;
}

/// Parses `for <pre-block> <condition> <post-block> <body-block>`.
/// m_currentForLoopComponent is updated while each part is parsed (it is read by
/// checkBreakContinuePosition() and parseFunctionDefinition()) and restored to the
/// enclosing value afterwards.
ForLoop Parser::parseForLoop()
{
	RecursionGuard recursionGuard(*this);

	// Remember the enclosing component so nested loops restore it correctly.
	ForLoopComponent outerForLoopComponent = m_currentForLoopComponent;

	ForLoop forLoop = createWithLocation();
	expectToken(Token::For);
	m_currentForLoopComponent = ForLoopComponent::ForLoopPre;
	forLoop.pre = parseBlock();
	m_currentForLoopComponent = ForLoopComponent::None;
	forLoop.condition = make_unique(parseExpression());
	m_currentForLoopComponent = ForLoopComponent::ForLoopPost;
	forLoop.post = parseBlock();
	m_currentForLoopComponent = ForLoopComponent::ForLoopBody;
	forLoop.body = parseBlock();
	if (m_useSourceLocationFrom == UseSourceLocationFrom::Scanner)
		forLoop.debugData = updateLocationEndFrom(forLoop.debugData, locationOf(forLoop.body));

	m_currentForLoopComponent = outerForLoopComponent;

	return forLoop;
}

/// Parses an expression: a literal, an identifier, or a function call.
/// An identifier followed by `(` is parsed as a call; an identifier naming a builtin
/// of the dialect that is not followed by `(` is a fatal error.
Expression Parser::parseExpression()
{
	RecursionGuard recursionGuard(*this);

	variant operation = parseLiteralOrIdentifier();
	return visit(GenericVisitor{
		[&](Identifier& _identifier) -> Expression
		{
			if (currentToken() == Token::LParen)
				return parseCall(std::move(operation));
			if (m_dialect.builtin(_identifier.name))
				fatalParserError(
					7104_error,
					locationOf(_identifier),
					"Builtin function \"" + _identifier.name.str() + "\" must be called."
				);
			return move(_identifier);
		},
		[&](Literal& _literal) -> Expression
		{
			return move(_literal);
		}
	}, operation);
}

/// Parses either an identifier or a literal (string, hex string, number, true, false),
/// the latter optionally followed by `:<type>`.
/// Number literals are validated with isValidNumberLiteral().
/// Any other token results in a fatal parser error.
variant Parser::parseLiteralOrIdentifier()
{
	RecursionGuard recursionGuard(*this);
	switch (currentToken())
	{
	case Token::Identifier:
	{
		Identifier identifier{createDebugData(), YulString{currentLiteral()}};
		advance();
		return identifier;
	}
	case Token::StringLiteral:
	case Token::HexStringLiteral:
	case Token::Number:
	case Token::TrueLiteral:
	case Token::FalseLiteral:
	{
		// Map the token to the kind of literal.
		LiteralKind kind = LiteralKind::Number;
		switch (currentToken())
		{
		case Token::StringLiteral:
		case Token::HexStringLiteral:
			kind = LiteralKind::String;
			break;
		case Token::Number:
			if (!isValidNumberLiteral(currentLiteral()))
				fatalParserError(4828_error, "Invalid number literal.");
			kind = LiteralKind::Number;
			break;
		case Token::TrueLiteral:
		case Token::FalseLiteral:
			kind = LiteralKind::Boolean;
			break;
		default:
			break;
		}

		// Without an explicit type annotation, booleans get the dialect's bool type and
		// everything else the dialect's default type.
		Literal literal{
			createDebugData(),
			kind,
			YulString{currentLiteral()},
			kind == LiteralKind::Boolean ? m_dialect.boolType : m_dialect.defaultType
		};
		advance();
		if (currentToken() == Token::Colon)
		{
			expectToken(Token::Colon);
			// Extend the literal's location to cover the type name, which is the
			// current token at this point.
			if (m_useSourceLocationFrom == UseSourceLocationFrom::Scanner)
				literal.debugData = updateLocationEndFrom(literal.debugData, currentLocation());
			literal.type = expectAsmIdentifier();
		}

		return literal;
	}
	case Token::Illegal:
		fatalParserError(1465_error, "Illegal token: " + to_string(m_scanner->currentError()));
		break;
	default:
		fatalParserError(1856_error, "Literal or identifier expected.");
	}
	return {};
}

/// Parses `let <typed-name> (, <typed-name>)* [:= <expression>]`.
/// In Scanner mode the location ends at the value expression if present, otherwise at
/// the last declared variable.
VariableDeclaration Parser::parseVariableDeclaration()
{
	RecursionGuard recursionGuard(*this);
	VariableDeclaration varDecl = createWithLocation();
	expectToken(Token::Let);
	// At least one variable is required; further ones are separated by commas.
	while (true)
	{
		varDecl.variables.emplace_back(parseTypedName());
		if (currentToken() == Token::Comma)
			expectToken(Token::Comma);
		else
			break;
	}
	if (currentToken() == Token::AssemblyAssign)
	{
		expectToken(Token::AssemblyAssign);
		varDecl.value = make_unique(parseExpression());
		if (m_useSourceLocationFrom == UseSourceLocationFrom::Scanner)
			varDecl.debugData = updateLocationEndFrom(varDecl.debugData, locationOf(*varDecl.value));
	}
	else if (m_useSourceLocationFrom == UseSourceLocationFrom::Scanner)
		varDecl.debugData = updateLocationEndFrom(varDecl.debugData, locationOf(varDecl.variables.back()));

	return varDecl;
}

/// Parses `function <name>(<params>) [-> <returns>] <block>`.
/// A definition inside a for-loop init block is reported as a (non-fatal) syntax error.
/// While the function is parsed, the for-loop component is reset to None and
/// m_insideFunction is set; both are restored afterwards.
FunctionDefinition Parser::parseFunctionDefinition()
{
	RecursionGuard recursionGuard(*this);

	if (m_currentForLoopComponent == ForLoopComponent::ForLoopPre)
		m_errorReporter.syntaxError(
			3441_error,
			currentLocation(),
			"Functions cannot be defined inside a for-loop init block."
		);

	// A function body is not part of any enclosing loop, so break/continue inside it
	// must not see the outer loop component.
	ForLoopComponent outerForLoopComponent = m_currentForLoopComponent;
	m_currentForLoopComponent = ForLoopComponent::None;

	FunctionDefinition funDef = createWithLocation();
	expectToken(Token::Function);
	funDef.name = expectAsmIdentifier();
	expectToken(Token::LParen);
	// Possibly empty, comma-separated parameter list.
	while (currentToken() != Token::RParen)
	{
		funDef.parameters.emplace_back(parseTypedName());
		if (currentToken() == Token::RParen)
			break;
		expectToken(Token::Comma);
	}
	expectToken(Token::RParen);
	if (currentToken() == Token::RightArrow)
	{
		expectToken(Token::RightArrow);
		// Non-empty, comma-separated list of return variables, terminated by the body's '{'.
		while (true)
		{
			funDef.returnVariables.emplace_back(parseTypedName());
			if (currentToken() == Token::LBrace)
				break;
			expectToken(Token::Comma);
		}
	}
	bool preInsideFunction = m_insideFunction;
	m_insideFunction = true;
	funDef.body = parseBlock();
	m_insideFunction = preInsideFunction;
	if (m_useSourceLocationFrom == UseSourceLocationFrom::Scanner)
		funDef.debugData = updateLocationEndFrom(funDef.debugData, locationOf(funDef.body));

	m_currentForLoopComponent = outerForLoopComponent;
	return funDef;
}

/// Parses the argument list of a function call whose name has already been parsed.
/// @param _initialOp the already parsed callee; it is a fatal error if it is not an identifier.
/// In Scanner mode the call's location ends at the closing parenthesis.
FunctionCall Parser::parseCall(variant&& _initialOp)
{
	RecursionGuard recursionGuard(*this);

	if (!holds_alternative(_initialOp))
		fatalParserError(9980_error, "Function name expected.");

	FunctionCall ret;
	ret.functionName = std::move(std::get(_initialOp));
	ret.debugData = ret.functionName.debugData;

	expectToken(Token::LParen);
	if (currentToken() != Token::RParen)
	{
		ret.arguments.emplace_back(parseExpression());
		while (currentToken() != Token::RParen)
		{
			expectToken(Token::Comma);
			ret.arguments.emplace_back(parseExpression());
		}
	}
	// Taken before the ')' is consumed so that currentLocation() refers to it.
	if (m_useSourceLocationFrom == UseSourceLocationFrom::Scanner)
		ret.debugData = updateLocationEndFrom(ret.debugData, currentLocation());
	expectToken(Token::RParen);
	return ret;
}

/// Parses `<name>[:<type>]`. Without an explicit type the dialect's default type is used.
TypedName Parser::parseTypedName()
{
	RecursionGuard recursionGuard(*this);
	TypedName typedName = createWithLocation();
	typedName.name = expectAsmIdentifier();
	if (currentToken() == Token::Colon)
	{
		expectToken(Token::Colon);
		// Extend the location to cover the type name, which is the current token here.
		if (m_useSourceLocationFrom == UseSourceLocationFrom::Scanner)
			typedName.debugData = updateLocationEndFrom(typedName.debugData, currentLocation());
		typedName.type = expectAsmIdentifier();
	}
	else
		typedName.type = m_dialect.defaultType;

	return typedName;
}

/// Expects the current token to be an identifier that does not name a builtin function
/// of the dialect, consumes it and returns its name.
YulString Parser::expectAsmIdentifier()
{
	// Read the literal before the token is consumed below.
	YulString name{currentLiteral()};
	if (currentToken() == Token::Identifier && m_dialect.builtin(name))
		fatalParserError(5568_error, "Cannot use builtin function name \"" + name.str() + "\" as identifier name.");
	// NOTE: We keep the expectation here to ensure the correct source location for the error above.
	expectToken(Token::Identifier);
	return name;
}

/// Reports a (non-fatal) syntax error if `break` / `continue` is used anywhere other
/// than directly inside a for-loop body.
/// @param _which the keyword ("break" or "continue") to mention in the message.
void Parser::checkBreakContinuePosition(string const& _which)
{
	switch (m_currentForLoopComponent)
	{
	case ForLoopComponent::None:
		m_errorReporter.syntaxError(2592_error, currentLocation(), "Keyword \"" + _which + "\" needs to be inside a for-loop body.");
		break;
	case ForLoopComponent::ForLoopPre:
		m_errorReporter.syntaxError(9615_error, currentLocation(), "Keyword \"" + _which + "\" in for-loop init block is not allowed.");
		break;
	case ForLoopComponent::ForLoopPost:
		m_errorReporter.syntaxError(2461_error, currentLocation(), "Keyword \"" + _which + "\" in for-loop post block is not allowed.");
		break;
	case ForLoopComponent::ForLoopBody:
		// The only valid position.
		break;
	}
}

/// Checks whether @a _literal is an acceptable number literal.
/// @returns false if constructing a u256 from it throws; otherwise true if it starts
/// with "0x" or consists solely of decimal digits.
bool Parser::isValidNumberLiteral(string const& _literal)
{
	try
	{
		// Try to convert _literal to u256.
		[[maybe_unused]] auto tmp = u256(_literal);
	}
	catch (...)
	{
		return false;
	}
	if (boost::starts_with(_literal, "0x"))
		return true;
	else
		// Reject anything the u256 constructor accepted that is not plain decimal digits.
		return _literal.find_first_not_of("0123456789") == string::npos;
}