From 14396c207cf6e0513b5fbfae3eb33fd4c1eb186b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20=C5=9Aliwak?= Date: Wed, 8 Sep 2021 16:08:05 +0200 Subject: [PATCH] AsmParser: Generalize location comment parsing to make it easier to add support for more tags --- libyul/AsmParser.cpp | 86 +++++++++++++++++++++++++++--------------- scripts/error_codes.py | 1 + test/libyul/Parser.cpp | 17 +++++++-- 3 files changed, 70 insertions(+), 34 deletions(-) diff --git a/libyul/AsmParser.cpp b/libyul/AsmParser.cpp index 4e82fe9b9..382cc72aa 100644 --- a/libyul/AsmParser.cpp +++ b/libyul/AsmParser.cpp @@ -131,46 +131,72 @@ void Parser::fetchSourceLocationFromComment() return; static regex const tagRegex = regex( - R"~~(\s*@src\s+)~~" // tag: @src - R"~~((-1|\d+):(-1|\d+):(-1|\d+)(?:\s+|$))~~", // index and location, e.g.: 1:234:-1 + R"~~(\s*(@[a-zA-Z0-9\-_]+)(?:\s+|$))~~", // tag, e.g: @src + regex_constants::ECMAScript | regex_constants::optimize + ); + static regex const srcTagArgsRegex = regex( + R"~~(^(-1|\d+):(-1|\d+):(-1|\d+)(?:\s+|$))~~", // index and location, e.g.: 1:234:-1 regex_constants::ECMAScript | regex_constants::optimize ); string const commentLiteral = m_scanner->currentCommentLiteral(); SourceLocation const commentLocation = m_scanner->currentCommentLocation(); - auto from = sregex_iterator(commentLiteral.begin(), commentLiteral.end(), tagRegex); - auto to = sregex_iterator(); + smatch tagMatch; + string::const_iterator position = commentLiteral.begin(); - for (auto const& tagMatch: ranges::make_subrange(from, to)) + while (regex_search(position, commentLiteral.end(), tagMatch, tagRegex)) { - solAssert(tagMatch.size() == 4, ""); + solAssert(tagMatch.size() == 2, ""); + position += tagMatch.position() + tagMatch.length(); - optional const sourceIndex = toInt(tagMatch[1].str()); - optional const start = toInt(tagMatch[2].str()); - optional const end = toInt(tagMatch[3].str()); - - auto const commentLocation = m_scanner->currentCommentLocation(); - m_debugDataOverride = DebugData::create(); - if (!sourceIndex.has_value() || !start.has_value() || !end.has_value()) - m_errorReporter.syntaxError( - 6367_error, - commentLocation, - "Invalid value in source location mapping. Could not parse location specification." - ); - else if (sourceIndex == -1) - m_debugDataOverride = DebugData::create(SourceLocation{start.value(), end.value(), nullptr}); - else if (!(sourceIndex >= 0 && m_sourceNames->count(static_cast(sourceIndex.value())))) - m_errorReporter.syntaxError( - 2674_error, - commentLocation, - "Invalid source mapping. Source index not defined via @use-src." - ); - else + if (tagMatch[1] == "@src") { - shared_ptr sourceName = m_sourceNames->at(static_cast(sourceIndex.value())); - solAssert(sourceName, ""); - m_debugDataOverride = DebugData::create(SourceLocation{start.value(), end.value(), move(sourceName)}); + smatch srcTagArgsMatch; + if (!regex_search(position, commentLiteral.end(), srcTagArgsMatch, srcTagArgsRegex)) + { + m_errorReporter.syntaxError( + 8387_error, + commentLocation, + "Invalid values in source location mapping. Could not parse location specification." + ); + + // If the arguments to @src are malformed, we don't know where they end so we can't continue. + return; + } + + solAssert(srcTagArgsMatch.size() == 4, ""); + position += srcTagArgsMatch.position() + srcTagArgsMatch.length(); + + optional const sourceIndex = toInt(srcTagArgsMatch[1].str()); + optional const start = toInt(srcTagArgsMatch[2].str()); + optional const end = toInt(srcTagArgsMatch[3].str()); + + m_debugDataOverride = DebugData::create(); + if (!sourceIndex.has_value() || !start.has_value() || !end.has_value()) + m_errorReporter.syntaxError( + 6367_error, + commentLocation, + "Invalid value in source location mapping. " + "Expected non-negative integer values or -1 for source index and location." + ); + else if (sourceIndex == -1) + m_debugDataOverride = DebugData::create(SourceLocation{start.value(), end.value(), nullptr}); + else if (!(sourceIndex >= 0 && m_sourceNames->count(static_cast(sourceIndex.value())))) + m_errorReporter.syntaxError( + 2674_error, + commentLocation, + "Invalid source mapping. Source index not defined via @use-src." + ); + else + { + shared_ptr sourceName = m_sourceNames->at(static_cast(sourceIndex.value())); + solAssert(sourceName, ""); + m_debugDataOverride = DebugData::create(SourceLocation{start.value(), end.value(), move(sourceName)}); + } } + else + // Ignore unrecognized tags. + continue; } } diff --git a/scripts/error_codes.py b/scripts/error_codes.py index 320074ff1..6f3f9ce95 100755 --- a/scripts/error_codes.py +++ b/scripts/error_codes.py @@ -194,6 +194,7 @@ def examine_id_coverage(top_dir, source_id_to_file_names, new_ids_only=False): "9804", # Tested in test/libyul/ObjectParser.cpp. "2674", "6367", + "8387", "3805", # "This is a pre-release compiler version, please do not use it in production." # The warning may or may not exist in a compiler build. "4591", # "There are more than 256 warnings. Ignoring the rest." diff --git a/test/libyul/Parser.cpp b/test/libyul/Parser.cpp index e1ed28a52..67196ffc8 100644 --- a/test/libyul/Parser.cpp +++ b/test/libyul/Parser.cpp @@ -516,7 +516,10 @@ BOOST_AUTO_TEST_CASE(customSourceLocations_invalid_suffix) )"; EVMDialectTyped const& dialect = EVMDialectTyped::instance(EVMVersion{}); shared_ptr result = parse(sourceText, dialect, reporter); - BOOST_REQUIRE(!!result && errorList.size() == 0); + BOOST_REQUIRE(!!result); + BOOST_REQUIRE(errorList.size() == 1); + BOOST_TEST(errorList[0]->type() == Error::Type::SyntaxError); + BOOST_TEST(errorList[0]->errorId() == 8387_error); CHECK_LOCATION(result->debugData->location, "", -1, -1); } @@ -558,7 +561,10 @@ BOOST_AUTO_TEST_CASE(customSourceLocations_non_integer) )"; EVMDialectTyped const& dialect = EVMDialectTyped::instance(EVMVersion{}); shared_ptr result = parse(sourceText, dialect, reporter); - BOOST_REQUIRE(!!result && errorList.size() == 0); + BOOST_REQUIRE(!!result); + BOOST_REQUIRE(errorList.size() == 1); + BOOST_TEST(errorList[0]->type() == Error::Type::SyntaxError); + BOOST_TEST(errorList[0]->errorId() == 8387_error); CHECK_LOCATION(result->debugData->location, "", -1, -1); } @@ -611,8 +617,11 @@ BOOST_AUTO_TEST_CASE(customSourceLocations_two_locations_no_whitespace) )"; EVMDialectTyped const& dialect = EVMDialectTyped::instance(EVMVersion{}); shared_ptr result = parse(sourceText, dialect, reporter); - BOOST_REQUIRE(!!result && errorList.size() == 0); - CHECK_LOCATION(result->debugData->location, "source1", 333, 444); + BOOST_REQUIRE(!!result); + BOOST_REQUIRE(errorList.size() == 1); + BOOST_TEST(errorList[0]->type() == Error::Type::SyntaxError); + BOOST_TEST(errorList[0]->errorId() == 8387_error); + CHECK_LOCATION(result->debugData->location, "", -1, -1); } BOOST_AUTO_TEST_CASE(customSourceLocations_leading_trailing_whitespace)