Merge pull request #11968 from ethereum/refactorSrcCommentParsing

Refactor source comment parsing.
This commit is contained in:
chriseth 2021-09-16 14:42:15 +02:00 committed by GitHub
commit 227029d27a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 91 additions and 76 deletions

View File

@@ -104,7 +104,7 @@ unique_ptr<Block> Parser::parseInline(std::shared_ptr<Scanner> const& _scanner)
{ {
m_scanner = _scanner; m_scanner = _scanner;
if (m_sourceNames) if (m_sourceNames)
fetchSourceLocationFromComment(); fetchDebugDataFromComment();
return make_unique<Block>(parseBlock()); return make_unique<Block>(parseBlock());
} }
catch (FatalError const&) catch (FatalError const&)
@@ -119,99 +119,107 @@ langutil::Token Parser::advance()
{ {
auto const token = ParserBase::advance(); auto const token = ParserBase::advance();
if (m_useSourceLocationFrom == UseSourceLocationFrom::Comments) if (m_useSourceLocationFrom == UseSourceLocationFrom::Comments)
fetchSourceLocationFromComment(); fetchDebugDataFromComment();
return token; return token;
} }
// Side-by-side hunk: where a line carries two statements, the first is the pre-change
// code (fetchSourceLocationFromComment) and the second is the post-change code
// (fetchDebugDataFromComment). Lines carrying a single statement presumably exist on
// one side of the diff only.
//
// Post-change behaviour as visible in this hunk: the scanner's current comment literal
// is searched for `@tag` tokens using tagRegex, and after each match the view is moved
// past the tag. For `@src` the remaining text is passed to parseSrcComment(); a
// successful result replaces both the remaining text and the collected location, while
// a nullopt result ends the search. Any other tag is skipped. At the end
// m_debugDataOverride is replaced by a DebugData created from the collected location,
// which is the previous override's location if no `@src` tag was parsed.
void Parser::fetchSourceLocationFromComment() void Parser::fetchDebugDataFromComment()
{ {
solAssert(m_sourceNames.has_value(), ""); solAssert(m_sourceNames.has_value(), "");
if (m_scanner->currentCommentLiteral().empty())
return;
// Matches one tag (capture group 1) that is preceded by start-of-input or whitespace
// and followed by whitespace or end-of-input.
static regex const tagRegex = regex( static regex const tagRegex = regex(
R"~~((?:^|\s+)(@[a-zA-Z0-9\-_]+)(?:\s+|$))~~", // tag, e.g: @src R"~~((?:^|\s+)(@[a-zA-Z0-9\-_]+)(?:\s+|$))~~", // tag, e.g: @src
regex_constants::ECMAScript | regex_constants::optimize regex_constants::ECMAScript | regex_constants::optimize
); );
// The argument regex below appears on one side only; the same pattern shows up again
// as argsRegex inside parseSrcComment(), so this looks like the pre-change location.
static regex const srcTagArgsRegex = regex(
R"~~(^(-1|\d+):(-1|\d+):(-1|\d+)(?:\s+|$))~~" // index and location, e.g.: 1:234:-1
R"~~(("(?:[^"\\]|\\.)*"?)?)~~", // optional code snippet, e.g.: "string memory s = \"abc\";..."
regex_constants::ECMAScript | regex_constants::optimize
);
string const commentLiteral = m_scanner->currentCommentLiteral(); string_view commentLiteral = m_scanner->currentCommentLiteral();
SourceLocation const commentLocation = m_scanner->currentCommentLocation(); match_results<string_view::const_iterator> match;
smatch tagMatch;
string::const_iterator position = commentLiteral.begin();
// Post-change (second statement): seed the result with the location of the current
// override, so a comment without a parsable `@src` keeps that location.
// NOTE(review): m_debugDataOverride is dereferenced without a null check here —
// confirm it is always set before this function runs.
while (regex_search(position, commentLiteral.end(), tagMatch, tagRegex)) langutil::SourceLocation sourceLocation = m_debugDataOverride->location;
while (regex_search(commentLiteral.cbegin(), commentLiteral.cend(), match, tagRegex))
{ {
solAssert(tagMatch.size() == 2, ""); solAssert(match.size() == 2, "");
// Skip past the matched tag: the old code advances an iterator, the new code
// shrinks the string_view to the text following the match.
position += tagMatch.position() + tagMatch.length(); commentLiteral = commentLiteral.substr(static_cast<size_t>(match.position() + match.length()));
if (tagMatch[1] == "@src") if (match[1] == "@src")
{ {
// Old code parses the `@src` arguments inline (the single-statement lines that
// follow). New code delegates to parseSrcComment() and, on success, takes over the
// returned remaining text and source location.
smatch srcTagArgsMatch; if (auto parseResult = parseSrcComment(commentLiteral, m_scanner->currentCommentLocation()))
if (!regex_search(position, commentLiteral.end(), srcTagArgsMatch, srcTagArgsRegex)) tie(commentLiteral, sourceLocation) = *parseResult;
{
m_errorReporter.syntaxError(
8387_error,
commentLocation,
"Invalid values in source location mapping. Could not parse location specification."
);
// If the arguments to @src are malformed, we don't know where they end so we can't continue.
return;
}
solAssert(srcTagArgsMatch.size() == 5, "");
position += srcTagArgsMatch.position() + srcTagArgsMatch.length();
if (srcTagArgsMatch[4].matched && (
!boost::algorithm::ends_with(srcTagArgsMatch[4].str(), "\"") ||
boost::algorithm::ends_with(srcTagArgsMatch[4].str(), "\\\"")
))
{
m_errorReporter.syntaxError(
1544_error,
commentLocation,
"Invalid code snippet in source location mapping. Quote is not terminated."
);
return;
}
optional<int> const sourceIndex = toInt(srcTagArgsMatch[1].str());
optional<int> const start = toInt(srcTagArgsMatch[2].str());
optional<int> const end = toInt(srcTagArgsMatch[3].str());
m_debugDataOverride = DebugData::create();
if (!sourceIndex.has_value() || !start.has_value() || !end.has_value())
m_errorReporter.syntaxError(
6367_error,
commentLocation,
"Invalid value in source location mapping. "
"Expected non-negative integer values or -1 for source index and location."
);
else if (sourceIndex == -1)
m_debugDataOverride = DebugData::create(SourceLocation{start.value(), end.value(), nullptr});
else if (!(sourceIndex >= 0 && m_sourceNames->count(static_cast<unsigned>(sourceIndex.value()))))
m_errorReporter.syntaxError(
2674_error,
commentLocation,
"Invalid source mapping. Source index not defined via @use-src."
);
// New code (second statements on the next two lines): parseSrcComment() returned
// nullopt, i.e. the arguments did not match the expected format, so stop looking
// for further tags in this comment.
else else
{ break;
shared_ptr<string const> sourceName = m_sourceNames->at(static_cast<unsigned>(sourceIndex.value()));
solAssert(sourceName, "");
m_debugDataOverride = DebugData::create(SourceLocation{start.value(), end.value(), move(sourceName)});
}
} }
else else
// Ignore unrecognized tags. // Ignore unrecognized tags.
continue; continue;
} }
// Store the collected location as the new debug data override.
m_debugDataOverride = DebugData::create(sourceLocation);
}
/// Parses the arguments of an `@src` tag. The expected form is
/// `<sourceIndex>:<start>:<end>`, each part being -1 or a decimal number, optionally
/// followed by a double-quoted code snippet.
/// @param _arguments the text to parse; the pattern is anchored at its beginning.
/// @param _commentLocation location attached to every syntax error reported here.
/// @returns nullopt if _arguments does not match the expected form (error 8387).
/// Otherwise returns the text following the matched arguments paired with a source location.
/// That location is default-constructed if error 1544, 6367 or 2674 was reported.
optional<pair<string_view, SourceLocation>> Parser::parseSrcComment(
string_view const _arguments,
langutil::SourceLocation const& _commentLocation
)
{
static regex const argsRegex = regex(
R"~~(^(-1|\d+):(-1|\d+):(-1|\d+)(?:\s+|$))~~" // index and location, e.g.: 1:234:-1
R"~~(("(?:[^"\\]|\\.)*"?)?)~~", // optional code snippet, e.g.: "string memory s = \"abc\";..."
regex_constants::ECMAScript | regex_constants::optimize
);
match_results<string_view::const_iterator> match;
if (!regex_search(_arguments.cbegin(), _arguments.cend(), match, argsRegex))
{
m_errorReporter.syntaxError(
8387_error,
_commentLocation,
"Invalid values in source location mapping. Could not parse location specification."
);
return nullopt;
}
// Groups: 1 = source index, 2 = start, 3 = end, 4 = optional snippet (plus the full match).
solAssert(match.size() == 5, "");
// Text after the matched arguments; it is returned in every non-nullopt result.
string_view tail = _arguments.substr(static_cast<size_t>(match.position() + match.length()));
// The pattern makes the closing quote optional ("?), so an unterminated snippet still
// matches. It is diagnosed here when the snippet does not end in a quote, or ends in
// a backslash followed by a quote.
if (match[4].matched && (
!boost::algorithm::ends_with(match[4].str(), "\"") ||
boost::algorithm::ends_with(match[4].str(), "\\\"")
))
{
m_errorReporter.syntaxError(
1544_error,
_commentLocation,
"Invalid code snippet in source location mapping. Quote is not terminated."
);
return {{tail, SourceLocation{}}};
}
// toInt() is defined outside this hunk; presumably it yields nullopt when the text
// cannot be represented as an int — TODO confirm.
optional<int> const sourceIndex = toInt(match[1].str());
optional<int> const start = toInt(match[2].str());
optional<int> const end = toInt(match[3].str());
if (!sourceIndex.has_value() || !start.has_value() || !end.has_value())
m_errorReporter.syntaxError(
6367_error,
_commentLocation,
"Invalid value in source location mapping. "
"Expected non-negative integer values or -1 for source index and location."
);
// Source index -1: the location is returned with a null source name.
else if (sourceIndex == -1)
return {{tail, SourceLocation{start.value(), end.value(), nullptr}}};
// Any other index has to be a key of m_sourceNames.
else if (!(sourceIndex >= 0 && m_sourceNames->count(static_cast<unsigned>(sourceIndex.value()))))
m_errorReporter.syntaxError(
2674_error,
_commentLocation,
"Invalid source mapping. Source index not defined via @use-src."
);
else
{
shared_ptr<string const> sourceName = m_sourceNames->at(static_cast<unsigned>(sourceIndex.value()));
solAssert(sourceName, "");
return {{tail, SourceLocation{start.value(), end.value(), move(sourceName)}}};
}
// Reached only after error 6367 or 2674 was reported: hand back the tail with a
// default-constructed location.
return {{tail, SourceLocation{}}};
} }
Block Parser::parseBlock() Block Parser::parseBlock()

View File

@@ -35,6 +35,7 @@
#include <memory> #include <memory>
#include <variant> #include <variant>
#include <vector> #include <vector>
#include <string_view>
namespace solidity::yul namespace solidity::yul
{ {
@@ -68,8 +69,8 @@ public:
} }
{} {}
/// Constructs a Yul parser that is using the source locations /// Constructs a Yul parser that is using the debug data
/// from the comments (via @src). /// from the comments (via @src and other tags).
explicit Parser( explicit Parser(
langutil::ErrorReporter& _errorReporter, langutil::ErrorReporter& _errorReporter,
Dialect const& _dialect, Dialect const& _dialect,
@@ -105,7 +106,13 @@ protected:
langutil::Token advance() override; langutil::Token advance() override;
/// First declaration: pre-change name; second declaration: post-change name.
/// Inspects the tags in the scanner's current comment and updates m_debugDataOverride.
void fetchSourceLocationFromComment(); void fetchDebugDataFromComment();
/// Parses the arguments that follow an `@src` tag.
/// @param _arguments the text directly after the tag.
/// @param _commentLocation location attached to reported syntax errors.
/// @returns std::nullopt if the location specification cannot be parsed; otherwise the
/// unparsed remainder of _arguments paired with the resulting source location.
std::optional<std::pair<std::string_view, langutil::SourceLocation>>
parseSrcComment(
std::string_view _arguments,
langutil::SourceLocation const& _commentLocation
);
/// Creates a DebugData object with the correct source location set. /// Creates a DebugData object with the correct source location set.
std::shared_ptr<DebugData const> createDebugData() const; std::shared_ptr<DebugData const> createDebugData() const;