Refactor source comment parsing.

This commit is contained in:
chriseth 2021-09-15 15:23:22 +02:00
parent 0fa24c786b
commit d708612e27
2 changed files with 91 additions and 76 deletions

View File

@ -104,7 +104,7 @@ unique_ptr<Block> Parser::parseInline(std::shared_ptr<Scanner> const& _scanner)
{ {
m_scanner = _scanner; m_scanner = _scanner;
if (m_sourceNames) if (m_sourceNames)
fetchSourceLocationFromComment(); fetchDebugDataFromComment();
return make_unique<Block>(parseBlock()); return make_unique<Block>(parseBlock());
} }
catch (FatalError const&) catch (FatalError const&)
@ -119,99 +119,107 @@ langutil::Token Parser::advance()
{ {
auto const token = ParserBase::advance(); auto const token = ParserBase::advance();
if (m_useSourceLocationFrom == UseSourceLocationFrom::Comments) if (m_useSourceLocationFrom == UseSourceLocationFrom::Comments)
fetchSourceLocationFromComment(); fetchDebugDataFromComment();
return token; return token;
} }
void Parser::fetchSourceLocationFromComment() void Parser::fetchDebugDataFromComment()
{ {
solAssert(m_sourceNames.has_value(), ""); solAssert(m_sourceNames.has_value(), "");
if (m_scanner->currentCommentLiteral().empty())
return;
static regex const tagRegex = regex( static regex const tagRegex = regex(
R"~~((?:^|\s+)(@[a-zA-Z0-9\-_]+)(?:\s+|$))~~", // tag, e.g: @src R"~~((?:^|\s+)(@[a-zA-Z0-9\-_]+)(?:\s+|$))~~", // tag, e.g: @src
regex_constants::ECMAScript | regex_constants::optimize regex_constants::ECMAScript | regex_constants::optimize
); );
static regex const srcTagArgsRegex = regex(
string_view commentLiteral = m_scanner->currentCommentLiteral();
match_results<string_view::const_iterator> match;
langutil::SourceLocation sourceLocation = m_debugDataOverride->location;
while (regex_search(commentLiteral.cbegin(), commentLiteral.cend(), match, tagRegex))
{
solAssert(match.size() == 2, "");
commentLiteral = commentLiteral.substr(static_cast<size_t>(match.position() + match.length()));
if (match[1] == "@src")
{
if (auto parseResult = parseSrcComment(commentLiteral, m_scanner->currentCommentLocation()))
tie(commentLiteral, sourceLocation) = *parseResult;
else
break;
}
else
// Ignore unrecognized tags.
continue;
}
m_debugDataOverride = DebugData::create(sourceLocation);
}
optional<pair<string_view, SourceLocation>> Parser::parseSrcComment(
string_view const _arguments,
langutil::SourceLocation const& _commentLocation
)
{
static regex const argsRegex = regex(
R"~~(^(-1|\d+):(-1|\d+):(-1|\d+)(?:\s+|$))~~" // index and location, e.g.: 1:234:-1 R"~~(^(-1|\d+):(-1|\d+):(-1|\d+)(?:\s+|$))~~" // index and location, e.g.: 1:234:-1
R"~~(("(?:[^"\\]|\\.)*"?)?)~~", // optional code snippet, e.g.: "string memory s = \"abc\";..." R"~~(("(?:[^"\\]|\\.)*"?)?)~~", // optional code snippet, e.g.: "string memory s = \"abc\";..."
regex_constants::ECMAScript | regex_constants::optimize regex_constants::ECMAScript | regex_constants::optimize
); );
match_results<string_view::const_iterator> match;
string const commentLiteral = m_scanner->currentCommentLiteral(); if (!regex_search(_arguments.cbegin(), _arguments.cend(), match, argsRegex))
SourceLocation const commentLocation = m_scanner->currentCommentLocation();
smatch tagMatch;
string::const_iterator position = commentLiteral.begin();
while (regex_search(position, commentLiteral.end(), tagMatch, tagRegex))
{
solAssert(tagMatch.size() == 2, "");
position += tagMatch.position() + tagMatch.length();
if (tagMatch[1] == "@src")
{
smatch srcTagArgsMatch;
if (!regex_search(position, commentLiteral.end(), srcTagArgsMatch, srcTagArgsRegex))
{ {
m_errorReporter.syntaxError( m_errorReporter.syntaxError(
8387_error, 8387_error,
commentLocation, _commentLocation,
"Invalid values in source location mapping. Could not parse location specification." "Invalid values in source location mapping. Could not parse location specification."
); );
return nullopt;
// If the arguments to @src are malformed, we don't know where they end so we can't continue.
return;
} }
solAssert(srcTagArgsMatch.size() == 5, ""); solAssert(match.size() == 5, "");
position += srcTagArgsMatch.position() + srcTagArgsMatch.length(); string_view tail = _arguments.substr(static_cast<size_t>(match.position() + match.length()));
if (srcTagArgsMatch[4].matched && ( if (match[4].matched && (
!boost::algorithm::ends_with(srcTagArgsMatch[4].str(), "\"") || !boost::algorithm::ends_with(match[4].str(), "\"") ||
boost::algorithm::ends_with(srcTagArgsMatch[4].str(), "\\\"") boost::algorithm::ends_with(match[4].str(), "\\\"")
)) ))
{ {
m_errorReporter.syntaxError( m_errorReporter.syntaxError(
1544_error, 1544_error,
commentLocation, _commentLocation,
"Invalid code snippet in source location mapping. Quote is not terminated." "Invalid code snippet in source location mapping. Quote is not terminated."
); );
return; return {{tail, SourceLocation{}}};
} }
optional<int> const sourceIndex = toInt(srcTagArgsMatch[1].str()); optional<int> const sourceIndex = toInt(match[1].str());
optional<int> const start = toInt(srcTagArgsMatch[2].str()); optional<int> const start = toInt(match[2].str());
optional<int> const end = toInt(srcTagArgsMatch[3].str()); optional<int> const end = toInt(match[3].str());
m_debugDataOverride = DebugData::create();
if (!sourceIndex.has_value() || !start.has_value() || !end.has_value()) if (!sourceIndex.has_value() || !start.has_value() || !end.has_value())
m_errorReporter.syntaxError( m_errorReporter.syntaxError(
6367_error, 6367_error,
commentLocation, _commentLocation,
"Invalid value in source location mapping. " "Invalid value in source location mapping. "
"Expected non-negative integer values or -1 for source index and location." "Expected non-negative integer values or -1 for source index and location."
); );
else if (sourceIndex == -1) else if (sourceIndex == -1)
m_debugDataOverride = DebugData::create(SourceLocation{start.value(), end.value(), nullptr}); return {{tail, SourceLocation{start.value(), end.value(), nullptr}}};
else if (!(sourceIndex >= 0 && m_sourceNames->count(static_cast<unsigned>(sourceIndex.value())))) else if (!(sourceIndex >= 0 && m_sourceNames->count(static_cast<unsigned>(sourceIndex.value()))))
m_errorReporter.syntaxError( m_errorReporter.syntaxError(
2674_error, 2674_error,
commentLocation, _commentLocation,
"Invalid source mapping. Source index not defined via @use-src." "Invalid source mapping. Source index not defined via @use-src."
); );
else else
{ {
shared_ptr<string const> sourceName = m_sourceNames->at(static_cast<unsigned>(sourceIndex.value())); shared_ptr<string const> sourceName = m_sourceNames->at(static_cast<unsigned>(sourceIndex.value()));
solAssert(sourceName, ""); solAssert(sourceName, "");
m_debugDataOverride = DebugData::create(SourceLocation{start.value(), end.value(), move(sourceName)}); return {{tail, SourceLocation{start.value(), end.value(), move(sourceName)}}};
}
}
else
// Ignore unrecognized tags.
continue;
} }
return {{tail, SourceLocation{}}};
} }
Block Parser::parseBlock() Block Parser::parseBlock()

View File

@ -35,6 +35,7 @@
#include <memory> #include <memory>
#include <variant> #include <variant>
#include <vector> #include <vector>
#include <string_view>
namespace solidity::yul namespace solidity::yul
{ {
@ -68,8 +69,8 @@ public:
} }
{} {}
/// Constructs a Yul parser that is using the source locations /// Constructs a Yul parser that is using the debug data
/// from the comments (via @src). /// from the comments (via @src and other tags).
explicit Parser( explicit Parser(
langutil::ErrorReporter& _errorReporter, langutil::ErrorReporter& _errorReporter,
Dialect const& _dialect, Dialect const& _dialect,
@ -105,7 +106,13 @@ protected:
langutil::Token advance() override; langutil::Token advance() override;
void fetchSourceLocationFromComment(); void fetchDebugDataFromComment();
std::optional<std::pair<std::string_view, langutil::SourceLocation>>
parseSrcComment(
std::string_view _arguments,
langutil::SourceLocation const& _commentLocation
);
/// Creates a DebugData object with the correct source location set. /// Creates a DebugData object with the correct source location set.
std::shared_ptr<DebugData const> createDebugData() const; std::shared_ptr<DebugData const> createDebugData() const;