2014-10-20 14:37:04 +00:00
|
|
|
/*
|
2019-02-13 15:56:46 +00:00
|
|
|
* This file is part of solidity.
|
|
|
|
*
|
|
|
|
* solidity is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* solidity is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with solidity. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*
|
|
|
|
* This file is derived from the file "scanner.h", which was part of the
|
|
|
|
* V8 project. The original copyright header follows:
|
|
|
|
*
|
|
|
|
* Copyright 2006-2012, the V8 project authors. All rights reserved.
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions are
|
|
|
|
* met:
|
|
|
|
*
|
|
|
|
* * Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* * Redistributions in binary form must reproduce the above
|
|
|
|
* copyright notice, this list of conditions and the following
|
|
|
|
* disclaimer in the documentation and/or other materials provided
|
|
|
|
* with the distribution.
|
|
|
|
* * Neither the name of Google Inc. nor the names of its
|
|
|
|
* contributors may be used to endorse or promote products derived
|
|
|
|
* from this software without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
2014-10-20 14:37:04 +00:00
|
|
|
*/
|
|
|
|
/**
|
|
|
|
* @author Christian <c@ethdev.com>
|
|
|
|
* @date 2014
|
|
|
|
* Solidity scanner.
|
|
|
|
*/
|
2014-10-06 15:13:52 +00:00
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
2018-11-14 16:11:55 +00:00
|
|
|
#include <liblangutil/Token.h>
|
2018-11-14 14:18:55 +00:00
|
|
|
#include <liblangutil/CharStream.h>
|
2018-11-14 13:59:30 +00:00
|
|
|
#include <liblangutil/SourceLocation.h>
|
2019-10-28 10:39:30 +00:00
|
|
|
|
|
|
|
#include <optional>
|
2018-11-23 15:47:34 +00:00
|
|
|
#include <iosfwd>
|
2014-10-06 15:13:52 +00:00
|
|
|
|
2019-12-11 16:31:36 +00:00
|
|
|
namespace solidity::langutil
|
2014-10-16 12:08:54 +00:00
|
|
|
{
|
2014-10-06 15:13:52 +00:00
|
|
|
|
|
|
|
// Forward declarations.
// NOTE(review): AstRawString, AstValueFactory and ParserRecorder are not referenced
// anywhere else in the visible part of this header; presumably they are leftovers
// from the V8 "scanner.h" this file is derived from -- confirm before removing.
class AstRawString;
|
|
|
|
class AstValueFactory;
|
|
|
|
class ParserRecorder;
|
|
|
|
|
2020-07-10 15:05:52 +00:00
|
|
|
/// Mode the scanner operates in. A Scanner starts out as ScannerKind::Solidity
/// and can be switched with Scanner::setScannerMode().
enum class ScannerKind { Solidity, Yul };
|
|
|
|
|
2018-11-23 15:47:34 +00:00
|
|
|
/// Error codes the scanner stores in a token descriptor. The code belonging to
/// the current token can be queried through Scanner::currentError().
/// The enumerator order is part of the interface: values are implicit and
/// start at zero with NoError.
enum class ScannerError
{
	NoError,

	IllegalToken,
	IllegalHexString,
	IllegalHexDigit,
	IllegalCommentTerminator,
	IllegalEscapeSequence,
	IllegalCharacterInString,
	IllegalStringEndQuote,
	IllegalNumberSeparator,
	IllegalExponent,
	IllegalNumberEnd,

	DirectionalOverrideUnderflow,
	DirectionalOverrideMismatch,

	OctalNotAllowed,
};
|
|
|
|
|
|
|
|
/// @returns a string for the scanner error code @a _errorCode.
/// Only declared here; the exact wording is determined by the out-of-view definition.
std::string to_string(ScannerError _errorCode);
|
|
|
|
/// Stream insertion operator for ScannerError.
/// NOTE(review): presumably writes the same text as to_string() -- confirm in the implementation.
std::ostream& operator<<(std::ostream& os, ScannerError _errorCode);
|
|
|
|
|
2014-10-16 12:08:54 +00:00
|
|
|
class Scanner
|
|
|
|
{
|
2014-11-30 22:25:42 +00:00
|
|
|
friend class LiteralScope;
|
2014-10-06 15:13:52 +00:00
|
|
|
public:
|
2021-07-14 10:53:39 +00:00
|
|
|
explicit Scanner(CharStream& _source):
|
|
|
|
m_source(_source),
|
|
|
|
m_sourceName{std::make_shared<std::string>(_source.name())}
|
|
|
|
{
|
|
|
|
reset();
|
|
|
|
}
|
2018-11-28 23:56:25 +00:00
|
|
|
|
2014-12-03 16:45:12 +00:00
|
|
|
/// Resets scanner to the start of input.
|
|
|
|
void reset();
|
2014-10-09 10:28:37 +00:00
|
|
|
|
2020-07-10 15:05:52 +00:00
|
|
|
/// Changes the scanner mode.
|
|
|
|
void setScannerMode(ScannerKind _kind)
|
|
|
|
{
|
|
|
|
m_kind = _kind;
|
|
|
|
|
|
|
|
// Invalidate lookahead buffer.
|
|
|
|
rescan();
|
|
|
|
}
|
2019-04-24 11:16:43 +00:00
|
|
|
|
2021-08-03 13:50:38 +00:00
|
|
|
CharStream const& charStream() const noexcept { return m_source; }
|
|
|
|
|
2016-03-09 16:23:05 +00:00
|
|
|
/// @returns the next token and advances input
|
2018-10-22 14:48:21 +00:00
|
|
|
Token next();
|
2014-10-16 21:49:45 +00:00
|
|
|
|
2019-05-27 14:13:27 +00:00
|
|
|
/// Set scanner to a specific offset. This is used in error recovery.
|
|
|
|
void setPosition(size_t _offset);
|
|
|
|
|
2014-10-22 18:35:35 +00:00
|
|
|
///@{
|
|
|
|
///@name Information about the current token
|
2014-10-16 21:49:45 +00:00
|
|
|
|
2016-03-09 16:23:05 +00:00
|
|
|
/// @returns the current token
|
2018-10-22 14:48:21 +00:00
|
|
|
Token currentToken() const
|
2014-11-18 17:50:40 +00:00
|
|
|
{
|
2020-01-22 19:10:56 +00:00
|
|
|
return m_tokens[Current].token;
|
2014-11-18 17:50:40 +00:00
|
|
|
}
|
2017-08-17 00:14:15 +00:00
|
|
|
ElementaryTypeNameToken currentElementaryTypeNameToken() const
|
2016-03-30 18:09:38 +00:00
|
|
|
{
|
|
|
|
unsigned firstSize;
|
|
|
|
unsigned secondSize;
|
2020-01-22 19:10:56 +00:00
|
|
|
std::tie(firstSize, secondSize) = m_tokens[Current].extendedTokenInfo;
|
|
|
|
return ElementaryTypeNameToken(m_tokens[Current].token, firstSize, secondSize);
|
2016-03-30 18:09:38 +00:00
|
|
|
}
|
2015-01-05 15:37:43 +00:00
|
|
|
|
2020-01-22 19:10:56 +00:00
|
|
|
SourceLocation currentLocation() const { return m_tokens[Current].location; }
|
|
|
|
std::string const& currentLiteral() const { return m_tokens[Current].literal; }
|
|
|
|
std::tuple<unsigned, unsigned> const& currentTokenInfo() const { return m_tokens[Current].extendedTokenInfo; }
|
2018-11-23 15:47:34 +00:00
|
|
|
|
|
|
|
/// Retrieves the last error that occurred during lexical analysis.
|
|
|
|
/// @note If no error occurred, the value is undefined.
|
2020-01-22 19:10:56 +00:00
|
|
|
ScannerError currentError() const noexcept { return m_tokens[Current].error; }
|
2014-10-22 18:35:35 +00:00
|
|
|
///@}
|
|
|
|
|
2014-11-19 15:21:42 +00:00
|
|
|
///@{
|
|
|
|
///@name Information about the current comment token
|
2014-11-27 17:57:50 +00:00
|
|
|
|
2020-01-22 19:10:56 +00:00
|
|
|
SourceLocation currentCommentLocation() const { return m_skippedComments[Current].location; }
|
|
|
|
std::string const& currentCommentLiteral() const { return m_skippedComments[Current].literal; }
|
2014-11-27 17:57:50 +00:00
|
|
|
/// Called by the parser during FunctionDefinition parsing to clear the current comment
|
2020-01-22 19:10:56 +00:00
|
|
|
void clearCurrentCommentLiteral() { m_skippedComments[Current].literal.clear(); }
|
2014-11-27 17:57:50 +00:00
|
|
|
|
2014-11-19 15:21:42 +00:00
|
|
|
///@}
|
|
|
|
|
2014-10-22 18:35:35 +00:00
|
|
|
///@{
|
|
|
|
///@name Information about the next token
|
2014-10-09 10:28:37 +00:00
|
|
|
|
2016-03-09 16:23:05 +00:00
|
|
|
/// @returns the next token without advancing input.
|
2020-01-22 19:10:56 +00:00
|
|
|
Token peekNextToken() const { return m_tokens[Next].token; }
|
|
|
|
SourceLocation peekLocation() const { return m_tokens[Next].location; }
|
|
|
|
std::string const& peekLiteral() const { return m_tokens[Next].literal; }
|
|
|
|
|
|
|
|
Token peekNextNextToken() const { return m_tokens[NextNext].token; }
|
2014-10-22 18:35:35 +00:00
|
|
|
///@}
|
2014-10-09 10:28:37 +00:00
|
|
|
|
2014-10-06 15:13:52 +00:00
|
|
|
private:
|
2020-11-18 13:35:16 +00:00
|
|
|
|
2018-11-23 15:47:34 +00:00
|
|
|
inline Token setError(ScannerError _error) noexcept
|
|
|
|
{
|
2020-01-22 19:10:56 +00:00
|
|
|
m_tokens[NextNext].error = _error;
|
2018-11-23 15:47:34 +00:00
|
|
|
return Token::Illegal;
|
|
|
|
}
|
|
|
|
|
2014-11-20 21:08:16 +00:00
|
|
|
/// Used for the current and look-ahead token and comments
|
2014-10-16 12:08:54 +00:00
|
|
|
struct TokenDesc
|
|
|
|
{
|
2018-10-22 14:48:21 +00:00
|
|
|
Token token;
|
2015-02-23 16:14:59 +00:00
|
|
|
SourceLocation location;
|
2014-10-09 10:28:37 +00:00
|
|
|
std::string literal;
|
2018-11-23 15:47:34 +00:00
|
|
|
ScannerError error = ScannerError::NoError;
|
2016-02-12 21:01:27 +00:00
|
|
|
std::tuple<unsigned, unsigned> extendedTokenInfo;
|
2014-10-09 10:28:37 +00:00
|
|
|
};
|
|
|
|
|
2014-10-22 18:35:35 +00:00
|
|
|
///@{
|
|
|
|
///@name Literal buffer support
|
2020-01-22 19:10:56 +00:00
|
|
|
inline void addLiteralChar(char c) { m_tokens[NextNext].literal.push_back(c); }
|
|
|
|
inline void addCommentLiteralChar(char c) { m_skippedComments[NextNext].literal.push_back(c); }
|
2014-10-16 12:08:54 +00:00
|
|
|
inline void addLiteralCharAndAdvance() { addLiteralChar(m_char); advance(); }
|
2016-08-01 13:10:46 +00:00
|
|
|
void addUnicodeAsUTF8(unsigned codepoint);
|
2014-10-22 18:35:35 +00:00
|
|
|
///@}
|
2014-10-09 10:28:37 +00:00
|
|
|
|
2021-07-14 10:53:39 +00:00
|
|
|
bool advance() { m_char = m_source.advanceAndGet(); return !m_source.isPastEndOfInput(); }
|
|
|
|
void rollback(size_t _amount) { m_char = m_source.rollback(_amount); }
|
2019-04-24 11:16:43 +00:00
|
|
|
/// Rolls back to the start of the current token and re-runs the scanner.
|
|
|
|
void rescan();
|
2014-10-09 10:28:37 +00:00
|
|
|
|
2018-11-23 15:47:34 +00:00
|
|
|
inline Token selectErrorToken(ScannerError _err) { advance(); return setError(_err); }
|
2018-10-22 14:48:21 +00:00
|
|
|
inline Token selectToken(Token _tok) { advance(); return _tok; }
|
2014-10-16 21:49:45 +00:00
|
|
|
/// If the next character is _next, advance and return _then, otherwise return _else.
|
2018-10-22 14:48:21 +00:00
|
|
|
inline Token selectToken(char _next, Token _then, Token _else);
|
2014-10-09 10:28:37 +00:00
|
|
|
|
2014-11-05 13:20:56 +00:00
|
|
|
bool scanHexByte(char& o_scannedByte);
|
2019-10-28 10:39:30 +00:00
|
|
|
std::optional<unsigned> scanUnicode();
|
2014-10-09 10:28:37 +00:00
|
|
|
|
2014-11-30 21:43:40 +00:00
|
|
|
/// Scans a single Solidity token.
|
|
|
|
void scanToken();
|
2014-10-09 10:28:37 +00:00
|
|
|
|
2014-10-22 18:35:35 +00:00
|
|
|
/// Skips all whitespace and @returns true if something was skipped.
|
2014-10-09 10:28:37 +00:00
|
|
|
bool skipWhitespace();
|
2018-09-06 09:05:35 +00:00
|
|
|
/// Skips all whitespace that are neither '\r' nor '\n'.
|
2020-04-28 08:34:07 +00:00
|
|
|
bool skipWhitespaceExceptUnicodeLinebreak();
|
2018-10-22 14:48:21 +00:00
|
|
|
Token skipSingleLineComment();
|
|
|
|
Token skipMultiLineComment();
|
2014-10-09 10:28:37 +00:00
|
|
|
|
2019-08-05 09:53:24 +00:00
|
|
|
/// Tests if current source position is CR, LF or CRLF.
|
|
|
|
bool atEndOfLine() const;
|
|
|
|
|
|
|
|
/// Tries to consume CR, LF or CRLF line terminators and returns success or failure.
|
|
|
|
bool tryScanEndOfLine();
|
|
|
|
|
2014-10-09 10:28:37 +00:00
|
|
|
void scanDecimalDigits();
|
2018-10-22 14:48:21 +00:00
|
|
|
Token scanNumber(char _charSeen = 0);
|
|
|
|
std::tuple<Token, unsigned, unsigned> scanIdentifierOrKeyword();
|
2014-10-09 10:28:37 +00:00
|
|
|
|
2020-07-02 16:39:04 +00:00
|
|
|
Token scanString(bool const _isUnicode);
|
2018-10-22 14:48:21 +00:00
|
|
|
Token scanHexString();
|
2020-01-25 16:53:48 +00:00
|
|
|
/// Scans a single line comment and returns its corrected end position.
|
2020-06-02 13:45:03 +00:00
|
|
|
size_t scanSingleLineDocComment();
|
2018-10-22 14:48:21 +00:00
|
|
|
Token scanMultiLineDocComment();
|
2014-12-18 16:30:10 +00:00
|
|
|
/// Scans a slash '/' and depending on the characters returns the appropriate token
|
2018-10-22 14:48:21 +00:00
|
|
|
Token scanSlash();
|
2014-10-09 10:28:37 +00:00
|
|
|
|
2014-10-16 21:49:45 +00:00
|
|
|
/// Scans an escape-sequence which is part of a string and adds the
|
|
|
|
/// decoded character to the current literal. Returns true if a pattern
|
|
|
|
/// is scanned.
|
2014-10-09 10:28:37 +00:00
|
|
|
bool scanEscape();
|
|
|
|
|
2018-09-06 09:05:35 +00:00
|
|
|
/// @returns true iff we are currently positioned at a unicode line break.
|
|
|
|
bool isUnicodeLinebreak();
|
|
|
|
|
2014-10-16 21:49:45 +00:00
|
|
|
/// Return the current source position.
|
2021-07-14 10:53:39 +00:00
|
|
|
size_t sourcePos() const { return m_source.position(); }
|
|
|
|
bool isSourcePastEndOfInput() const { return m_source.isPastEndOfInput(); }
|
2014-10-09 10:28:37 +00:00
|
|
|
|
2020-01-22 19:10:56 +00:00
|
|
|
enum TokenIndex { Current, Next, NextNext };
|
2014-11-19 15:21:42 +00:00
|
|
|
|
2020-01-22 19:10:56 +00:00
|
|
|
TokenDesc m_skippedComments[3] = {}; // desc for the current, next and nextnext skipped comment
|
|
|
|
TokenDesc m_tokens[3] = {}; // desc for the current, next and nextnext token
|
2014-10-09 10:28:37 +00:00
|
|
|
|
2021-07-14 10:53:39 +00:00
|
|
|
CharStream& m_source;
|
2021-06-29 12:38:59 +00:00
|
|
|
std::shared_ptr<std::string const> m_sourceName;
|
2014-10-09 10:28:37 +00:00
|
|
|
|
2020-07-10 15:05:52 +00:00
|
|
|
ScannerKind m_kind = ScannerKind::Solidity;
|
|
|
|
|
2014-10-16 21:49:45 +00:00
|
|
|
/// one character look-ahead, equals 0 at end of input
|
2014-10-09 10:28:37 +00:00
|
|
|
char m_char;
|
2014-10-06 15:13:52 +00:00
|
|
|
};
|
|
|
|
|
2014-10-16 12:08:54 +00:00
|
|
|
}
|