Solidity scanner and some unit tests.

The scanner is a modified version of the v8 javascript scanner.
This commit is contained in:
Christian 2014-10-06 17:13:52 +02:00
commit ef59373871
5 changed files with 1370 additions and 0 deletions

49
CMakeLists.txt Normal file
View File

@ -0,0 +1,49 @@
cmake_policy(SET CMP0015 NEW)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTATICLIB")
aux_source_directory(. SRC_LIST)
set(EXECUTABLE solidity)
if(ETH_STATIC)
add_library(${EXECUTABLE} STATIC ${SRC_LIST})
else()
add_library(${EXECUTABLE} SHARED ${SRC_LIST})
endif()
file(GLOB HEADERS "*.h")
include_directories(..)
target_link_libraries(${EXECUTABLE} evmface)
target_link_libraries(${EXECUTABLE} devcore)
if("${TARGET_PLATFORM}" STREQUAL "w64")
target_link_libraries(${EXECUTABLE} boost_system-mt-s)
target_link_libraries(${EXECUTABLE} boost_thread_win32-mt-s)
target_link_libraries(${EXECUTABLE} iphlpapi)
target_link_libraries(${EXECUTABLE} ws2_32)
target_link_libraries(${EXECUTABLE} mswsock)
target_link_libraries(${EXECUTABLE} shlwapi)
elseif (APPLE)
# Latest mavericks boost libraries only come with -mt
target_link_libraries(${EXECUTABLE} boost_system-mt)
target_link_libraries(${EXECUTABLE} boost_thread-mt)
find_package(Threads REQUIRED)
target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
elseif (UNIX)
target_link_libraries(${EXECUTABLE} ${Boost_SYSTEM_LIBRARY})
target_link_libraries(${EXECUTABLE} ${Boost_THREAD_LIBRARY})
target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
else ()
target_link_libraries(${EXECUTABLE} boost_system)
target_link_libraries(${EXECUTABLE} boost_thread)
find_package(Threads REQUIRED)
target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
endif ()
install( TARGETS ${EXECUTABLE} ARCHIVE DESTINATION lib LIBRARY DESTINATION lib )
install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} )

653
Scanner.cpp Normal file
View File

@ -0,0 +1,653 @@
// Copyright 2006-2012, the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Modifications as part of cpp-ethereum under the following license:
//
// cpp-ethereum is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// cpp-ethereum is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
#include <libsolidity/Scanner.h>
namespace dev {
namespace solidity {
namespace {
bool IsDecimalDigit(char c) {
return '0' <= c && c <= '9';
}
bool IsHexDigit(char c) {
return IsDecimalDigit(c)
|| ('a' <= c && c <= 'f')
|| ('A' <= c && c <= 'F');
}
bool IsLineTerminator(char c) { return c == '\n'; }
bool IsWhiteSpace(char c) {
return c == ' ' || c == '\n' || c == '\t';
}
bool IsIdentifierStart(char c) {
return c == '_' || c == '$' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
}
bool IsIdentifierPart(char c) {
return IsIdentifierStart(c) || IsDecimalDigit(c);
}
int HexValue(char c) {
if (c >= '0' && c <= '9') return c - '0';
else if (c >= 'a' && c <= 'f') return c - 'a' + 10;
else if (c >= 'A' && c <= 'F') return c - 'A' + 10;
else return -1;
}
}
Scanner::Scanner(const CharStream& _source)
{
reset(_source);
}
void Scanner::reset(const CharStream& _source)
{
m_source = _source;
// Initialize current_ to not refer to a literal.
m_current_token.token = Token::ILLEGAL;
m_current_token.literal.clear();
m_hasLineTerminatorBeforeNext = true;
m_hasMultilineCommentBeforeNext = false;
m_char = m_source.get();
skipWhitespace();
scanToken();
}
bool Scanner::scanHexNumber(char& scanned_number, int expected_length)
{
BOOST_ASSERT(expected_length <= 4); // prevent overflow
char x = 0;
for (int i = 0; i < expected_length; i++) {
int d = HexValue(m_char);
if (d < 0) {
rollback(i);
return false;
}
x = x * 16 + d;
advance();
}
scanned_number = x;
return true;
}
// Ensure that tokens can be stored in a byte.
BOOST_STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
Token::Value Scanner::next()
{
m_current_token = m_next_token;
m_hasLineTerminatorBeforeNext = false;
m_hasMultilineCommentBeforeNext = false;
scanToken();
return m_current_token.token;
}
bool Scanner::skipWhitespace()
{
const int start_position = getSourcePos();
while (true) {
if (IsLineTerminator(m_char)) {
m_hasLineTerminatorBeforeNext = true;
} else if (!IsWhiteSpace(m_char)) {
break;
}
advance();
}
// Return whether or not we skipped any characters.
return getSourcePos() != start_position;
}
Token::Value Scanner::skipSingleLineComment()
{
// The line terminator at the end of the line is not considered
// to be part of the single-line comment; it is recognized
// separately by the lexical grammar and becomes part of the
// stream of input elements for the syntactic grammar
while (advance() && !IsLineTerminator(m_char)) { };
return Token::WHITESPACE;
}
Token::Value Scanner::skipMultiLineComment()
{
BOOST_ASSERT(m_char == '*');
advance();
while (!isSourcePastEndOfInput()) {
char ch = m_char;
advance();
if (IsLineTerminator(ch)) {
// Following ECMA-262, section 7.4, a comment containing
// a newline will make the comment count as a line-terminator.
m_hasMultilineCommentBeforeNext = true;
}
// If we have reached the end of the multi-line comment, we
// consume the '/' and insert a whitespace. This way all
// multi-line comments are treated as whitespace.
if (ch == '*' && m_char == '/') {
m_char = ' ';
return Token::WHITESPACE;
}
}
// Unterminated multi-line comment.
return Token::ILLEGAL;
}
void Scanner::scanToken()
{
m_next_token.literal.clear();
Token::Value token;
do {
// Remember the position of the next token
m_next_token.location.beg_pos = getSourcePos();
switch (m_char) {
case '\n':
m_hasLineTerminatorBeforeNext = true; // fall-through
case ' ':
case '\t':
token = selectToken(Token::WHITESPACE);
break;
case '"': case '\'':
token = scanString();
break;
case '<':
// < <= << <<=
advance();
if (m_char == '=') {
token = selectToken(Token::LTE);
} else if (m_char == '<') {
token = selectToken('=', Token::ASSIGN_SHL, Token::SHL);
} else {
token = Token::LT;
}
break;
case '>':
// > >= >> >>= >>> >>>=
advance();
if (m_char == '=') {
token = selectToken(Token::GTE);
} else if (m_char == '>') {
// >> >>= >>> >>>=
advance();
if (m_char == '=') {
token = selectToken(Token::ASSIGN_SAR);
} else if (m_char == '>') {
token = selectToken('=', Token::ASSIGN_SHR, Token::SHR);
} else {
token = Token::SAR;
}
} else {
token = Token::GT;
}
break;
case '=':
// = == =>
advance();
if (m_char == '=') {
token = selectToken(Token::EQ);
} else if (m_char == '>') {
token = selectToken(Token::ARROW);
} else {
token = Token::ASSIGN;
}
break;
case '!':
// ! != !==
advance();
if (m_char == '=') {
token = selectToken(Token::NE);
} else {
token = Token::NOT;
}
break;
case '+':
// + ++ +=
advance();
if (m_char == '+') {
token = selectToken(Token::INC);
} else if (m_char == '=') {
token = selectToken(Token::ASSIGN_ADD);
} else {
token = Token::ADD;
}
break;
case '-':
// - -- -=
advance();
if (m_char == '-') {
advance();
token = Token::DEC;
} else if (m_char == '=') {
token = selectToken(Token::ASSIGN_SUB);
} else {
token = Token::SUB;
}
break;
case '*':
// * *=
token = selectToken('=', Token::ASSIGN_MUL, Token::MUL);
break;
case '%':
// % %=
token = selectToken('=', Token::ASSIGN_MOD, Token::MOD);
break;
case '/':
// / // /* /=
advance();
if (m_char == '/') {
token = skipSingleLineComment();
} else if (m_char == '*') {
token = skipMultiLineComment();
} else if (m_char == '=') {
token = selectToken(Token::ASSIGN_DIV);
} else {
token = Token::DIV;
}
break;
case '&':
// & && &=
advance();
if (m_char == '&') {
token = selectToken(Token::AND);
} else if (m_char == '=') {
token = selectToken(Token::ASSIGN_BIT_AND);
} else {
token = Token::BIT_AND;
}
break;
case '|':
// | || |=
advance();
if (m_char == '|') {
token = selectToken(Token::OR);
} else if (m_char == '=') {
token = selectToken(Token::ASSIGN_BIT_OR);
} else {
token = Token::BIT_OR;
}
break;
case '^':
// ^ ^=
token = selectToken('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);
break;
case '.':
// . Number
advance();
if (IsDecimalDigit(m_char)) {
token = scanNumber(true);
} else {
token = Token::PERIOD;
}
break;
case ':':
token = selectToken(Token::COLON);
break;
case ';':
token = selectToken(Token::SEMICOLON);
break;
case ',':
token = selectToken(Token::COMMA);
break;
case '(':
token = selectToken(Token::LPAREN);
break;
case ')':
token = selectToken(Token::RPAREN);
break;
case '[':
token = selectToken(Token::LBRACK);
break;
case ']':
token = selectToken(Token::RBRACK);
break;
case '{':
token = selectToken(Token::LBRACE);
break;
case '}':
token = selectToken(Token::RBRACE);
break;
case '?':
token = selectToken(Token::CONDITIONAL);
break;
case '~':
token = selectToken(Token::BIT_NOT);
break;
default:
if (IsIdentifierStart(m_char)) {
token = scanIdentifierOrKeyword();
} else if (IsDecimalDigit(m_char)) {
token = scanNumber(false);
} else if (skipWhitespace()) {
token = Token::WHITESPACE;
} else if (isSourcePastEndOfInput()) {
token = Token::EOS;
} else {
token = selectToken(Token::ILLEGAL);
}
break;
}
// Continue scanning for tokens as long as we're just skipping
// whitespace.
} while (token == Token::WHITESPACE);
m_next_token.location.end_pos = getSourcePos();
m_next_token.token = token;
}
bool Scanner::scanEscape()
{
char c = m_char;
advance();
// Skip escaped newlines.
if (IsLineTerminator(c))
return true;
switch (c) {
case '\'': // fall through
case '"' : // fall through
case '\\': break;
case 'b' : c = '\b'; break;
case 'f' : c = '\f'; break;
case 'n' : c = '\n'; break;
case 'r' : c = '\r'; break;
case 't' : c = '\t'; break;
case 'u' : {
if (!scanHexNumber(c, 4)) return false;
break;
}
case 'v' : c = '\v'; break;
case 'x' : {
if (!scanHexNumber(c, 2)) return false;
break;
}
}
// According to ECMA-262, section 7.8.4, characters not covered by the
// above cases should be illegal, but they are commonly handled as
// non-escaped characters by JS VMs.
addLiteralChar(c);
return true;
}
Token::Value Scanner::scanString()
{
const char quote = m_char;
advance(); // consume quote
LiteralScope literal(this);
while (m_char != quote && !isSourcePastEndOfInput() && !IsLineTerminator(m_char)) {
char c = m_char;
advance();
if (c == '\\') {
if (isSourcePastEndOfInput() || !scanEscape()) return Token::ILLEGAL;
} else {
addLiteralChar(c);
}
}
if (m_char != quote) return Token::ILLEGAL;
literal.Complete();
advance(); // consume quote
return Token::STRING;
}
void Scanner::scanDecimalDigits()
{
while (IsDecimalDigit(m_char))
addLiteralCharAndAdvance();
}
Token::Value Scanner::scanNumber(bool _periodSeen)
{
BOOST_ASSERT(IsDecimalDigit(m_char)); // the first digit of the number or the fraction
enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL;
LiteralScope literal(this);
if (_periodSeen) {
// we have already seen a decimal point of the float
addLiteralChar('.');
scanDecimalDigits(); // we know we have at least one digit
} else {
// if the first character is '0' we must check for octals and hex
if (m_char == '0') {
addLiteralCharAndAdvance();
// either 0, 0exxx, 0Exxx, 0.xxx, a hex number, a binary number or
// an octal number.
if (m_char == 'x' || m_char == 'X') {
// hex number
kind = HEX;
addLiteralCharAndAdvance();
if (!IsHexDigit(m_char)) {
// we must have at least one hex digit after 'x'/'X'
return Token::ILLEGAL;
}
while (IsHexDigit(m_char)) {
addLiteralCharAndAdvance();
}
}
}
// Parse decimal digits and allow trailing fractional part.
if (kind == DECIMAL) {
scanDecimalDigits(); // optional
if (m_char == '.') {
addLiteralCharAndAdvance();
scanDecimalDigits(); // optional
}
}
}
// scan exponent, if any
if (m_char == 'e' || m_char == 'E') {
BOOST_ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number
if (kind != DECIMAL) return Token::ILLEGAL;
// scan exponent
addLiteralCharAndAdvance();
if (m_char == '+' || m_char == '-')
addLiteralCharAndAdvance();
if (!IsDecimalDigit(m_char)) {
// we must have at least one decimal digit after 'e'/'E'
return Token::ILLEGAL;
}
scanDecimalDigits();
}
// The source character immediately following a numeric literal must
// not be an identifier start or a decimal digit; see ECMA-262
// section 7.8.3, page 17 (note that we read only one decimal digit
// if the value is 0).
if (IsDecimalDigit(m_char) || IsIdentifierStart(m_char))
return Token::ILLEGAL;
literal.Complete();
return Token::NUMBER;
}
// ----------------------------------------------------------------------------
// Keyword Matcher
#define KEYWORDS(KEYWORD_GROUP, KEYWORD) \
KEYWORD_GROUP('b') \
KEYWORD("break", Token::BREAK) \
KEYWORD_GROUP('c') \
KEYWORD("case", Token::CASE) \
KEYWORD("catch", Token::CATCH) \
KEYWORD("const", Token::CONST) \
KEYWORD("continue", Token::CONTINUE) \
KEYWORD_GROUP('d') \
KEYWORD("debugger", Token::DEBUGGER) \
KEYWORD("default", Token::DEFAULT) \
KEYWORD("delete", Token::DELETE) \
KEYWORD("do", Token::DO) \
KEYWORD_GROUP('e') \
KEYWORD("else", Token::ELSE) \
KEYWORD("enum", Token::FUTURE_RESERVED_WORD) \
KEYWORD_GROUP('f') \
KEYWORD("false", Token::FALSE_LITERAL) \
KEYWORD("finally", Token::FINALLY) \
KEYWORD("for", Token::FOR) \
KEYWORD("function", Token::FUNCTION) \
KEYWORD_GROUP('i') \
KEYWORD("if", Token::IF) \
KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD("in", Token::IN) \
KEYWORD("instanceof", Token::INSTANCEOF) \
KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD_GROUP('l') \
KEYWORD_GROUP('n') \
KEYWORD("new", Token::NEW) \
KEYWORD("null", Token::NULL_LITERAL) \
KEYWORD_GROUP('p') \
KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD_GROUP('r') \
KEYWORD("return", Token::RETURN) \
KEYWORD_GROUP('s') \
KEYWORD("switch", Token::SWITCH) \
KEYWORD_GROUP('t') \
KEYWORD("this", Token::THIS) \
KEYWORD("throw", Token::THROW) \
KEYWORD("true", Token::TRUE_LITERAL) \
KEYWORD("try", Token::TRY) \
KEYWORD("typeof", Token::TYPEOF) \
KEYWORD_GROUP('v') \
KEYWORD("var", Token::VAR) \
KEYWORD("void", Token::VOID) \
KEYWORD_GROUP('w') \
KEYWORD("while", Token::WHILE) \
KEYWORD("with", Token::WITH)
static Token::Value KeywordOrIdentifierToken(const std::string& input)
{
BOOST_ASSERT(!input.empty());
const int kMinLength = 2;
const int kMaxLength = 10;
if (input.size() < kMinLength || input.size() > kMaxLength) {
return Token::IDENTIFIER;
}
switch (input[0]) {
default:
#define KEYWORD_GROUP_CASE(ch) \
break; \
case ch:
#define KEYWORD(keyword, token) \
{ \
/* 'keyword' is a char array, so sizeof(keyword) is */ \
/* strlen(keyword) plus 1 for the NUL char. */ \
const int keyword_length = sizeof(keyword) - 1; \
BOOST_STATIC_ASSERT(keyword_length >= kMinLength); \
BOOST_STATIC_ASSERT(keyword_length <= kMaxLength); \
if (input == keyword) { \
return token; \
} \
}
KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)
}
return Token::IDENTIFIER;
}
Token::Value Scanner::scanIdentifierOrKeyword()
{
BOOST_ASSERT(IsIdentifierStart(m_char));
LiteralScope literal(this);
addLiteralCharAndAdvance();
// Scan the rest of the identifier characters.
while (IsIdentifierPart(m_char))
addLiteralCharAndAdvance();
literal.Complete();
return KeywordOrIdentifierToken(m_next_token.literal);
}
} }

252
Scanner.h Normal file
View File

@ -0,0 +1,252 @@
// Copyright 2006-2012, the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Modifications as part of cpp-ethereum under the following license:
//
// cpp-ethereum is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// cpp-ethereum is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
#pragma once
#include <boost/assert.hpp>
#include <libdevcore/Common.h>
#include <libdevcore/Log.h>
#include <libdevcore/CommonData.h>
#include <libsolidity/Token.h>
namespace dev {
namespace solidity {
class AstRawString;
class AstValueFactory;
class ParserRecorder;
class CharStream {
public:
CharStream()
: m_pos(0)
{}
explicit CharStream(const std::string& _source)
: m_source(_source), m_pos(0)
{}
int getPos() const { return m_pos; }
bool isPastEndOfInput() const { return m_pos >= m_source.size(); }
char get() const { return m_source[m_pos]; }
char advanceAndGet() {
if (isPastEndOfInput()) return 0;
++m_pos;
if (isPastEndOfInput()) return 0;
return get();
}
char rollback(size_t _amount) {
BOOST_ASSERT(m_pos >= _amount);
m_pos -= _amount;
return get();
}
private:
std::string m_source;
size_t m_pos;
};
// ----------------------------------------------------------------------------
// JavaScript Scanner.
class Scanner {
public:
// Scoped helper for literal recording. Automatically drops the literal
// if aborting the scanning before it's complete.
class LiteralScope {
public:
explicit LiteralScope(Scanner* self)
: scanner_(self), complete_(false) {
scanner_->startNewLiteral();
}
~LiteralScope() {
if (!complete_) scanner_->dropLiteral();
}
void Complete() {
complete_ = true;
}
private:
Scanner* scanner_;
bool complete_;
};
// Representation of an interval of source positions.
struct Location {
Location(int b, int e) : beg_pos(b), end_pos(e) { }
Location() : beg_pos(0), end_pos(0) { }
bool IsValid() const {
return beg_pos >= 0 && end_pos >= beg_pos;
}
static Location invalid() { return Location(-1, -1); }
int beg_pos;
int end_pos;
};
explicit Scanner(const CharStream& _source);
// Resets the scanner as if newly constructed with _input as input.
void reset(const CharStream& _source);
// Returns the next token and advances input.
Token::Value next();
// Returns the current token again.
Token::Value getCurrentToken() { return m_current_token.token; }
// Returns the location information for the current token
// (the token last returned by Next()).
Location getCurrentLocation() const { return m_current_token.location; }
const std::string& getCurrentLiteral() const { return m_current_token.literal; }
// Similar functions for the upcoming token.
// One token look-ahead (past the token returned by Next()).
Token::Value peek() const { return m_next_token.token; }
Location peekLocation() const { return m_next_token.location; }
const std::string& peekLiteral() const { return m_next_token.literal; }
// Returns true if there was a line terminator before the peek'ed token,
// possibly inside a multi-line comment.
bool hasAnyLineTerminatorBeforeNext() const {
return m_hasLineTerminatorBeforeNext ||
m_hasMultilineCommentBeforeNext;
}
private:
// Used for the current and look-ahead token.
struct TokenDesc {
Token::Value token;
Location location;
std::string literal;
};
static const int kCharacterLookaheadBufferSize = 1;
// Literal buffer support
inline void startNewLiteral() {
m_next_token.literal.clear();
}
inline void addLiteralChar(char c) {
m_next_token.literal.push_back(c);
}
inline void dropLiteral() {
m_next_token.literal.clear();
}
inline void addLiteralCharAndAdvance() {
addLiteralChar(m_char);
advance();
}
// Low-level scanning support.
bool advance() { m_char = m_source.advanceAndGet(); return !m_source.isPastEndOfInput(); }
void rollback(int amount) {
m_char = m_source.rollback(amount);
}
inline Token::Value selectToken(Token::Value tok) {
advance();
return tok;
}
inline Token::Value selectToken(char next, Token::Value then, Token::Value else_) {
advance();
if (m_char == next) {
advance();
return then;
} else {
return else_;
}
}
bool scanHexNumber(char& scanned_number, int expected_length);
// Scans a single JavaScript token.
void scanToken();
bool skipWhitespace();
Token::Value skipSingleLineComment();
Token::Value skipMultiLineComment();
void scanDecimalDigits();
Token::Value scanNumber(bool _periodSeen);
Token::Value scanIdentifierOrKeyword();
Token::Value scanString();
// Scans an escape-sequence which is part of a string and adds the
// decoded character to the current literal. Returns true if a pattern
// is scanned.
bool scanEscape();
// Return the current source position.
int getSourcePos() {
return m_source.getPos();
}
bool isSourcePastEndOfInput() {
return m_source.isPastEndOfInput();
}
TokenDesc m_current_token; // desc for current token (as returned by Next())
TokenDesc m_next_token; // desc for next token (one token look-ahead)
CharStream m_source;
// one character look-ahead, equals 0 at end of input
char m_char;
// Whether there is a line terminator whitespace character after
// the current token, and before the next. Does not count newlines
// inside multiline comments.
bool m_hasLineTerminatorBeforeNext;
// Whether there is a multi-line comment that contains a
// line-terminator after the current token, and before the next.
bool m_hasMultilineCommentBeforeNext;
};
} }

77
Token.cpp Normal file
View File

@ -0,0 +1,77 @@
// Copyright 2006-2012, the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Modifications as part of cpp-ethereum under the following license:
//
// cpp-ethereum is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// cpp-ethereum is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
#include <libsolidity/Token.h>
namespace dev {
namespace solidity {
#define T(name, string, precedence) #name,
const char* const Token::m_name[NUM_TOKENS] = {
TOKEN_LIST(T, T)
};
#undef T
#define T(name, string, precedence) string,
const char* const Token::m_string[NUM_TOKENS] = {
TOKEN_LIST(T, T)
};
#undef T
#define T(name, string, precedence) precedence,
const int8_t Token::m_precedence[NUM_TOKENS] = {
TOKEN_LIST(T, T)
};
#undef T
#define KT(a, b, c) 'T',
#define KK(a, b, c) 'K',
const char Token::m_tokenType[] = {
TOKEN_LIST(KT, KK)
};
#undef KT
#undef KK
} }

339
Token.h Normal file
View File

@ -0,0 +1,339 @@
// Copyright 2006-2012, the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Modifications as part of cpp-ethereum under the following license:
//
// cpp-ethereum is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// cpp-ethereum is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
#pragma once
#include <boost/assert.hpp>
#include <libdevcore/Common.h>
#include <libdevcore/Log.h>
namespace dev {
namespace solidity {
// TOKEN_LIST takes a list of 3 macros M, all of which satisfy the
// same signature M(name, string, precedence), where name is the
// symbolic token name, string is the corresponding syntactic symbol
// (or NULL, for literals), and precedence is the precedence (or 0).
// The parameters are invoked for token categories as follows:
//
// T: Non-keyword tokens
// K: Keyword tokens
// IGNORE_TOKEN is a convenience macro that can be supplied as
// an argument (at any position) for a TOKEN_LIST call. It does
// nothing with tokens belonging to the respective category.
#define IGNORE_TOKEN(name, string, precedence)
#define TOKEN_LIST(T, K) \
/* End of source indicator. */ \
T(EOS, "EOS", 0) \
\
/* Punctuators (ECMA-262, section 7.7, page 15). */ \
T(LPAREN, "(", 0) \
T(RPAREN, ")", 0) \
T(LBRACK, "[", 0) \
T(RBRACK, "]", 0) \
T(LBRACE, "{", 0) \
T(RBRACE, "}", 0) \
T(COLON, ":", 0) \
T(SEMICOLON, ";", 0) \
T(PERIOD, ".", 0) \
T(CONDITIONAL, "?", 3) \
T(INC, "++", 0) \
T(DEC, "--", 0) \
T(ARROW, "=>", 0) \
\
/* Assignment operators. */ \
/* IsAssignmentOp() and Assignment::is_compound() relies on */ \
/* this block of enum values being contiguous and sorted in the */ \
/* same order! */ \
T(INIT_VAR, "=init_var", 2) /* AST-use only. */ \
T(INIT_LET, "=init_let", 2) /* AST-use only. */ \
T(INIT_CONST, "=init_const", 2) /* AST-use only. */ \
T(INIT_CONST_LEGACY, "=init_const_legacy", 2) /* AST-use only. */ \
T(ASSIGN, "=", 2) \
T(ASSIGN_BIT_OR, "|=", 2) \
T(ASSIGN_BIT_XOR, "^=", 2) \
T(ASSIGN_BIT_AND, "&=", 2) \
T(ASSIGN_SHL, "<<=", 2) \
T(ASSIGN_SAR, ">>=", 2) \
T(ASSIGN_SHR, ">>>=", 2) \
T(ASSIGN_ADD, "+=", 2) \
T(ASSIGN_SUB, "-=", 2) \
T(ASSIGN_MUL, "*=", 2) \
T(ASSIGN_DIV, "/=", 2) \
T(ASSIGN_MOD, "%=", 2) \
\
/* Binary operators sorted by precedence. */ \
/* IsBinaryOp() relies on this block of enum values */ \
/* being contiguous and sorted in the same order! */ \
T(COMMA, ",", 1) \
T(OR, "||", 4) \
T(AND, "&&", 5) \
T(BIT_OR, "|", 6) \
T(BIT_XOR, "^", 7) \
T(BIT_AND, "&", 8) \
T(SHL, "<<", 11) \
T(SAR, ">>", 11) \
T(SHR, ">>>", 11) \
T(ROR, "rotate right", 11) /* only used by Crankshaft */ \
T(ADD, "+", 12) \
T(SUB, "-", 12) \
T(MUL, "*", 13) \
T(DIV, "/", 13) \
T(MOD, "%", 13) \
\
/* Compare operators sorted by precedence. */ \
/* IsCompareOp() relies on this block of enum values */ \
/* being contiguous and sorted in the same order! */ \
T(EQ, "==", 9) \
T(NE, "!=", 9) \
T(EQ_STRICT, "===", 9) \
T(NE_STRICT, "!==", 9) \
T(LT, "<", 10) \
T(GT, ">", 10) \
T(LTE, "<=", 10) \
T(GTE, ">=", 10) \
K(INSTANCEOF, "instanceof", 10) \
K(IN, "in", 10) \
\
/* Unary operators. */ \
/* IsUnaryOp() relies on this block of enum values */ \
/* being contiguous and sorted in the same order! */ \
T(NOT, "!", 0) \
T(BIT_NOT, "~", 0) \
K(DELETE, "delete", 0) \
K(TYPEOF, "typeof", 0) \
K(VOID, "void", 0) \
\
/* Keywords (ECMA-262, section 7.5.2, page 13). */ \
K(BREAK, "break", 0) \
K(CASE, "case", 0) \
K(CATCH, "catch", 0) \
K(CONTINUE, "continue", 0) \
K(DEBUGGER, "debugger", 0) \
K(DEFAULT, "default", 0) \
/* DELETE */ \
K(DO, "do", 0) \
K(ELSE, "else", 0) \
K(FINALLY, "finally", 0) \
K(FOR, "for", 0) \
K(FUNCTION, "function", 0) \
K(IF, "if", 0) \
/* IN */ \
/* INSTANCEOF */ \
K(NEW, "new", 0) \
K(RETURN, "return", 0) \
K(SWITCH, "switch", 0) \
K(THIS, "this", 0) \
K(THROW, "throw", 0) \
K(TRY, "try", 0) \
/* TYPEOF */ \
K(VAR, "var", 0) \
/* VOID */ \
K(WHILE, "while", 0) \
K(WITH, "with", 0) \
\
/* Literals (ECMA-262, section 7.8, page 16). */ \
K(NULL_LITERAL, "null", 0) \
K(TRUE_LITERAL, "true", 0) \
K(FALSE_LITERAL, "false", 0) \
T(NUMBER, NULL, 0) \
T(STRING, NULL, 0) \
\
/* Identifiers (not keywords or future reserved words). */ \
T(IDENTIFIER, NULL, 0) \
\
/* Future reserved words (ECMA-262, section 7.6.1.2). */ \
T(FUTURE_RESERVED_WORD, NULL, 0) \
T(FUTURE_STRICT_RESERVED_WORD, NULL, 0) \
K(CLASS, "class", 0) \
K(CONST, "const", 0) \
K(EXPORT, "export", 0) \
K(EXTENDS, "extends", 0) \
K(IMPORT, "import", 0) \
K(LET, "let", 0) \
K(STATIC, "static", 0) \
/* K(YIELD, "yield", 0) */ \
K(SUPER, "super", 0) \
\
/* Illegal token - not able to scan. */ \
T(ILLEGAL, "ILLEGAL", 0) \
\
/* Scanner-internal use only. */ \
T(WHITESPACE, NULL, 0)
class Token {
public:
// All token values.
#define T(name, string, precedence) name,
enum Value {
TOKEN_LIST(T, T)
NUM_TOKENS
};
#undef T
// Returns a string corresponding to the C++ token name
// (e.g. "LT" for the token LT).
static const char* Name(Value tok) {
BOOST_ASSERT(tok < NUM_TOKENS); // tok is unsigned
return m_name[tok];
}
// Predicates
static bool IsKeyword(Value tok) {
return m_tokenType[tok] == 'K';
}
static bool IsIdentifier(Value tok) {
return tok == IDENTIFIER;
}
static bool IsAssignmentOp(Value tok) {
return INIT_VAR <= tok && tok <= ASSIGN_MOD;
}
static bool IsBinaryOp(Value op) {
return COMMA <= op && op <= MOD;
}
static bool IsTruncatingBinaryOp(Value op) {
return BIT_OR <= op && op <= ROR;
}
static bool IsCompareOp(Value op) {
return EQ <= op && op <= IN;
}
static bool IsOrderedRelationalCompareOp(Value op) {
return op == LT || op == LTE || op == GT || op == GTE;
}
static bool IsEqualityOp(Value op) {
return op == EQ || op == EQ_STRICT;
}
static bool IsInequalityOp(Value op) {
return op == NE || op == NE_STRICT;
}
static bool IsArithmeticCompareOp(Value op) {
return IsOrderedRelationalCompareOp(op) ||
IsEqualityOp(op) || IsInequalityOp(op);
}
static Value NegateCompareOp(Value op) {
BOOST_ASSERT(IsArithmeticCompareOp(op));
switch (op) {
case EQ: return NE;
case NE: return EQ;
case EQ_STRICT: return NE_STRICT;
case NE_STRICT: return EQ_STRICT;
case LT: return GTE;
case GT: return LTE;
case LTE: return GT;
case GTE: return LT;
default:
BOOST_ASSERT(false); // should not get here
return op;
}
}
static Value ReverseCompareOp(Value op) {
BOOST_ASSERT(IsArithmeticCompareOp(op));
switch (op) {
case EQ: return EQ;
case NE: return NE;
case EQ_STRICT: return EQ_STRICT;
case NE_STRICT: return NE_STRICT;
case LT: return GT;
case GT: return LT;
case LTE: return GTE;
case GTE: return LTE;
default:
BOOST_ASSERT(false); // should not get here
return op;
}
}
static bool IsBitOp(Value op) {
return (BIT_OR <= op && op <= SHR) || op == BIT_NOT;
}
static bool IsUnaryOp(Value op) {
return (NOT <= op && op <= VOID) || op == ADD || op == SUB;
}
static bool IsCountOp(Value op) {
return op == INC || op == DEC;
}
static bool IsShiftOp(Value op) {
return (SHL <= op) && (op <= SHR);
}
// Returns a string corresponding to the JS token string
// (.e., "<" for the token LT) or NULL if the token doesn't
// have a (unique) string (e.g. an IDENTIFIER).
static const char* String(Value tok) {
BOOST_ASSERT(tok < NUM_TOKENS); // tok is unsigned.
return m_string[tok];
}
// Returns the precedence > 0 for binary and compare
// operators; returns 0 otherwise.
static int Precedence(Value tok) {
BOOST_ASSERT(tok < NUM_TOKENS); // tok is unsigned.
return m_precedence[tok];
}
private:
static const char* const m_name[NUM_TOKENS];
static const char* const m_string[NUM_TOKENS];
static const int8_t m_precedence[NUM_TOKENS];
static const char m_tokenType[NUM_TOKENS];
};
} }