mirror of
https://github.com/ethereum/solidity
synced 2023-10-03 13:03:40 +00:00
ef59373871
The scanner is a modified version of the v8 javascript scanner.
253 lines
8.0 KiB
C++
253 lines
8.0 KiB
C++
// Copyright 2006-2012, the V8 project authors. All rights reserved.
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
//
|
|
// * Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above
|
|
// copyright notice, this list of conditions and the following
|
|
// disclaimer in the documentation and/or other materials provided
|
|
// with the distribution.
|
|
// * Neither the name of Google Inc. nor the names of its
|
|
// contributors may be used to endorse or promote products derived
|
|
// from this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
//
|
|
// Modifications as part of cpp-ethereum under the following license:
|
|
//
|
|
// cpp-ethereum is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// cpp-ethereum is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
#pragma once
|
|
|
|
#include <boost/assert.hpp>
|
|
|
|
#include <libdevcore/Common.h>
|
|
#include <libdevcore/Log.h>
|
|
#include <libdevcore/CommonData.h>
|
|
#include <libsolidity/Token.h>
|
|
|
|
namespace dev {
|
|
namespace solidity {
|
|
|
|
|
|
class AstRawString;
|
|
class AstValueFactory;
|
|
class ParserRecorder;
|
|
|
|
class CharStream {
|
|
public:
|
|
CharStream()
|
|
: m_pos(0)
|
|
{}
|
|
|
|
explicit CharStream(const std::string& _source)
|
|
: m_source(_source), m_pos(0)
|
|
{}
|
|
int getPos() const { return m_pos; }
|
|
bool isPastEndOfInput() const { return m_pos >= m_source.size(); }
|
|
char get() const { return m_source[m_pos]; }
|
|
char advanceAndGet() {
|
|
if (isPastEndOfInput()) return 0;
|
|
++m_pos;
|
|
if (isPastEndOfInput()) return 0;
|
|
return get();
|
|
}
|
|
char rollback(size_t _amount) {
|
|
BOOST_ASSERT(m_pos >= _amount);
|
|
m_pos -= _amount;
|
|
return get();
|
|
}
|
|
|
|
private:
|
|
std::string m_source;
|
|
size_t m_pos;
|
|
};
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// JavaScript Scanner.
|
|
|
|
class Scanner {
|
|
public:
|
|
// Scoped helper for literal recording. Automatically drops the literal
|
|
// if aborting the scanning before it's complete.
|
|
class LiteralScope {
|
|
public:
|
|
explicit LiteralScope(Scanner* self)
|
|
: scanner_(self), complete_(false) {
|
|
scanner_->startNewLiteral();
|
|
}
|
|
~LiteralScope() {
|
|
if (!complete_) scanner_->dropLiteral();
|
|
}
|
|
void Complete() {
|
|
complete_ = true;
|
|
}
|
|
|
|
private:
|
|
Scanner* scanner_;
|
|
bool complete_;
|
|
};
|
|
|
|
// Representation of an interval of source positions.
|
|
struct Location {
|
|
Location(int b, int e) : beg_pos(b), end_pos(e) { }
|
|
Location() : beg_pos(0), end_pos(0) { }
|
|
|
|
bool IsValid() const {
|
|
return beg_pos >= 0 && end_pos >= beg_pos;
|
|
}
|
|
|
|
static Location invalid() { return Location(-1, -1); }
|
|
|
|
int beg_pos;
|
|
int end_pos;
|
|
};
|
|
|
|
explicit Scanner(const CharStream& _source);
|
|
|
|
// Resets the scanner as if newly constructed with _input as input.
|
|
void reset(const CharStream& _source);
|
|
|
|
// Returns the next token and advances input.
|
|
Token::Value next();
|
|
// Returns the current token again.
|
|
Token::Value getCurrentToken() { return m_current_token.token; }
|
|
// Returns the location information for the current token
|
|
// (the token last returned by Next()).
|
|
Location getCurrentLocation() const { return m_current_token.location; }
|
|
const std::string& getCurrentLiteral() const { return m_current_token.literal; }
|
|
|
|
// Similar functions for the upcoming token.
|
|
|
|
// One token look-ahead (past the token returned by Next()).
|
|
Token::Value peek() const { return m_next_token.token; }
|
|
|
|
Location peekLocation() const { return m_next_token.location; }
|
|
const std::string& peekLiteral() const { return m_next_token.literal; }
|
|
|
|
// Returns true if there was a line terminator before the peek'ed token,
|
|
// possibly inside a multi-line comment.
|
|
bool hasAnyLineTerminatorBeforeNext() const {
|
|
return m_hasLineTerminatorBeforeNext ||
|
|
m_hasMultilineCommentBeforeNext;
|
|
}
|
|
|
|
private:
|
|
// Used for the current and look-ahead token.
|
|
struct TokenDesc {
|
|
Token::Value token;
|
|
Location location;
|
|
std::string literal;
|
|
};
|
|
|
|
static const int kCharacterLookaheadBufferSize = 1;
|
|
|
|
// Literal buffer support
|
|
inline void startNewLiteral() {
|
|
m_next_token.literal.clear();
|
|
}
|
|
|
|
inline void addLiteralChar(char c) {
|
|
m_next_token.literal.push_back(c);
|
|
}
|
|
|
|
inline void dropLiteral() {
|
|
m_next_token.literal.clear();
|
|
}
|
|
|
|
inline void addLiteralCharAndAdvance() {
|
|
addLiteralChar(m_char);
|
|
advance();
|
|
}
|
|
|
|
// Low-level scanning support.
|
|
bool advance() { m_char = m_source.advanceAndGet(); return !m_source.isPastEndOfInput(); }
|
|
void rollback(int amount) {
|
|
m_char = m_source.rollback(amount);
|
|
}
|
|
|
|
inline Token::Value selectToken(Token::Value tok) {
|
|
advance();
|
|
return tok;
|
|
}
|
|
|
|
inline Token::Value selectToken(char next, Token::Value then, Token::Value else_) {
|
|
advance();
|
|
if (m_char == next) {
|
|
advance();
|
|
return then;
|
|
} else {
|
|
return else_;
|
|
}
|
|
}
|
|
|
|
bool scanHexNumber(char& scanned_number, int expected_length);
|
|
|
|
// Scans a single JavaScript token.
|
|
void scanToken();
|
|
|
|
bool skipWhitespace();
|
|
Token::Value skipSingleLineComment();
|
|
Token::Value skipMultiLineComment();
|
|
|
|
void scanDecimalDigits();
|
|
Token::Value scanNumber(bool _periodSeen);
|
|
Token::Value scanIdentifierOrKeyword();
|
|
|
|
Token::Value scanString();
|
|
|
|
// Scans an escape-sequence which is part of a string and adds the
|
|
// decoded character to the current literal. Returns true if a pattern
|
|
// is scanned.
|
|
bool scanEscape();
|
|
|
|
// Return the current source position.
|
|
int getSourcePos() {
|
|
return m_source.getPos();
|
|
}
|
|
bool isSourcePastEndOfInput() {
|
|
return m_source.isPastEndOfInput();
|
|
}
|
|
|
|
TokenDesc m_current_token; // desc for current token (as returned by Next())
|
|
TokenDesc m_next_token; // desc for next token (one token look-ahead)
|
|
|
|
CharStream m_source;
|
|
|
|
// one character look-ahead, equals 0 at end of input
|
|
char m_char;
|
|
|
|
// Whether there is a line terminator whitespace character after
|
|
// the current token, and before the next. Does not count newlines
|
|
// inside multiline comments.
|
|
bool m_hasLineTerminatorBeforeNext;
|
|
// Whether there is a multi-line comment that contains a
|
|
// line-terminator after the current token, and before the next.
|
|
bool m_hasMultilineCommentBeforeNext;
|
|
};
|
|
|
|
} }
|