solidity/Scanner.cpp

// Copyright 2006-2012, the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//    * Redistributions of source code must retain the above copyright
//      notice, this list of conditions and the following disclaimer.
//    * Redistributions in binary form must reproduce the above
//      copyright notice, this list of conditions and the following
//      disclaimer in the documentation and/or other materials provided
//      with the distribution.
//    * Neither the name of Google Inc. nor the names of its
//      contributors may be used to endorse or promote products derived
//      from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Modifications as part of cpp-ethereum under the following license:
//
// cpp-ethereum is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// cpp-ethereum is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.

#include <libsolidity/Scanner.h>

namespace dev {
namespace solidity {

namespace {
    bool IsDecimalDigit(char c) {
        return '0' <= c && c <= '9';
    }
    bool IsHexDigit(char c) {
        return IsDecimalDigit(c)
                || ('a' <= c && c <= 'f')
                || ('A' <= c && c <= 'F');
    }
    bool IsLineTerminator(char c) { return c == '\n'; }
    bool IsWhiteSpace(char c) {
        return c == ' ' || c == '\n' || c == '\t';
    }
    bool IsIdentifierStart(char c) {
        return c == '_' || c == '$' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
    }
    bool IsIdentifierPart(char c) {
        return IsIdentifierStart(c) || IsDecimalDigit(c);
    }

    int HexValue(char c) {
        if (c >= '0' && c <= '9') return c - '0';
        else if (c >= 'a' && c <= 'f') return c - 'a' + 10;
        else if (c >= 'A' && c <= 'F') return c - 'A' + 10;
        else return -1;
    }
}

Scanner::Scanner(const CharStream& _source)
{
    reset(_source);
}

void Scanner::reset(const CharStream& _source)
{
    m_source = _source;

    m_char = m_source.get();
    skipWhitespace();
    scanToken();
    next();
}


bool Scanner::scanHexNumber(char& scanned_number, int expected_length)
{
    BOOST_ASSERT(expected_length <= 4);  // prevent overflow

    char x = 0;
    for (int i = 0; i < expected_length; i++) {
        int d = HexValue(m_char);
        if (d < 0) {
            rollback(i);
            return false;
        }
        x = x * 16 + d;
        advance();
    }

    scanned_number = x;
    return true;
}


// Ensure that tokens can be stored in a byte.
BOOST_STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);

Token::Value Scanner::next()
{
    m_current_token = m_next_token;
    m_hasLineTerminatorBeforeNext = false;
    m_hasMultilineCommentBeforeNext = false;
    scanToken();
    return m_current_token.token;
}

bool Scanner::skipWhitespace()
{
    const int start_position = getSourcePos();

    while (true) {
      if (IsLineTerminator(m_char)) {
        m_hasLineTerminatorBeforeNext = true;
      } else if (!IsWhiteSpace(m_char)) {
        break;
      }
      advance();
    }

    // Return whether or not we skipped any characters.
    return getSourcePos() != start_position;
}


Token::Value Scanner::skipSingleLineComment()
{
  // The line terminator at the end of the line is not considered
  // to be part of the single-line comment; it is recognized
  // separately by the lexical grammar and becomes part of the
  // stream of input elements for the syntactic grammar
  while (advance() && !IsLineTerminator(m_char)) { };

  return Token::WHITESPACE;
}

Token::Value Scanner::skipMultiLineComment()
{
    BOOST_ASSERT(m_char == '*');
    advance();

    while (!isSourcePastEndOfInput()) {
        char ch = m_char;
        advance();
        if (IsLineTerminator(ch)) {
            // Following ECMA-262, section 7.4, a comment containing
            // a newline will make the comment count as a line-terminator.
            m_hasMultilineCommentBeforeNext = true;
        }
        // If we have reached the end of the multi-line comment, we
        // consume the '/' and insert a whitespace. This way all
        // multi-line comments are treated as whitespace.
        if (ch == '*' && m_char == '/') {
            m_char = ' ';
            return Token::WHITESPACE;
        }
    }

    // Unterminated multi-line comment.
    return Token::ILLEGAL;
}

void Scanner::scanToken()
{
  m_next_token.literal.clear();
  Token::Value token;
  do {
    // Remember the position of the next token
    m_next_token.location.beg_pos = getSourcePos();

    switch (m_char) {
      case '\n':
        m_hasLineTerminatorBeforeNext = true; // fall-through
      case ' ':
      case '\t':
        token = selectToken(Token::WHITESPACE);
        break;

      case '"': case '\'':
        token = scanString();
        break;

      case '<':
        // < <= << <<=
        advance();
        if (m_char == '=') {
          token = selectToken(Token::LTE);
        } else if (m_char == '<') {
          token = selectToken('=', Token::ASSIGN_SHL, Token::SHL);
        } else {
          token = Token::LT;
        }
        break;

      case '>':
        // > >= >> >>= >>> >>>=
        advance();
        if (m_char == '=') {
          token = selectToken(Token::GTE);
        } else if (m_char == '>') {
          // >> >>= >>> >>>=
          advance();
          if (m_char == '=') {
            token = selectToken(Token::ASSIGN_SAR);
          } else if (m_char == '>') {
            token = selectToken('=', Token::ASSIGN_SHR, Token::SHR);
          } else {
            token = Token::SAR;
          }
        } else {
          token = Token::GT;
        }
        break;

      case '=':
        // = == =>
        advance();
        if (m_char == '=') {
          token = selectToken(Token::EQ);
        } else if (m_char == '>') {
          token = selectToken(Token::ARROW);
        } else {
          token = Token::ASSIGN;
        }
        break;

      case '!':
        // ! != !==
        advance();
        if (m_char == '=') {
          token = selectToken(Token::NE);
        } else {
          token = Token::NOT;
        }
        break;

      case '+':
        // + ++ +=
        advance();
        if (m_char == '+') {
          token = selectToken(Token::INC);
        } else if (m_char == '=') {
          token = selectToken(Token::ASSIGN_ADD);
        } else {
          token = Token::ADD;
        }
        break;

      case '-':
        // - -- -=
        advance();
        if (m_char == '-') {
          advance();
          token = Token::DEC;
        } else if (m_char == '=') {
          token = selectToken(Token::ASSIGN_SUB);
        } else {
          token = Token::SUB;
        }
        break;

      case '*':
        // * *=
        token = selectToken('=', Token::ASSIGN_MUL, Token::MUL);
        break;

      case '%':
        // % %=
        token = selectToken('=', Token::ASSIGN_MOD, Token::MOD);
        break;

      case '/':
        // /  // /* /=
        advance();
        if (m_char == '/') {
          token = skipSingleLineComment();
        } else if (m_char == '*') {
          token = skipMultiLineComment();
        } else if (m_char == '=') {
          token = selectToken(Token::ASSIGN_DIV);
        } else {
          token = Token::DIV;
        }
        break;

      case '&':
        // & && &=
        advance();
        if (m_char == '&') {
          token = selectToken(Token::AND);
        } else if (m_char == '=') {
          token = selectToken(Token::ASSIGN_BIT_AND);
        } else {
          token = Token::BIT_AND;
        }
        break;

      case '|':
        // | || |=
        advance();
        if (m_char == '|') {
          token = selectToken(Token::OR);
        } else if (m_char == '=') {
          token = selectToken(Token::ASSIGN_BIT_OR);
        } else {
          token = Token::BIT_OR;
        }
        break;

      case '^':
        // ^ ^=
        token = selectToken('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);
        break;

      case '.':
        // . Number
        advance();
        if (IsDecimalDigit(m_char)) {
          token = scanNumber(true);
        } else {
          token = Token::PERIOD;
        }
        break;

      case ':':
        token = selectToken(Token::COLON);
        break;

      case ';':
        token = selectToken(Token::SEMICOLON);
        break;

      case ',':
        token = selectToken(Token::COMMA);
        break;

      case '(':
        token = selectToken(Token::LPAREN);
        break;

      case ')':
        token = selectToken(Token::RPAREN);
        break;

      case '[':
        token = selectToken(Token::LBRACK);
        break;

      case ']':
        token = selectToken(Token::RBRACK);
        break;

      case '{':
        token = selectToken(Token::LBRACE);
        break;

      case '}':
        token = selectToken(Token::RBRACE);
        break;

      case '?':
        token = selectToken(Token::CONDITIONAL);
        break;

      case '~':
        token = selectToken(Token::BIT_NOT);
        break;

      default:
        if (IsIdentifierStart(m_char)) {
          token = scanIdentifierOrKeyword();
        } else if (IsDecimalDigit(m_char)) {
          token = scanNumber(false);
        } else if (skipWhitespace()) {
          token = Token::WHITESPACE;
        } else if (isSourcePastEndOfInput()) {
          token = Token::EOS;
        } else {
          token = selectToken(Token::ILLEGAL);
        }
        break;
    }

    // Continue scanning for tokens as long as we're just skipping
    // whitespace.
  } while (token == Token::WHITESPACE);

  m_next_token.location.end_pos = getSourcePos();
  m_next_token.token = token;
}

bool Scanner::scanEscape()
{
    char c = m_char;
    advance();

    // Skip escaped newlines.
    if (IsLineTerminator(c))
        return true;

    switch (c) {
    case '\'':  // fall through
    case '"' :  // fall through
    case '\\': break;
    case 'b' : c = '\b'; break;
    case 'f' : c = '\f'; break;
    case 'n' : c = '\n'; break;
    case 'r' : c = '\r'; break;
    case 't' : c = '\t'; break;
    case 'u' : {
        if (!scanHexNumber(c, 4)) return false;
        break;
    }
    case 'v' : c = '\v'; break;
    case 'x' : {
        if (!scanHexNumber(c, 2)) return false;
        break;
    }
    }

    // According to ECMA-262, section 7.8.4, characters not covered by the
    // above cases should be illegal, but they are commonly handled as
    // non-escaped characters by JS VMs.
    addLiteralChar(c);
    return true;
}

Token::Value Scanner::scanString()
{
    const char quote = m_char;
    advance();  // consume quote

    LiteralScope literal(this);
    while (m_char != quote && !isSourcePastEndOfInput() && !IsLineTerminator(m_char)) {
        char c = m_char;
        advance();
        if (c == '\\') {
            if (isSourcePastEndOfInput() || !scanEscape()) return Token::ILLEGAL;
        } else {
            addLiteralChar(c);
        }
    }
    if (m_char != quote) return Token::ILLEGAL;
    literal.Complete();

    advance();  // consume quote
    return Token::STRING_LITERAL;
}


void Scanner::scanDecimalDigits()
{
    while (IsDecimalDigit(m_char))
        addLiteralCharAndAdvance();
}


Token::Value Scanner::scanNumber(bool _periodSeen)
{
  BOOST_ASSERT(IsDecimalDigit(m_char));  // the first digit of the number or the fraction

  enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL;

  LiteralScope literal(this);
  if (_periodSeen) {
    // we have already seen a decimal point of the float
    addLiteralChar('.');
    scanDecimalDigits();  // we know we have at least one digit
  } else {
    // if the first character is '0' we must check for octals and hex
    if (m_char == '0') {
      addLiteralCharAndAdvance();

      // either 0, 0exxx, 0Exxx, 0.xxx, a hex number, a binary number or
      // an octal number.
      if (m_char == 'x' || m_char == 'X') {
        // hex number
        kind = HEX;
        addLiteralCharAndAdvance();
        if (!IsHexDigit(m_char)) {
          // we must have at least one hex digit after 'x'/'X'
          return Token::ILLEGAL;
        }
        while (IsHexDigit(m_char)) {
          addLiteralCharAndAdvance();
        }
      }
    }

    // Parse decimal digits and allow trailing fractional part.
    if (kind == DECIMAL) {
      scanDecimalDigits();  // optional
      if (m_char == '.') {
        addLiteralCharAndAdvance();
        scanDecimalDigits();  // optional
      }
    }
  }

  // scan exponent, if any
  if (m_char == 'e' || m_char == 'E') {
    BOOST_ASSERT(kind != HEX);  // 'e'/'E' must be scanned as part of the hex number
    if (kind != DECIMAL) return Token::ILLEGAL;
    // scan exponent
    addLiteralCharAndAdvance();
    if (m_char == '+' || m_char == '-')
      addLiteralCharAndAdvance();
    if (!IsDecimalDigit(m_char)) {
      // we must have at least one decimal digit after 'e'/'E'
      return Token::ILLEGAL;
    }
    scanDecimalDigits();
  }

  // The source character immediately following a numeric literal must
  // not be an identifier start or a decimal digit; see ECMA-262
  // section 7.8.3, page 17 (note that we read only one decimal digit
  // if the value is 0).
  if (IsDecimalDigit(m_char) || IsIdentifierStart(m_char))
    return Token::ILLEGAL;

  literal.Complete();

  return Token::NUMBER;
}


// ----------------------------------------------------------------------------
// Keyword Matcher

#define KEYWORDS(KEYWORD_GROUP, KEYWORD)                                     \
  KEYWORD_GROUP('a')                                                         \
  KEYWORD("address", Token::BREAK)                                           \
  KEYWORD_GROUP('b')                                                         \
  KEYWORD("break", Token::BREAK)                                             \
  KEYWORD("bool", Token::BOOL)                                               \
  KEYWORD_GROUP('c')                                                         \
  KEYWORD("case", Token::CASE)                                               \
  KEYWORD("catch", Token::CATCH)                                             \
  KEYWORD("const", Token::CONST)                                             \
  KEYWORD("continue", Token::CONTINUE)                                       \
  KEYWORD("contract", Token::CONTRACT)                                       \
  KEYWORD_GROUP('d')                                                         \
  KEYWORD("debugger", Token::DEBUGGER)                                       \
  KEYWORD("default", Token::DEFAULT)                                         \
  KEYWORD("delete", Token::DELETE)                                           \
  KEYWORD("do", Token::DO)                                                   \
  KEYWORD_GROUP('e')                                                         \
  KEYWORD("else", Token::ELSE)                                               \
  KEYWORD("enum", Token::FUTURE_RESERVED_WORD)                               \
  KEYWORD_GROUP('f')                                                         \
  KEYWORD("false", Token::FALSE_LITERAL)                                     \
  KEYWORD("finally", Token::FINALLY)                                         \
  KEYWORD("for", Token::FOR)                                                 \
  KEYWORD("function", Token::FUNCTION)                                       \
  KEYWORD_GROUP('h')                                                         \
  KEYWORD("hash", Token::HASH)                                               \
  KEYWORD("hash32", Token::HASH32)                                           \
  KEYWORD("hash64", Token::HASH64)                                           \
  KEYWORD("hash128", Token::HASH128)                                         \
  KEYWORD("hash256", Token::HASH256)                                         \
  KEYWORD_GROUP('i')                                                         \
  KEYWORD("if", Token::IF)                                                   \
  KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD)                  \
  KEYWORD("in", Token::IN)                                                   \
  KEYWORD("instanceof", Token::INSTANCEOF)                                   \
  KEYWORD("int", Token::INT)                                                 \
  KEYWORD("int32", Token::INT32)                                             \
  KEYWORD("int64", Token::INT64)                                             \
  KEYWORD("int128", Token::INT128)                                           \
  KEYWORD("int256", Token::INT256)                                           \
  KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD)                   \
  KEYWORD_GROUP('l')                                                         \
  KEYWORD_GROUP('m')                                                         \
  KEYWORD_GROUP('n')                                                         \
  KEYWORD("mapping", Token::MAPPING)                                         \
  KEYWORD("new", Token::NEW)                                                 \
  KEYWORD("null", Token::NULL_LITERAL)                                       \
  KEYWORD_GROUP('p')                                                         \
  KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD)                     \
  KEYWORD("private", Token::PRIVATE)                                         \
  KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD)                   \
  KEYWORD("public", Token::PUBLIC)                                           \
  KEYWORD_GROUP('r')                                                         \
  KEYWORD("real", Token::REAL)                                               \
  KEYWORD("return", Token::RETURN)                                           \
  KEYWORD_GROUP('s')                                                         \
  KEYWORD("string", Token::STRING_TYPE)                                           \
  KEYWORD("struct", Token::STRUCT)                                           \
  KEYWORD("switch", Token::SWITCH)                                           \
  KEYWORD_GROUP('t')                                                         \
  KEYWORD("text", Token::TEXT)                                               \
  KEYWORD("this", Token::THIS)                                               \
  KEYWORD("throw", Token::THROW)                                             \
  KEYWORD("true", Token::TRUE_LITERAL)                                       \
  KEYWORD("try", Token::TRY)                                                 \
  KEYWORD("typeof", Token::TYPEOF)                                           \
  KEYWORD_GROUP('u')                                                         \
  KEYWORD("uint", Token::UINT)                                               \
  KEYWORD("uint32", Token::UINT32)                                           \
  KEYWORD("uint64", Token::UINT64)                                           \
  KEYWORD("uint128", Token::UINT128)                                         \
  KEYWORD("uint256", Token::UINT256)                                         \
  KEYWORD("ureal", Token::UREAL)                                             \
  KEYWORD_GROUP('v')                                                         \
  KEYWORD("var", Token::VAR)                                                 \
  KEYWORD("void", Token::VOID)                                               \
  KEYWORD_GROUP('w')                                                         \
  KEYWORD("while", Token::WHILE)                                             \
  KEYWORD("with", Token::WITH)


static Token::Value KeywordOrIdentifierToken(const std::string& input)
{
  BOOST_ASSERT(!input.empty());
  const int kMinLength = 2;
  const int kMaxLength = 10;
  if (input.size() < kMinLength || input.size() > kMaxLength) {
    return Token::IDENTIFIER;
  }
  switch (input[0]) {
    default:
#define KEYWORD_GROUP_CASE(ch)                                \
      break;                                                  \
    case ch:
#define KEYWORD(keyword, token)                               \
    {                                                         \
      /* 'keyword' is a char array, so sizeof(keyword) is */  \
      /* strlen(keyword) plus 1 for the NUL char. */          \
      const int keyword_length = sizeof(keyword) - 1;         \
      BOOST_STATIC_ASSERT(keyword_length >= kMinLength);      \
      BOOST_STATIC_ASSERT(keyword_length <= kMaxLength);      \
      if (input == keyword) {                                 \
        return token;                                         \
      }                                                       \
    }
    KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)
  }
  return Token::IDENTIFIER;
}

Token::Value Scanner::scanIdentifierOrKeyword()
{
    BOOST_ASSERT(IsIdentifierStart(m_char));
    LiteralScope literal(this);

    addLiteralCharAndAdvance();

    // Scan the rest of the identifier characters.
    while (IsIdentifierPart(m_char))
        addLiteralCharAndAdvance();

    literal.Complete();

    return KeywordOrIdentifierToken(m_next_token.literal);
}


} }
Solidity scanner and some unit tests. The scanner is a modified version of the v8 javascript scanner. 2014-10-06 15:13:52 +00:00			`// Copyright 2006-2012, the V8 project authors. All rights reserved.`
			`// Redistribution and use in source and binary forms, with or without`
			`// modification, are permitted provided that the following conditions are`
			`// met:`
			`//`
			`// * Redistributions of source code must retain the above copyright`
			`// notice, this list of conditions and the following disclaimer.`
			`// * Redistributions in binary form must reproduce the above`
			`// copyright notice, this list of conditions and the following`
			`// disclaimer in the documentation and/or other materials provided`
			`// with the distribution.`
			`// * Neither the name of Google Inc. nor the names of its`
			`// contributors may be used to endorse or promote products derived`
			`// from this software without specific prior written permission.`
			`//`
			`// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS`
			`// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT`
			`// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR`
			`// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT`
			`// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,`
			`// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT`
			`// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,`
			`// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY`
			`// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT`
			`// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE`
			`// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
			`//`
			`// Modifications as part of cpp-ethereum under the following license:`
			`//`
			`// cpp-ethereum is free software: you can redistribute it and/or modify`
			`// it under the terms of the GNU General Public License as published by`
			`// the Free Software Foundation, either version 3 of the License, or`
			`// (at your option) any later version.`
			`//`
			`// cpp-ethereum is distributed in the hope that it will be useful,`
			`// but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`// GNU General Public License for more details.`
			`//`
			`// You should have received a copy of the GNU General Public License`
			`// along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.`

			`#include <libsolidity/Scanner.h>`

			`namespace dev {`
			`namespace solidity {`

			`namespace {`
			`bool IsDecimalDigit(char c) {`
			`return '0' <= c && c <= '9';`
			`}`
			`bool IsHexDigit(char c) {`
			`return IsDecimalDigit(c)`
			`\|\| ('a' <= c && c <= 'f')`
			`\|\| ('A' <= c && c <= 'F');`
			`}`
			`bool IsLineTerminator(char c) { return c == '\n'; }`
			`bool IsWhiteSpace(char c) {`
			`return c == ' ' \|\| c == '\n' \|\| c == '\t';`
			`}`
			`bool IsIdentifierStart(char c) {`
			`return c == '_' \|\| c == '$' \|\| ('a' <= c && c <= 'z') \|\| ('A' <= c && c <= 'Z');`
			`}`
			`bool IsIdentifierPart(char c) {`
			`return IsIdentifierStart(c) \|\| IsDecimalDigit(c);`
			`}`

			`int HexValue(char c) {`
			`if (c >= '0' && c <= '9') return c - '0';`
			`else if (c >= 'a' && c <= 'f') return c - 'a' + 10;`
			`else if (c >= 'A' && c <= 'F') return c - 'A' + 10;`
			`else return -1;`
			`}`
			`}`

			`Scanner::Scanner(const CharStream& _source)`
			`{`
			`reset(_source);`
			`}`

			`void Scanner::reset(const CharStream& _source)`
			`{`
			`m_source = _source;`

			`m_char = m_source.get();`
			`skipWhitespace();`
			`scanToken();`
Solidity parser, can not parse much yet. 2014-10-07 16:25:04 +00:00			`next();`
Solidity scanner and some unit tests. The scanner is a modified version of the v8 javascript scanner. 2014-10-06 15:13:52 +00:00			`}`


			`bool Scanner::scanHexNumber(char& scanned_number, int expected_length)`
			`{`
			`BOOST_ASSERT(expected_length <= 4); // prevent overflow`

			`char x = 0;`
			`for (int i = 0; i < expected_length; i++) {`
			`int d = HexValue(m_char);`
			`if (d < 0) {`
			`rollback(i);`
			`return false;`
			`}`
			`x = x * 16 + d;`
			`advance();`
			`}`

			`scanned_number = x;`
			`return true;`
			`}`


			`// Ensure that tokens can be stored in a byte.`
			`BOOST_STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);`

			`Token::Value Scanner::next()`
			`{`
			`m_current_token = m_next_token;`
			`m_hasLineTerminatorBeforeNext = false;`
			`m_hasMultilineCommentBeforeNext = false;`
			`scanToken();`
			`return m_current_token.token;`
			`}`

			`bool Scanner::skipWhitespace()`
			`{`
			`const int start_position = getSourcePos();`

			`while (true) {`
			`if (IsLineTerminator(m_char)) {`
			`m_hasLineTerminatorBeforeNext = true;`
			`} else if (!IsWhiteSpace(m_char)) {`
			`break;`
			`}`
			`advance();`
			`}`

			`// Return whether or not we skipped any characters.`
			`return getSourcePos() != start_position;`
			`}`


			`Token::Value Scanner::skipSingleLineComment()`
			`{`
			`// The line terminator at the end of the line is not considered`
			`// to be part of the single-line comment; it is recognized`
			`// separately by the lexical grammar and becomes part of the`
			`// stream of input elements for the syntactic grammar`
			`while (advance() && !IsLineTerminator(m_char)) { };`

			`return Token::WHITESPACE;`
			`}`

			`Token::Value Scanner::skipMultiLineComment()`
			`{`
			`BOOST_ASSERT(m_char == '*');`
			`advance();`

			`while (!isSourcePastEndOfInput()) {`
			`char ch = m_char;`
			`advance();`
			`if (IsLineTerminator(ch)) {`
			`// Following ECMA-262, section 7.4, a comment containing`
			`// a newline will make the comment count as a line-terminator.`
			`m_hasMultilineCommentBeforeNext = true;`
			`}`
			`// If we have reached the end of the multi-line comment, we`
			`// consume the '/' and insert a whitespace. This way all`
			`// multi-line comments are treated as whitespace.`
			`if (ch == '*' && m_char == '/') {`
			`m_char = ' ';`
			`return Token::WHITESPACE;`
			`}`
			`}`

			`// Unterminated multi-line comment.`
			`return Token::ILLEGAL;`
			`}`

			`void Scanner::scanToken()`
			`{`
			`m_next_token.literal.clear();`
			`Token::Value token;`
			`do {`
			`// Remember the position of the next token`
			`m_next_token.location.beg_pos = getSourcePos();`

			`switch (m_char) {`
			`case '\n':`
			`m_hasLineTerminatorBeforeNext = true; // fall-through`
			`case ' ':`
			`case '\t':`
			`token = selectToken(Token::WHITESPACE);`
			`break;`

			`case '"': case '\'':`
			`token = scanString();`
			`break;`

			`case '<':`
			`// < <= << <<=`
			`advance();`
			`if (m_char == '=') {`
			`token = selectToken(Token::LTE);`
			`} else if (m_char == '<') {`
			`token = selectToken('=', Token::ASSIGN_SHL, Token::SHL);`
			`} else {`
			`token = Token::LT;`
			`}`
			`break;`

			`case '>':`
			`// > >= >> >>= >>> >>>=`
			`advance();`
			`if (m_char == '=') {`
			`token = selectToken(Token::GTE);`
			`} else if (m_char == '>') {`
			`// >> >>= >>> >>>=`
			`advance();`
			`if (m_char == '=') {`
			`token = selectToken(Token::ASSIGN_SAR);`
			`} else if (m_char == '>') {`
			`token = selectToken('=', Token::ASSIGN_SHR, Token::SHR);`
			`} else {`
			`token = Token::SAR;`
			`}`
			`} else {`
			`token = Token::GT;`
			`}`
			`break;`

			`case '=':`
			`// = == =>`
			`advance();`
			`if (m_char == '=') {`
			`token = selectToken(Token::EQ);`
			`} else if (m_char == '>') {`
			`token = selectToken(Token::ARROW);`
			`} else {`
			`token = Token::ASSIGN;`
			`}`
			`break;`

			`case '!':`
			`// ! != !==`
			`advance();`
			`if (m_char == '=') {`
			`token = selectToken(Token::NE);`
			`} else {`
			`token = Token::NOT;`
			`}`
			`break;`

			`case '+':`
			`// + ++ +=`
			`advance();`
			`if (m_char == '+') {`
			`token = selectToken(Token::INC);`
			`} else if (m_char == '=') {`
			`token = selectToken(Token::ASSIGN_ADD);`
			`} else {`
			`token = Token::ADD;`
			`}`
			`break;`

			`case '-':`
			`// - -- -=`
			`advance();`
			`if (m_char == '-') {`
			`advance();`
			`token = Token::DEC;`
			`} else if (m_char == '=') {`
			`token = selectToken(Token::ASSIGN_SUB);`
			`} else {`
			`token = Token::SUB;`
			`}`
			`break;`

			`case '*':`
			`// * *=`
			`token = selectToken('=', Token::ASSIGN_MUL, Token::MUL);`
			`break;`

			`case '%':`
			`// % %=`
			`token = selectToken('=', Token::ASSIGN_MOD, Token::MOD);`
			`break;`

			`case '/':`
			`// / // /* /=`
			`advance();`
			`if (m_char == '/') {`
			`token = skipSingleLineComment();`
			`} else if (m_char == '*') {`
			`token = skipMultiLineComment();`
			`} else if (m_char == '=') {`
			`token = selectToken(Token::ASSIGN_DIV);`
			`} else {`
			`token = Token::DIV;`
			`}`
			`break;`

			`case '&':`
			`// & && &=`
			`advance();`
			`if (m_char == '&') {`
			`token = selectToken(Token::AND);`
			`} else if (m_char == '=') {`
			`token = selectToken(Token::ASSIGN_BIT_AND);`
			`} else {`
			`token = Token::BIT_AND;`
			`}`
			`break;`

			`case '\|':`
			`// \| \|\| \|=`
			`advance();`
			`if (m_char == '\|') {`
			`token = selectToken(Token::OR);`
			`} else if (m_char == '=') {`
			`token = selectToken(Token::ASSIGN_BIT_OR);`
			`} else {`
			`token = Token::BIT_OR;`
			`}`
			`break;`

			`case '^':`
			`// ^ ^=`
			`token = selectToken('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);`
			`break;`

			`case '.':`
			`// . Number`
			`advance();`
			`if (IsDecimalDigit(m_char)) {`
			`token = scanNumber(true);`
			`} else {`
			`token = Token::PERIOD;`
			`}`
			`break;`

			`case ':':`
			`token = selectToken(Token::COLON);`
			`break;`

			`case ';':`
			`token = selectToken(Token::SEMICOLON);`
			`break;`

			`case ',':`
			`token = selectToken(Token::COMMA);`
			`break;`

			`case '(':`
			`token = selectToken(Token::LPAREN);`
			`break;`

			`case ')':`
			`token = selectToken(Token::RPAREN);`
			`break;`

			`case '[':`
			`token = selectToken(Token::LBRACK);`
			`break;`

			`case ']':`
			`token = selectToken(Token::RBRACK);`
			`break;`

			`case '{':`
			`token = selectToken(Token::LBRACE);`
			`break;`

			`case '}':`
			`token = selectToken(Token::RBRACE);`
			`break;`

			`case '?':`
			`token = selectToken(Token::CONDITIONAL);`
			`break;`

			`case '~':`
			`token = selectToken(Token::BIT_NOT);`
			`break;`

			`default:`
			`if (IsIdentifierStart(m_char)) {`
			`token = scanIdentifierOrKeyword();`
			`} else if (IsDecimalDigit(m_char)) {`
			`token = scanNumber(false);`
			`} else if (skipWhitespace()) {`
			`token = Token::WHITESPACE;`
			`} else if (isSourcePastEndOfInput()) {`
			`token = Token::EOS;`
			`} else {`
			`token = selectToken(Token::ILLEGAL);`
			`}`
			`break;`
			`}`

			`// Continue scanning for tokens as long as we're just skipping`
			`// whitespace.`
			`} while (token == Token::WHITESPACE);`

			`m_next_token.location.end_pos = getSourcePos();`
			`m_next_token.token = token;`
			`}`

			`bool Scanner::scanEscape()`
			`{`
			`char c = m_char;`
			`advance();`

			`// Skip escaped newlines.`
			`if (IsLineTerminator(c))`
			`return true;`

			`switch (c) {`
			`case '\'': // fall through`
			`case '"' : // fall through`
			`case '\\': break;`
			`case 'b' : c = '\b'; break;`
			`case 'f' : c = '\f'; break;`
			`case 'n' : c = '\n'; break;`
			`case 'r' : c = '\r'; break;`
			`case 't' : c = '\t'; break;`
			`case 'u' : {`
			`if (!scanHexNumber(c, 4)) return false;`
			`break;`
			`}`
			`case 'v' : c = '\v'; break;`
			`case 'x' : {`
			`if (!scanHexNumber(c, 2)) return false;`
			`break;`
			`}`
			`}`

			`// According to ECMA-262, section 7.8.4, characters not covered by the`
			`// above cases should be illegal, but they are commonly handled as`
			`// non-escaped characters by JS VMs.`
			`addLiteralChar(c);`
			`return true;`
			`}`

			`Token::Value Scanner::scanString()`
			`{`
			`const char quote = m_char;`
			`advance(); // consume quote`

			`LiteralScope literal(this);`
			`while (m_char != quote && !isSourcePastEndOfInput() && !IsLineTerminator(m_char)) {`
			`char c = m_char;`
			`advance();`
			`if (c == '\\') {`
			`if (isSourcePastEndOfInput() \|\| !scanEscape()) return Token::ILLEGAL;`
			`} else {`
			`addLiteralChar(c);`
			`}`
			`}`
			`if (m_char != quote) return Token::ILLEGAL;`
			`literal.Complete();`

			`advance(); // consume quote`
Solidity parser, can not parse much yet. 2014-10-07 16:25:04 +00:00			`return Token::STRING_LITERAL;`
Solidity scanner and some unit tests. The scanner is a modified version of the v8 javascript scanner. 2014-10-06 15:13:52 +00:00			`}`


			`void Scanner::scanDecimalDigits()`
			`{`
			`while (IsDecimalDigit(m_char))`
			`addLiteralCharAndAdvance();`
			`}`


			`Token::Value Scanner::scanNumber(bool _periodSeen)`
			`{`
			`BOOST_ASSERT(IsDecimalDigit(m_char)); // the first digit of the number or the fraction`

			`enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL;`

			`LiteralScope literal(this);`
			`if (_periodSeen) {`
			`// we have already seen a decimal point of the float`
			`addLiteralChar('.');`
			`scanDecimalDigits(); // we know we have at least one digit`
			`} else {`
			`// if the first character is '0' we must check for octals and hex`
			`if (m_char == '0') {`
			`addLiteralCharAndAdvance();`

			`// either 0, 0exxx, 0Exxx, 0.xxx, a hex number, a binary number or`
			`// an octal number.`
			`if (m_char == 'x' \|\| m_char == 'X') {`
			`// hex number`
			`kind = HEX;`
			`addLiteralCharAndAdvance();`
			`if (!IsHexDigit(m_char)) {`
			`// we must have at least one hex digit after 'x'/'X'`
			`return Token::ILLEGAL;`
			`}`
			`while (IsHexDigit(m_char)) {`
			`addLiteralCharAndAdvance();`
			`}`
			`}`
			`}`

			`// Parse decimal digits and allow trailing fractional part.`
			`if (kind == DECIMAL) {`
			`scanDecimalDigits(); // optional`
			`if (m_char == '.') {`
			`addLiteralCharAndAdvance();`
			`scanDecimalDigits(); // optional`
			`}`
			`}`
			`}`

			`// scan exponent, if any`
			`if (m_char == 'e' \|\| m_char == 'E') {`
			`BOOST_ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number`
			`if (kind != DECIMAL) return Token::ILLEGAL;`
			`// scan exponent`
			`addLiteralCharAndAdvance();`
			`if (m_char == '+' \|\| m_char == '-')`
			`addLiteralCharAndAdvance();`
			`if (!IsDecimalDigit(m_char)) {`
			`// we must have at least one decimal digit after 'e'/'E'`
			`return Token::ILLEGAL;`
			`}`
			`scanDecimalDigits();`
			`}`

			`// The source character immediately following a numeric literal must`
			`// not be an identifier start or a decimal digit; see ECMA-262`
			`// section 7.8.3, page 17 (note that we read only one decimal digit`
			`// if the value is 0).`
			`if (IsDecimalDigit(m_char) \|\| IsIdentifierStart(m_char))`
			`return Token::ILLEGAL;`

			`literal.Complete();`

			`return Token::NUMBER;`
			`}`


			`// ----------------------------------------------------------------------------`
			`// Keyword Matcher`

			`#define KEYWORDS(KEYWORD_GROUP, KEYWORD) \`
Solidity parser, can not parse much yet. 2014-10-07 16:25:04 +00:00			`KEYWORD_GROUP('a') \`
			`KEYWORD("address", Token::BREAK) \`
Solidity scanner and some unit tests. The scanner is a modified version of the v8 javascript scanner. 2014-10-06 15:13:52 +00:00			`KEYWORD_GROUP('b') \`
			`KEYWORD("break", Token::BREAK) \`
Solidity parser, can not parse much yet. 2014-10-07 16:25:04 +00:00			`KEYWORD("bool", Token::BOOL) \`
Solidity scanner and some unit tests. The scanner is a modified version of the v8 javascript scanner. 2014-10-06 15:13:52 +00:00			`KEYWORD_GROUP('c') \`
			`KEYWORD("case", Token::CASE) \`
			`KEYWORD("catch", Token::CATCH) \`
			`KEYWORD("const", Token::CONST) \`
			`KEYWORD("continue", Token::CONTINUE) \`
Solidity parser, can not parse much yet. 2014-10-07 16:25:04 +00:00			`KEYWORD("contract", Token::CONTRACT) \`
Solidity scanner and some unit tests. The scanner is a modified version of the v8 javascript scanner. 2014-10-06 15:13:52 +00:00			`KEYWORD_GROUP('d') \`
			`KEYWORD("debugger", Token::DEBUGGER) \`
			`KEYWORD("default", Token::DEFAULT) \`
			`KEYWORD("delete", Token::DELETE) \`
			`KEYWORD("do", Token::DO) \`
			`KEYWORD_GROUP('e') \`
			`KEYWORD("else", Token::ELSE) \`
			`KEYWORD("enum", Token::FUTURE_RESERVED_WORD) \`
			`KEYWORD_GROUP('f') \`
			`KEYWORD("false", Token::FALSE_LITERAL) \`
			`KEYWORD("finally", Token::FINALLY) \`
			`KEYWORD("for", Token::FOR) \`
			`KEYWORD("function", Token::FUNCTION) \`
Solidity parser, can not parse much yet. 2014-10-07 16:25:04 +00:00			`KEYWORD_GROUP('h') \`
			`KEYWORD("hash", Token::HASH) \`
			`KEYWORD("hash32", Token::HASH32) \`
			`KEYWORD("hash64", Token::HASH64) \`
			`KEYWORD("hash128", Token::HASH128) \`
			`KEYWORD("hash256", Token::HASH256) \`
Solidity scanner and some unit tests. The scanner is a modified version of the v8 javascript scanner. 2014-10-06 15:13:52 +00:00			`KEYWORD_GROUP('i') \`
			`KEYWORD("if", Token::IF) \`
			`KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \`
			`KEYWORD("in", Token::IN) \`
			`KEYWORD("instanceof", Token::INSTANCEOF) \`
Solidity parser, can not parse much yet. 2014-10-07 16:25:04 +00:00			`KEYWORD("int", Token::INT) \`
			`KEYWORD("int32", Token::INT32) \`
			`KEYWORD("int64", Token::INT64) \`
			`KEYWORD("int128", Token::INT128) \`
			`KEYWORD("int256", Token::INT256) \`
Solidity scanner and some unit tests. The scanner is a modified version of the v8 javascript scanner. 2014-10-06 15:13:52 +00:00			`KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD) \`
			`KEYWORD_GROUP('l') \`
Solidity parser, can not parse much yet. 2014-10-07 16:25:04 +00:00			`KEYWORD_GROUP('m') \`
Solidity scanner and some unit tests. The scanner is a modified version of the v8 javascript scanner. 2014-10-06 15:13:52 +00:00			`KEYWORD_GROUP('n') \`
Solidity parser, can not parse much yet. 2014-10-07 16:25:04 +00:00			`KEYWORD("mapping", Token::MAPPING) \`
Solidity scanner and some unit tests. The scanner is a modified version of the v8 javascript scanner. 2014-10-06 15:13:52 +00:00			`KEYWORD("new", Token::NEW) \`
			`KEYWORD("null", Token::NULL_LITERAL) \`
			`KEYWORD_GROUP('p') \`
			`KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD) \`
Solidity parser, can not parse much yet. 2014-10-07 16:25:04 +00:00			`KEYWORD("private", Token::PRIVATE) \`
Solidity scanner and some unit tests. The scanner is a modified version of the v8 javascript scanner. 2014-10-06 15:13:52 +00:00			`KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD) \`
Solidity parser, can not parse much yet. 2014-10-07 16:25:04 +00:00			`KEYWORD("public", Token::PUBLIC) \`
Solidity scanner and some unit tests. The scanner is a modified version of the v8 javascript scanner. 2014-10-06 15:13:52 +00:00			`KEYWORD_GROUP('r') \`
Solidity parser, can not parse much yet. 2014-10-07 16:25:04 +00:00			`KEYWORD("real", Token::REAL) \`
Solidity scanner and some unit tests. The scanner is a modified version of the v8 javascript scanner. 2014-10-06 15:13:52 +00:00			`KEYWORD("return", Token::RETURN) \`
			`KEYWORD_GROUP('s') \`
Solidity parser, can not parse much yet. 2014-10-07 16:25:04 +00:00			`KEYWORD("string", Token::STRING_TYPE) \`
			`KEYWORD("struct", Token::STRUCT) \`
Solidity scanner and some unit tests. The scanner is a modified version of the v8 javascript scanner. 2014-10-06 15:13:52 +00:00			`KEYWORD("switch", Token::SWITCH) \`
			`KEYWORD_GROUP('t') \`
Solidity parser, can not parse much yet. 2014-10-07 16:25:04 +00:00			`KEYWORD("text", Token::TEXT) \`
Solidity scanner and some unit tests. The scanner is a modified version of the v8 javascript scanner. 2014-10-06 15:13:52 +00:00			`KEYWORD("this", Token::THIS) \`
			`KEYWORD("throw", Token::THROW) \`
			`KEYWORD("true", Token::TRUE_LITERAL) \`
			`KEYWORD("try", Token::TRY) \`
			`KEYWORD("typeof", Token::TYPEOF) \`
Solidity parser, can not parse much yet. 2014-10-07 16:25:04 +00:00			`KEYWORD_GROUP('u') \`
			`KEYWORD("uint", Token::UINT) \`
			`KEYWORD("uint32", Token::UINT32) \`
			`KEYWORD("uint64", Token::UINT64) \`
			`KEYWORD("uint128", Token::UINT128) \`
			`KEYWORD("uint256", Token::UINT256) \`
			`KEYWORD("ureal", Token::UREAL) \`
Solidity scanner and some unit tests. The scanner is a modified version of the v8 javascript scanner. 2014-10-06 15:13:52 +00:00			`KEYWORD_GROUP('v') \`
			`KEYWORD("var", Token::VAR) \`
			`KEYWORD("void", Token::VOID) \`
			`KEYWORD_GROUP('w') \`
			`KEYWORD("while", Token::WHILE) \`
			`KEYWORD("with", Token::WITH)`


			`static Token::Value KeywordOrIdentifierToken(const std::string& input)`
			`{`
			`BOOST_ASSERT(!input.empty());`
			`const int kMinLength = 2;`
			`const int kMaxLength = 10;`
			`if (input.size() < kMinLength \|\| input.size() > kMaxLength) {`
			`return Token::IDENTIFIER;`
			`}`
			`switch (input[0]) {`
			`default:`
			`#define KEYWORD_GROUP_CASE(ch) \`
			`break; \`
			`case ch:`
			`#define KEYWORD(keyword, token) \`
			`{ \`
			`/* 'keyword' is a char array, so sizeof(keyword) is */ \`
			`/* strlen(keyword) plus 1 for the NUL char. */ \`
			`const int keyword_length = sizeof(keyword) - 1; \`
			`BOOST_STATIC_ASSERT(keyword_length >= kMinLength); \`
			`BOOST_STATIC_ASSERT(keyword_length <= kMaxLength); \`
			`if (input == keyword) { \`
			`return token; \`
			`} \`
			`}`
			`KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)`
			`}`
			`return Token::IDENTIFIER;`
			`}`

			`Token::Value Scanner::scanIdentifierOrKeyword()`
			`{`
			`BOOST_ASSERT(IsIdentifierStart(m_char));`
			`LiteralScope literal(this);`

			`addLiteralCharAndAdvance();`

			`// Scan the rest of the identifier characters.`
			`while (IsIdentifierPart(m_char))`
			`addLiteralCharAndAdvance();`

			`literal.Complete();`

			`return KeywordOrIdentifierToken(m_next_token.literal);`
			`}`


			`} }`