mirror of
https://github.com/ethereum/solidity
synced 2023-10-03 13:03:40 +00:00
Merge pull request #666 from axic/feature/unicode-escape
Support unicode escape characters
This commit is contained in:
commit
3c93a22d47
@ -214,7 +214,9 @@ a non-rational number).
|
|||||||
String Literals
|
String Literals
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
String Literals are written with double quotes (``"abc"``). As with integer literals, their type can vary, but they are implicitly convertible to ``bytes`` if they fit, to ``bytes`` and to ``string``.
|
String Literals are written with double quotes (``"abc"``). As with integer literals, their type can vary, but they are implicitly convertible to ``bytes1``, ..., ``bytes32`` if they fit, to ``bytes`` and to ``string``.
|
||||||
|
|
||||||
|
String Literals support escape characters, such as ``\n``, ``\xNN`` and ``\uNNNN``. ``\xNN`` takes a hex value and inserts the appropriate byte, while ``\uNNNN`` takes a Unicode codepoint and inserts a UTF-8 sequence.
|
||||||
|
|
||||||
.. index:: enum
|
.. index:: enum
|
||||||
|
|
||||||
|
@ -177,6 +177,41 @@ bool Scanner::scanHexByte(char& o_scannedByte)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Scanner::scanUnicode(unsigned & o_codepoint)
|
||||||
|
{
|
||||||
|
unsigned x = 0;
|
||||||
|
for (int i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
int d = hexValue(m_char);
|
||||||
|
if (d < 0)
|
||||||
|
{
|
||||||
|
rollback(i);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
x = x * 16 + d;
|
||||||
|
advance();
|
||||||
|
}
|
||||||
|
o_codepoint = x;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// This supports codepoints between 0000 and FFFF.
|
||||||
|
void Scanner::addUnicodeAsUTF8(unsigned codepoint)
|
||||||
|
{
|
||||||
|
if (codepoint <= 0x7f)
|
||||||
|
addLiteralChar(codepoint);
|
||||||
|
else if (codepoint <= 0x7ff)
|
||||||
|
{
|
||||||
|
addLiteralChar(0xc0 | (codepoint >> 6));
|
||||||
|
addLiteralChar(0x80 | (codepoint & 0x3f));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
addLiteralChar(0xe0 | (codepoint >> 12));
|
||||||
|
addLiteralChar(0x80 | ((codepoint >> 6) & 0x3f));
|
||||||
|
addLiteralChar(0x80 | (codepoint & 0x3f));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Ensure that tokens can be stored in a byte.
|
// Ensure that tokens can be stored in a byte.
|
||||||
BOOST_STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
|
BOOST_STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
|
||||||
@ -607,6 +642,14 @@ bool Scanner::scanEscape()
|
|||||||
case 'v':
|
case 'v':
|
||||||
c = '\v';
|
c = '\v';
|
||||||
break;
|
break;
|
||||||
|
case 'u':
|
||||||
|
{
|
||||||
|
unsigned codepoint;
|
||||||
|
if (!scanUnicode(codepoint))
|
||||||
|
return false;
|
||||||
|
addUnicodeAsUTF8(codepoint);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
case 'x':
|
case 'x':
|
||||||
if (!scanHexByte(c))
|
if (!scanHexByte(c))
|
||||||
return false;
|
return false;
|
||||||
|
@ -175,6 +175,7 @@ private:
|
|||||||
inline void addLiteralChar(char c) { m_nextToken.literal.push_back(c); }
|
inline void addLiteralChar(char c) { m_nextToken.literal.push_back(c); }
|
||||||
inline void addCommentLiteralChar(char c) { m_nextSkippedComment.literal.push_back(c); }
|
inline void addCommentLiteralChar(char c) { m_nextSkippedComment.literal.push_back(c); }
|
||||||
inline void addLiteralCharAndAdvance() { addLiteralChar(m_char); advance(); }
|
inline void addLiteralCharAndAdvance() { addLiteralChar(m_char); advance(); }
|
||||||
|
void addUnicodeAsUTF8(unsigned codepoint);
|
||||||
///@}
|
///@}
|
||||||
|
|
||||||
bool advance() { m_char = m_source.advanceAndGet(); return !m_source.isPastEndOfInput(); }
|
bool advance() { m_char = m_source.advanceAndGet(); return !m_source.isPastEndOfInput(); }
|
||||||
@ -185,6 +186,7 @@ private:
|
|||||||
inline Token::Value selectToken(char _next, Token::Value _then, Token::Value _else);
|
inline Token::Value selectToken(char _next, Token::Value _then, Token::Value _else);
|
||||||
|
|
||||||
bool scanHexByte(char& o_scannedByte);
|
bool scanHexByte(char& o_scannedByte);
|
||||||
|
bool scanUnicode(unsigned& o_codepoint);
|
||||||
|
|
||||||
/// Scans a single Solidity token.
|
/// Scans a single Solidity token.
|
||||||
void scanToken();
|
void scanToken();
|
||||||
|
@ -291,6 +291,46 @@ BOOST_AUTO_TEST_CASE(empty_comment)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// \u00DA lies in the two-byte range and is expected to be scanned as 0xC3 0x9A.
BOOST_AUTO_TEST_CASE(valid_unicode_string_escape)
{
	// Explicit length so the expected value is exactly the 8 bytes listed.
	std::string const expectedLiteral("\xC3\x9Anicode", 8);

	Scanner scanner(CharStream("{ \"\\u00DAnicode\""));
	auto const openingToken = scanner.currentToken();
	auto const literalToken = scanner.next();

	BOOST_CHECK_EQUAL(openingToken, Token::LBrace);
	BOOST_CHECK_EQUAL(literalToken, Token::StringLiteral);
	BOOST_CHECK_EQUAL(scanner.currentLiteral(), expectedLiteral);
}
|
||||||
|
|
||||||
|
// \u007F is the largest codepoint that is emitted as a single byte (0x7F).
BOOST_AUTO_TEST_CASE(valid_unicode_string_escape_7f)
{
	std::string const expectedLiteral("\x7Fnicode", 7);

	Scanner scanner(CharStream("{ \"\\u007Fnicode\""));
	auto const openingToken = scanner.currentToken();
	auto const literalToken = scanner.next();

	BOOST_CHECK_EQUAL(openingToken, Token::LBrace);
	BOOST_CHECK_EQUAL(literalToken, Token::StringLiteral);
	BOOST_CHECK_EQUAL(scanner.currentLiteral(), expectedLiteral);
}
|
||||||
|
|
||||||
|
// \u07FF is the largest codepoint that is emitted as two bytes (0xDF 0xBF).
BOOST_AUTO_TEST_CASE(valid_unicode_string_escape_7ff)
{
	std::string const expectedLiteral("\xDF\xBFnicode", 8);

	Scanner scanner(CharStream("{ \"\\u07FFnicode\""));
	auto const openingToken = scanner.currentToken();
	auto const literalToken = scanner.next();

	BOOST_CHECK_EQUAL(openingToken, Token::LBrace);
	BOOST_CHECK_EQUAL(literalToken, Token::StringLiteral);
	BOOST_CHECK_EQUAL(scanner.currentLiteral(), expectedLiteral);
}
|
||||||
|
|
||||||
|
// \uFFFF is the largest value a four-digit escape can express; expected as 0xEF 0xBF 0xBF.
BOOST_AUTO_TEST_CASE(valid_unicode_string_escape_ffff)
{
	std::string const expectedLiteral("\xEF\xBF\xBFnicode", 9);

	Scanner scanner(CharStream("{ \"\\uFFFFnicode\""));
	auto const openingToken = scanner.currentToken();
	auto const literalToken = scanner.next();

	BOOST_CHECK_EQUAL(openingToken, Token::LBrace);
	BOOST_CHECK_EQUAL(literalToken, Token::StringLiteral);
	BOOST_CHECK_EQUAL(scanner.currentLiteral(), expectedLiteral);
}
|
||||||
|
|
||||||
|
// Only two hex digits follow \u here, so the scanner is expected to report Token::Illegal.
BOOST_AUTO_TEST_CASE(invalid_short_unicode_string_escape)
{
	Scanner scanner(CharStream("{ \"\\uFFnicode\""));
	auto const openingToken = scanner.currentToken();
	auto const literalToken = scanner.next();

	BOOST_CHECK_EQUAL(openingToken, Token::LBrace);
	BOOST_CHECK_EQUAL(literalToken, Token::Illegal);
}
|
||||||
|
|
||||||
|
|
||||||
BOOST_AUTO_TEST_SUITE_END()
|
BOOST_AUTO_TEST_SUITE_END()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user