mirror of
				https://github.com/ethereum/solidity
				synced 2023-10-03 13:03:40 +00:00 
			
		
		
		
	Merge pull request #666 from axic/feature/unicode-escape
Support unicode escape characters
This commit is contained in:
		
						commit
						3c93a22d47
					
				| @ -214,7 +214,9 @@ a non-rational number). | ||||
| String Literals | ||||
| --------------- | ||||
| 
 | ||||
| String Literals are written with double quotes (``"abc"``). As with integer literals, their type can vary, but they are implicitly convertible to ``bytes`` if they fit, to ``bytes`` and to ``string``. | ||||
| String Literals are written with double quotes (``"abc"``). As with integer literals, their type can vary, but they are implicitly convertible to ``bytes1``, ..., ``bytes32`` if they fit, to ``bytes`` and to ``string``. | ||||
| 
 | ||||
| String Literals support escape characters, such as ``\n``, ``\xNN`` and ``\uNNNN``. ``\xNN`` takes a hex value and inserts the appropriate byte, while ``\uNNNN`` takes a Unicode codepoint and inserts a UTF-8 sequence. | ||||
| 
 | ||||
| .. index:: enum | ||||
| 
 | ||||
|  | ||||
| @ -177,6 +177,41 @@ bool Scanner::scanHexByte(char& o_scannedByte) | ||||
| 	return true; | ||||
| } | ||||
| 
 | ||||
| bool Scanner::scanUnicode(unsigned & o_codepoint) | ||||
| { | ||||
| 	unsigned x = 0; | ||||
| 	for (int i = 0; i < 4; i++) | ||||
| 	{ | ||||
| 		int d = hexValue(m_char); | ||||
| 		if (d < 0) | ||||
| 		{ | ||||
| 			rollback(i); | ||||
| 			return false; | ||||
| 		} | ||||
| 		x = x * 16 + d; | ||||
| 		advance(); | ||||
| 	} | ||||
| 	o_codepoint = x; | ||||
| 	return true; | ||||
| } | ||||
| 
 | ||||
| // This supports codepoints between 0000 and FFFF.
 | ||||
| void Scanner::addUnicodeAsUTF8(unsigned codepoint) | ||||
| { | ||||
| 	if (codepoint <= 0x7f) | ||||
| 		addLiteralChar(codepoint); | ||||
| 	else if (codepoint <= 0x7ff) | ||||
| 	{ | ||||
| 		addLiteralChar(0xc0 | (codepoint >> 6)); | ||||
| 		addLiteralChar(0x80 | (codepoint & 0x3f)); | ||||
| 	} | ||||
| 	else | ||||
| 	{ | ||||
| 		addLiteralChar(0xe0 | (codepoint >> 12)); | ||||
| 		addLiteralChar(0x80 | ((codepoint >> 6) & 0x3f)); | ||||
| 		addLiteralChar(0x80 | (codepoint & 0x3f)); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // Ensure that tokens can be stored in a byte.
 | ||||
| BOOST_STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); | ||||
| @ -607,6 +642,14 @@ bool Scanner::scanEscape() | ||||
| 	case 'v': | ||||
| 		c = '\v'; | ||||
| 		break; | ||||
| 	case 'u': | ||||
| 	{ | ||||
| 		unsigned codepoint; | ||||
| 		if (!scanUnicode(codepoint)) | ||||
| 			return false; | ||||
| 		addUnicodeAsUTF8(codepoint); | ||||
| 		return true; | ||||
| 	} | ||||
| 	case 'x': | ||||
| 		if (!scanHexByte(c)) | ||||
| 			return false; | ||||
|  | ||||
| @ -175,6 +175,7 @@ private: | ||||
| 	inline void addLiteralChar(char c) { m_nextToken.literal.push_back(c); } | ||||
| 	inline void addCommentLiteralChar(char c) { m_nextSkippedComment.literal.push_back(c); } | ||||
| 	inline void addLiteralCharAndAdvance() { addLiteralChar(m_char); advance(); } | ||||
	/// Appends the UTF-8 encoding of the given codepoint to the literal of m_nextToken (via addLiteralChar).
| 	void addUnicodeAsUTF8(unsigned codepoint); | ||||
| 	///@}
 | ||||
| 
 | ||||
| 	bool advance() { m_char = m_source.advanceAndGet(); return !m_source.isPastEndOfInput(); } | ||||
| @ -185,6 +186,7 @@ private: | ||||
| 	inline Token::Value selectToken(char _next, Token::Value _then, Token::Value _else); | ||||
| 
 | ||||
| 	bool scanHexByte(char& o_scannedByte); | ||||
	/// Scans four hexadecimal digits into o_codepoint; returns false if a non-hex character is met first.
| 	bool scanUnicode(unsigned& o_codepoint); | ||||
| 
 | ||||
| 	/// Scans a single Solidity token.
 | ||||
| 	void scanToken(); | ||||
|  | ||||
| @ -291,6 +291,46 @@ BOOST_AUTO_TEST_CASE(empty_comment) | ||||
| 
 | ||||
| } | ||||
| 
 | ||||
| BOOST_AUTO_TEST_CASE(valid_unicode_string_escape) | ||||
| { | ||||
| 	Scanner scanner(CharStream("{ \"\\u00DAnicode\"")); | ||||
| 	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); | ||||
| 	BOOST_CHECK_EQUAL(scanner.next(), Token::StringLiteral); | ||||
| 	BOOST_CHECK_EQUAL(scanner.currentLiteral(), std::string("\xC3\x9Anicode", 8)); | ||||
| } | ||||
| 
 | ||||
| BOOST_AUTO_TEST_CASE(valid_unicode_string_escape_7f) | ||||
| { | ||||
| 	Scanner scanner(CharStream("{ \"\\u007Fnicode\"")); | ||||
| 	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); | ||||
| 	BOOST_CHECK_EQUAL(scanner.next(), Token::StringLiteral); | ||||
| 	BOOST_CHECK_EQUAL(scanner.currentLiteral(), std::string("\x7Fnicode", 7)); | ||||
| } | ||||
| 
 | ||||
| BOOST_AUTO_TEST_CASE(valid_unicode_string_escape_7ff) | ||||
| { | ||||
| 	Scanner scanner(CharStream("{ \"\\u07FFnicode\"")); | ||||
| 	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); | ||||
| 	BOOST_CHECK_EQUAL(scanner.next(), Token::StringLiteral); | ||||
| 	BOOST_CHECK_EQUAL(scanner.currentLiteral(), std::string("\xDF\xBFnicode", 8)); | ||||
| } | ||||
| 
 | ||||
| BOOST_AUTO_TEST_CASE(valid_unicode_string_escape_ffff) | ||||
| { | ||||
| 	Scanner scanner(CharStream("{ \"\\uFFFFnicode\"")); | ||||
| 	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); | ||||
| 	BOOST_CHECK_EQUAL(scanner.next(), Token::StringLiteral); | ||||
| 	BOOST_CHECK_EQUAL(scanner.currentLiteral(), std::string("\xEF\xBF\xBFnicode", 9)); | ||||
| } | ||||
| 
 | ||||
| BOOST_AUTO_TEST_CASE(invalid_short_unicode_string_escape) | ||||
| { | ||||
| 	Scanner scanner(CharStream("{ \"\\uFFnicode\"")); | ||||
| 	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); | ||||
| 	BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| BOOST_AUTO_TEST_SUITE_END() | ||||
| 
 | ||||
| } | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user