mirror of
https://github.com/ethereum/solidity
synced 2023-10-03 13:03:40 +00:00
Merge pull request #9412 from ethereum/unicode-string
[BREAKING] Support unicode string literal type
This commit is contained in:
commit
53d497fc31
@ -7,6 +7,8 @@ Breaking changes:
|
||||
* JSON AST: Remove members with ``null`` value from JSON output.
|
||||
* Parser: Disallow ``gwei`` as identifier.
|
||||
* Parser: Disallow dot syntax for ``value`` and ``gas``.
|
||||
* Parser: Disallow non-printable characters in string literals.
|
||||
* Parser: Introduce Unicode string literals: ``unicode"😃"``.
|
||||
* Parser: NatSpec comments on variables are only allowed for public state variables.
|
||||
* Parser: Remove the ``finney`` and ``szabo`` denominations.
|
||||
* Parser: Remove the identifier ``now`` (replaced by ``block.timestamp``).
|
||||
|
@ -25,16 +25,24 @@ Changes to the Syntax
|
||||
* In external function and contract creation calls, Ether and gas are now specified using a new syntax:
|
||||
``x.f{gas: 10000, value: 2 ether}(arg1, arg2)``.
|
||||
The old syntax -- ``x.f.gas(10000).value(2 ether)(arg1, arg2)`` -- will cause an error.
|
||||
|
||||
* The global variable ``now`` is deprecated, ``block.timestamp`` should be used instead.
|
||||
The single identifier ``now`` is too generic for a global variable and could give the impression
|
||||
that it changes during transaction processing, whereas ``block.timestamp`` correctly
|
||||
reflects the fact that it is just a property of the block.
|
||||
|
||||
* NatSpec comments on variables are only allowed for public state variables and not
|
||||
for local or internal variables.
|
||||
|
||||
* The token ``gwei`` is a keyword now (used to specify, e.g. ``2 gwei`` as a number)
|
||||
and cannot be used as an identifier.
|
||||
|
||||
* String literals now can only contain printable ASCII characters and this also includes a variety of
|
||||
escape sequences, such as hexadecimal (``\xff``) and unicode escapes (``\u20ac``).
|
||||
|
||||
* Unicode string literals are supported now to accommodate valid UTF-8 sequences. They are identified
|
||||
with the ``unicode`` prefix: ``unicode"Hello 😃"``.
|
||||
|
||||
* State Mutability: The state mutability of functions can now be restricted during inheritance:
|
||||
Functions with default state mutability can be overridden by ``pure`` and ``view`` functions
|
||||
while ``view`` functions can be overridden by ``pure`` functions.
|
||||
|
@ -253,6 +253,7 @@ primaryExpression
|
||||
| numberLiteral
|
||||
| hexLiteral
|
||||
| stringLiteral
|
||||
| unicodeStringLiteral
|
||||
| identifier ('[' ']')?
|
||||
| TypeKeyword
|
||||
| tupleExpression
|
||||
@ -461,6 +462,13 @@ StringLiteralFragment
|
||||
: '"' DoubleQuotedStringCharacter* '"'
|
||||
| '\'' SingleQuotedStringCharacter* '\'' ;
|
||||
|
||||
unicodeStringLiteral
|
||||
: UnicodeStringLiteralFragment+ ;
|
||||
|
||||
UnicodeStringLiteralFragment
|
||||
: 'unicode"' DoubleQuotedStringCharacter* '"'
|
||||
| 'unicode\'' SingleQuotedStringCharacter* '\'' ;
|
||||
|
||||
fragment
|
||||
DoubleQuotedStringCharacter
|
||||
: ~["\r\n\\] | ('\\' .) ;
|
||||
|
@ -484,7 +484,9 @@ String literals are written with either double or single-quotes (``"foo"`` or ``
|
||||
|
||||
For example, with ``bytes32 samevar = "stringliteral"`` the string literal is interpreted in its raw byte form when assigned to a ``bytes32`` type.
|
||||
|
||||
String literals support the following escape characters:
|
||||
String literals can only contain printable ASCII characters, which means the characters between and including 0x20 .. 0x7E.
|
||||
|
||||
Additionally, string literals also support the following escape characters:
|
||||
|
||||
- ``\<newline>`` (escapes an actual newline)
|
||||
- ``\\`` (backslash)
|
||||
@ -511,9 +513,19 @@ character sequence ``abcdef``.
|
||||
"\n\"\'\\abc\
|
||||
def"
|
||||
|
||||
Any unicode line terminator which is not a newline (i.e. LF, VF, FF, CR, NEL, LS, PS) is considered to
|
||||
Any Unicode line terminator which is not a newline (i.e. LF, VF, FF, CR, NEL, LS, PS) is considered to
|
||||
terminate the string literal. Newline only terminates the string literal if it is not preceded by a ``\``.
|
||||
|
||||
Unicode Literals
|
||||
----------------
|
||||
|
||||
While regular string literals can only contain ASCII, Unicode literals – prefixed with the keyword ``unicode`` – can contain any valid UTF-8 sequence.
|
||||
They also support the very same escape sequences as regular string literals.
|
||||
|
||||
::
|
||||
|
||||
string memory a = unicode"Hello 😃";
|
||||
|
||||
.. index:: literal, bytes
|
||||
|
||||
Hexadecimal Literals
|
||||
|
@ -73,6 +73,7 @@ string to_string(ScannerError _errorCode)
|
||||
case ScannerError::IllegalHexDigit: return "Hexadecimal digit missing or invalid.";
|
||||
case ScannerError::IllegalCommentTerminator: return "Expected multi-line comment-terminator.";
|
||||
case ScannerError::IllegalEscapeSequence: return "Invalid escape sequence.";
|
||||
case ScannerError::IllegalCharacterInString: return "Invalid character in string.";
|
||||
case ScannerError::IllegalStringEndQuote: return "Expected string end-quote.";
|
||||
case ScannerError::IllegalNumberSeparator: return "Invalid use of number separator '_'.";
|
||||
case ScannerError::IllegalExponent: return "Invalid exponent.";
|
||||
@ -508,7 +509,7 @@ void Scanner::scanToken()
|
||||
{
|
||||
case '"':
|
||||
case '\'':
|
||||
token = scanString();
|
||||
token = scanString(false);
|
||||
break;
|
||||
case '<':
|
||||
// < <= << <<=
|
||||
@ -683,6 +684,18 @@ void Scanner::scanToken()
|
||||
else
|
||||
token = setError(ScannerError::IllegalToken);
|
||||
}
|
||||
else if (token == Token::Unicode)
|
||||
{
|
||||
// reset
|
||||
m = 0;
|
||||
n = 0;
|
||||
|
||||
// Special quoted unicode string must follow
|
||||
if (m_char == '"' || m_char == '\'')
|
||||
token = scanString(true);
|
||||
else
|
||||
token = setError(ScannerError::IllegalToken);
|
||||
}
|
||||
}
|
||||
else if (isDecimalDigit(m_char))
|
||||
token = scanNumber();
|
||||
@ -774,7 +787,7 @@ bool Scanner::isUnicodeLinebreak()
|
||||
return false;
|
||||
}
|
||||
|
||||
Token Scanner::scanString()
|
||||
Token Scanner::scanString(bool const _isUnicode)
|
||||
{
|
||||
char const quote = m_char;
|
||||
advance(); // consume quote
|
||||
@ -789,13 +802,23 @@ Token Scanner::scanString()
|
||||
return setError(ScannerError::IllegalEscapeSequence);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Report error on non-printable characters in string literals, however
|
||||
// allow anything for unicode string literals, because their validity will
|
||||
// be verified later (in the syntax checker).
|
||||
//
|
||||
// We are using a manual range and not isprint() to avoid
|
||||
// any potential complications with locale.
|
||||
if (!_isUnicode && (static_cast<unsigned>(c) <= 0x1f || static_cast<unsigned>(c) >= 0x7f))
|
||||
return setError(ScannerError::IllegalCharacterInString);
|
||||
addLiteralChar(c);
|
||||
}
|
||||
}
|
||||
if (m_char != quote)
|
||||
return setError(ScannerError::IllegalStringEndQuote);
|
||||
literal.complete();
|
||||
advance(); // consume quote
|
||||
return Token::StringLiteral;
|
||||
return _isUnicode ? Token::UnicodeStringLiteral : Token::StringLiteral;
|
||||
}
|
||||
|
||||
Token Scanner::scanHexString()
|
||||
|
@ -77,6 +77,7 @@ enum class ScannerError
|
||||
IllegalHexDigit,
|
||||
IllegalCommentTerminator,
|
||||
IllegalEscapeSequence,
|
||||
IllegalCharacterInString,
|
||||
IllegalStringEndQuote,
|
||||
IllegalNumberSeparator,
|
||||
IllegalExponent,
|
||||
@ -228,7 +229,7 @@ private:
|
||||
Token scanNumber(char _charSeen = 0);
|
||||
std::tuple<Token, unsigned, unsigned> scanIdentifierOrKeyword();
|
||||
|
||||
Token scanString();
|
||||
Token scanString(bool const _isUnicode);
|
||||
Token scanHexString();
|
||||
/// Scans a single line comment and returns its corrected end position.
|
||||
size_t scanSingleLineDocComment();
|
||||
|
@ -190,6 +190,7 @@ namespace solidity::langutil
|
||||
K(Throw, "throw", 0) \
|
||||
K(Try, "try", 0) \
|
||||
K(Type, "type", 0) \
|
||||
K(Unicode, "unicode", 0) \
|
||||
K(Using, "using", 0) \
|
||||
K(View, "view", 0) \
|
||||
K(Virtual, "virtual", 0) \
|
||||
@ -227,6 +228,7 @@ namespace solidity::langutil
|
||||
K(FalseLiteral, "false", 0) \
|
||||
T(Number, nullptr, 0) \
|
||||
T(StringLiteral, nullptr, 0) \
|
||||
T(UnicodeStringLiteral, nullptr, 0) \
|
||||
T(HexStringLiteral, nullptr, 0) \
|
||||
T(CommentLiteral, nullptr, 0) \
|
||||
\
|
||||
|
@ -28,6 +28,8 @@
|
||||
#include <liblangutil/ErrorReporter.h>
|
||||
#include <liblangutil/SemVerHandler.h>
|
||||
|
||||
#include <libsolutil/UTF8.h>
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
|
||||
#include <memory>
|
||||
@ -37,7 +39,7 @@ using namespace std;
|
||||
using namespace solidity;
|
||||
using namespace solidity::langutil;
|
||||
using namespace solidity::frontend;
|
||||
|
||||
using namespace solidity::util;
|
||||
|
||||
bool SyntaxChecker::checkSyntax(ASTNode const& _astRoot)
|
||||
{
|
||||
@ -217,6 +219,13 @@ bool SyntaxChecker::visit(Throw const& _throwStatement)
|
||||
|
||||
bool SyntaxChecker::visit(Literal const& _literal)
|
||||
{
|
||||
if ((_literal.token() == Token::UnicodeStringLiteral) && !validateUTF8(_literal.value()))
|
||||
m_errorReporter.syntaxError(
|
||||
8452_error,
|
||||
_literal.location(),
|
||||
"Invalid UTF-8 sequence found"
|
||||
);
|
||||
|
||||
if (_literal.token() != Token::Number)
|
||||
return true;
|
||||
|
||||
|
@ -920,6 +920,8 @@ string ASTJsonConverter::literalTokenKind(Token _token)
|
||||
return "number";
|
||||
case Token::StringLiteral:
|
||||
return "string";
|
||||
case Token::UnicodeStringLiteral:
|
||||
return "unicodeString";
|
||||
case Token::HexStringLiteral:
|
||||
return "hexString";
|
||||
case Token::TrueLiteral:
|
||||
|
@ -943,6 +943,8 @@ Token ASTJsonImporter::literalTokenKind(Json::Value const& _node)
|
||||
tok = Token::Number;
|
||||
else if (_node["kind"].asString() == "string")
|
||||
tok = Token::StringLiteral;
|
||||
else if (_node["kind"].asString() == "unicodeString")
|
||||
tok = Token::UnicodeStringLiteral;
|
||||
else if (_node["kind"].asString() == "hexString")
|
||||
tok = Token::HexStringLiteral;
|
||||
else if (_node["kind"].asString() == "bool")
|
||||
|
@ -349,6 +349,7 @@ TypePointer TypeProvider::forLiteral(Literal const& _literal)
|
||||
case Token::Number:
|
||||
return rationalNumber(_literal);
|
||||
case Token::StringLiteral:
|
||||
case Token::UnicodeStringLiteral:
|
||||
case Token::HexStringLiteral:
|
||||
return stringLiteral(_literal.value());
|
||||
default:
|
||||
|
@ -1408,7 +1408,7 @@ BoolResult StringLiteralType::isImplicitlyConvertibleTo(Type const& _convertTo)
|
||||
return
|
||||
arrayType->isByteArray() &&
|
||||
!(arrayType->dataStoredIn(DataLocation::Storage) && arrayType->isPointer()) &&
|
||||
!(arrayType->isString() && !isValidUTF8());
|
||||
!(arrayType->isString() && !util::validateUTF8(value()));
|
||||
else
|
||||
return false;
|
||||
}
|
||||
@ -1442,11 +1442,6 @@ TypePointer StringLiteralType::mobileType() const
|
||||
return TypeProvider::stringMemory();
|
||||
}
|
||||
|
||||
bool StringLiteralType::isValidUTF8() const
|
||||
{
|
||||
return util::validateUTF8(m_value);
|
||||
}
|
||||
|
||||
FixedBytesType::FixedBytesType(unsigned _bytes): m_bytes(_bytes)
|
||||
{
|
||||
solAssert(
|
||||
|
@ -629,8 +629,6 @@ public:
|
||||
std::string toString(bool) const override;
|
||||
TypePointer mobileType() const override;
|
||||
|
||||
bool isValidUTF8() const;
|
||||
|
||||
std::string const& value() const { return m_value; }
|
||||
|
||||
protected:
|
||||
|
@ -1782,6 +1782,7 @@ ASTPointer<Expression> Parser::parsePrimaryExpression()
|
||||
}
|
||||
break;
|
||||
case Token::StringLiteral:
|
||||
case Token::UnicodeStringLiteral:
|
||||
case Token::HexStringLiteral:
|
||||
{
|
||||
string literal = m_scanner->currentLiteral();
|
||||
|
@ -7,29 +7,30 @@ Warning: Source file does not specify required compiler version!
|
||||
Warning: Statement has no effect.
|
||||
--> message_format_utf8/input.sol:2:51:
|
||||
|
|
||||
2 | /* ©©©©ᄅ©©©©© 2017 */ constructor () { "©©©©ᄅ©©©©©" ; }
|
||||
| ^^^^^^^^^^^^
|
||||
2 | /* ©©©©ᄅ©©©©© 2017 */ constructor () { unicode"©©©©ᄅ©©©©©" ; }
|
||||
| ^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Warning: Statement has no effect.
|
||||
--> message_format_utf8/input.sol:6:25:
|
||||
|
|
||||
6 | "S = π × r²";
|
||||
| ^^^^^^^^^^^^
|
||||
6 | unicode"S = π × r²";
|
||||
| ^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Warning: Statement has no effect.
|
||||
--> message_format_utf8/input.sol:7:39:
|
||||
|
|
||||
7 | /* ₀₁₂₃₄⁵⁶⁷⁸⁹ */ "∑ 1/n! ≈ 2.7"; // tabs in-between
|
||||
| ^^^^^^^^^^^^^^
|
||||
7 | /* ₀₁₂₃₄⁵⁶⁷⁸⁹ */ unicode"∑ 1/n! ≈ 2.7"; // tabs in-between
|
||||
| ^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Warning: Statement has no effect.
|
||||
--> message_format_utf8/input.sol:8:30:
|
||||
|
|
||||
8 | /* Ŀŏŗėɯ ïƥŝʉɱ */ "μὴ χεῖρον βέλτιστον"; // tabs in-between and inside
|
||||
| ^^^ ^^^^^^ ^^^^^^^^^^
|
||||
8 | /* Ŀŏŗėɯ ïƥŝʉɱ */ unicode"μὴ χεῖρον βέλτιστον"; // tabs in-between and inside
|
||||
| ^^^^^^^^^^ ^^^^^^ ^^^^^^^^^^
|
||||
|
||||
Warning: Function state mutability can be restricted to pure
|
||||
--> message_format_utf8/input.sol:12:2:
|
||||
|
|
||||
12 | function selector() public returns(uint) { // starts with tab
|
||||
| ^ (Relevant source part starts here and spans across multiple lines).
|
||||
|
||||
|
@ -1,11 +1,11 @@
|
||||
contract Foo {
|
||||
/* ©©©©ᄅ©©©©© 2017 */ constructor () { "©©©©ᄅ©©©©©" ; }
|
||||
/* ©©©©ᄅ©©©©© 2017 */ constructor () { unicode"©©©©ᄅ©©©©©" ; }
|
||||
|
||||
function f() public pure {
|
||||
|
||||
"S = π × r²";
|
||||
/* ₀₁₂₃₄⁵⁶⁷⁸⁹ */ "∑ 1/n! ≈ 2.7"; // tabs in-between
|
||||
/* Ŀŏŗėɯ ïƥŝʉɱ */ "μὴ χεῖρον βέλτιστον"; // tabs in-between and inside
|
||||
unicode"S = π × r²";
|
||||
/* ₀₁₂₃₄⁵⁶⁷⁸⁹ */ unicode"∑ 1/n! ≈ 2.7"; // tabs in-between
|
||||
/* Ŀŏŗėɯ ïƥŝʉɱ */ unicode"μὴ χεῖρον βέλτιστον"; // tabs in-between and inside
|
||||
|
||||
}
|
||||
|
||||
|
131
test/libsolidity/ASTJSON/string.json
Normal file
131
test/libsolidity/ASTJSON/string.json
Normal file
@ -0,0 +1,131 @@
|
||||
{
|
||||
"absolutePath": "a",
|
||||
"exportedSymbols":
|
||||
{
|
||||
"C":
|
||||
[
|
||||
9
|
||||
]
|
||||
},
|
||||
"id": 10,
|
||||
"nodeType": "SourceUnit",
|
||||
"nodes":
|
||||
[
|
||||
{
|
||||
"abstract": false,
|
||||
"baseContracts": [],
|
||||
"contractDependencies": [],
|
||||
"contractKind": "contract",
|
||||
"fullyImplemented": true,
|
||||
"id": 9,
|
||||
"linearizedBaseContracts":
|
||||
[
|
||||
9
|
||||
],
|
||||
"name": "C",
|
||||
"nodeType": "ContractDefinition",
|
||||
"nodes":
|
||||
[
|
||||
{
|
||||
"body":
|
||||
{
|
||||
"id": 7,
|
||||
"nodeType": "Block",
|
||||
"src": "33:36:1",
|
||||
"statements":
|
||||
[
|
||||
{
|
||||
"assignments":
|
||||
[
|
||||
4
|
||||
],
|
||||
"declarations":
|
||||
[
|
||||
{
|
||||
"constant": false,
|
||||
"id": 4,
|
||||
"mutability": "mutable",
|
||||
"name": "x",
|
||||
"nodeType": "VariableDeclaration",
|
||||
"scope": 7,
|
||||
"src": "35:15:1",
|
||||
"stateVariable": false,
|
||||
"storageLocation": "memory",
|
||||
"typeDescriptions":
|
||||
{
|
||||
"typeIdentifier": "t_string_memory_ptr",
|
||||
"typeString": "string"
|
||||
},
|
||||
"typeName":
|
||||
{
|
||||
"id": 3,
|
||||
"name": "string",
|
||||
"nodeType": "ElementaryTypeName",
|
||||
"src": "35:6:1",
|
||||
"typeDescriptions":
|
||||
{
|
||||
"typeIdentifier": "t_string_storage_ptr",
|
||||
"typeString": "string"
|
||||
}
|
||||
},
|
||||
"visibility": "internal"
|
||||
}
|
||||
],
|
||||
"id": 6,
|
||||
"initialValue":
|
||||
{
|
||||
"hexValue": "48656c6c6f20576f726c64",
|
||||
"id": 5,
|
||||
"isConstant": false,
|
||||
"isLValue": false,
|
||||
"isPure": true,
|
||||
"kind": "string",
|
||||
"lValueRequested": false,
|
||||
"nodeType": "Literal",
|
||||
"src": "53:13:1",
|
||||
"typeDescriptions":
|
||||
{
|
||||
"typeIdentifier": "t_stringliteral_592fa743889fc7f92ac2a37bb1f5ba1daf2a5c84741ca0e0061d243a2e6707ba",
|
||||
"typeString": "literal_string \"Hello World\""
|
||||
},
|
||||
"value": "Hello World"
|
||||
},
|
||||
"nodeType": "VariableDeclarationStatement",
|
||||
"src": "35:31:1"
|
||||
}
|
||||
]
|
||||
},
|
||||
"functionSelector": "26121ff0",
|
||||
"id": 8,
|
||||
"implemented": true,
|
||||
"kind": "function",
|
||||
"modifiers": [],
|
||||
"name": "f",
|
||||
"nodeType": "FunctionDefinition",
|
||||
"parameters":
|
||||
{
|
||||
"id": 1,
|
||||
"nodeType": "ParameterList",
|
||||
"parameters": [],
|
||||
"src": "23:2:1"
|
||||
},
|
||||
"returnParameters":
|
||||
{
|
||||
"id": 2,
|
||||
"nodeType": "ParameterList",
|
||||
"parameters": [],
|
||||
"src": "33:0:1"
|
||||
},
|
||||
"scope": 9,
|
||||
"src": "13:56:1",
|
||||
"stateMutability": "nonpayable",
|
||||
"virtual": false,
|
||||
"visibility": "public"
|
||||
}
|
||||
],
|
||||
"scope": 10,
|
||||
"src": "0:71:1"
|
||||
}
|
||||
],
|
||||
"src": "0:72:1"
|
||||
}
|
3
test/libsolidity/ASTJSON/string.sol
Normal file
3
test/libsolidity/ASTJSON/string.sol
Normal file
@ -0,0 +1,3 @@
|
||||
contract C { function f() public { string memory x = "Hello World"; } }
|
||||
|
||||
// ----
|
165
test/libsolidity/ASTJSON/string_legacy.json
Normal file
165
test/libsolidity/ASTJSON/string_legacy.json
Normal file
@ -0,0 +1,165 @@
|
||||
{
|
||||
"attributes":
|
||||
{
|
||||
"absolutePath": "a",
|
||||
"exportedSymbols":
|
||||
{
|
||||
"C":
|
||||
[
|
||||
9
|
||||
]
|
||||
}
|
||||
},
|
||||
"children":
|
||||
[
|
||||
{
|
||||
"attributes":
|
||||
{
|
||||
"abstract": false,
|
||||
"baseContracts":
|
||||
[
|
||||
null
|
||||
],
|
||||
"contractDependencies":
|
||||
[
|
||||
null
|
||||
],
|
||||
"contractKind": "contract",
|
||||
"fullyImplemented": true,
|
||||
"linearizedBaseContracts":
|
||||
[
|
||||
9
|
||||
],
|
||||
"name": "C",
|
||||
"scope": 10
|
||||
},
|
||||
"children":
|
||||
[
|
||||
{
|
||||
"attributes":
|
||||
{
|
||||
"functionSelector": "26121ff0",
|
||||
"implemented": true,
|
||||
"isConstructor": false,
|
||||
"kind": "function",
|
||||
"modifiers":
|
||||
[
|
||||
null
|
||||
],
|
||||
"name": "f",
|
||||
"scope": 9,
|
||||
"stateMutability": "nonpayable",
|
||||
"virtual": false,
|
||||
"visibility": "public"
|
||||
},
|
||||
"children":
|
||||
[
|
||||
{
|
||||
"attributes":
|
||||
{
|
||||
"parameters":
|
||||
[
|
||||
null
|
||||
]
|
||||
},
|
||||
"children": [],
|
||||
"id": 1,
|
||||
"name": "ParameterList",
|
||||
"src": "23:2:1"
|
||||
},
|
||||
{
|
||||
"attributes":
|
||||
{
|
||||
"parameters":
|
||||
[
|
||||
null
|
||||
]
|
||||
},
|
||||
"children": [],
|
||||
"id": 2,
|
||||
"name": "ParameterList",
|
||||
"src": "33:0:1"
|
||||
},
|
||||
{
|
||||
"children":
|
||||
[
|
||||
{
|
||||
"attributes":
|
||||
{
|
||||
"assignments":
|
||||
[
|
||||
4
|
||||
]
|
||||
},
|
||||
"children":
|
||||
[
|
||||
{
|
||||
"attributes":
|
||||
{
|
||||
"constant": false,
|
||||
"mutability": "mutable",
|
||||
"name": "x",
|
||||
"scope": 7,
|
||||
"stateVariable": false,
|
||||
"storageLocation": "memory",
|
||||
"type": "string",
|
||||
"visibility": "internal"
|
||||
},
|
||||
"children":
|
||||
[
|
||||
{
|
||||
"attributes":
|
||||
{
|
||||
"name": "string",
|
||||
"type": "string"
|
||||
},
|
||||
"id": 3,
|
||||
"name": "ElementaryTypeName",
|
||||
"src": "35:6:1"
|
||||
}
|
||||
],
|
||||
"id": 4,
|
||||
"name": "VariableDeclaration",
|
||||
"src": "35:15:1"
|
||||
},
|
||||
{
|
||||
"attributes":
|
||||
{
|
||||
"hexvalue": "48656c6c6f20576f726c64",
|
||||
"isConstant": false,
|
||||
"isLValue": false,
|
||||
"isPure": true,
|
||||
"lValueRequested": false,
|
||||
"token": "string",
|
||||
"type": "literal_string \"Hello World\"",
|
||||
"value": "Hello World"
|
||||
},
|
||||
"id": 5,
|
||||
"name": "Literal",
|
||||
"src": "53:13:1"
|
||||
}
|
||||
],
|
||||
"id": 6,
|
||||
"name": "VariableDeclarationStatement",
|
||||
"src": "35:31:1"
|
||||
}
|
||||
],
|
||||
"id": 7,
|
||||
"name": "Block",
|
||||
"src": "33:36:1"
|
||||
}
|
||||
],
|
||||
"id": 8,
|
||||
"name": "FunctionDefinition",
|
||||
"src": "13:56:1"
|
||||
}
|
||||
],
|
||||
"id": 9,
|
||||
"name": "ContractDefinition",
|
||||
"src": "0:71:1"
|
||||
}
|
||||
],
|
||||
"id": 10,
|
||||
"name": "SourceUnit",
|
||||
"src": "0:72:1"
|
||||
}
|
131
test/libsolidity/ASTJSON/unicode.json
Normal file
131
test/libsolidity/ASTJSON/unicode.json
Normal file
@ -0,0 +1,131 @@
|
||||
{
|
||||
"absolutePath": "a",
|
||||
"exportedSymbols":
|
||||
{
|
||||
"C":
|
||||
[
|
||||
9
|
||||
]
|
||||
},
|
||||
"id": 10,
|
||||
"nodeType": "SourceUnit",
|
||||
"nodes":
|
||||
[
|
||||
{
|
||||
"abstract": false,
|
||||
"baseContracts": [],
|
||||
"contractDependencies": [],
|
||||
"contractKind": "contract",
|
||||
"fullyImplemented": true,
|
||||
"id": 9,
|
||||
"linearizedBaseContracts":
|
||||
[
|
||||
9
|
||||
],
|
||||
"name": "C",
|
||||
"nodeType": "ContractDefinition",
|
||||
"nodes":
|
||||
[
|
||||
{
|
||||
"body":
|
||||
{
|
||||
"id": 7,
|
||||
"nodeType": "Block",
|
||||
"src": "33:42:1",
|
||||
"statements":
|
||||
[
|
||||
{
|
||||
"assignments":
|
||||
[
|
||||
4
|
||||
],
|
||||
"declarations":
|
||||
[
|
||||
{
|
||||
"constant": false,
|
||||
"id": 4,
|
||||
"mutability": "mutable",
|
||||
"name": "x",
|
||||
"nodeType": "VariableDeclaration",
|
||||
"scope": 7,
|
||||
"src": "35:15:1",
|
||||
"stateVariable": false,
|
||||
"storageLocation": "memory",
|
||||
"typeDescriptions":
|
||||
{
|
||||
"typeIdentifier": "t_string_memory_ptr",
|
||||
"typeString": "string"
|
||||
},
|
||||
"typeName":
|
||||
{
|
||||
"id": 3,
|
||||
"name": "string",
|
||||
"nodeType": "ElementaryTypeName",
|
||||
"src": "35:6:1",
|
||||
"typeDescriptions":
|
||||
{
|
||||
"typeIdentifier": "t_string_storage_ptr",
|
||||
"typeString": "string"
|
||||
}
|
||||
},
|
||||
"visibility": "internal"
|
||||
}
|
||||
],
|
||||
"id": 6,
|
||||
"initialValue":
|
||||
{
|
||||
"hexValue": "48656c6c6f20f09f9883",
|
||||
"id": 5,
|
||||
"isConstant": false,
|
||||
"isLValue": false,
|
||||
"isPure": true,
|
||||
"kind": "unicodeString",
|
||||
"lValueRequested": false,
|
||||
"nodeType": "Literal",
|
||||
"src": "53:19:1",
|
||||
"typeDescriptions":
|
||||
{
|
||||
"typeIdentifier": "t_stringliteral_cd7a99177cebb3d14b8cc54e313dbf76867c71cd6fbb9a33ce3870dc80e9992b",
|
||||
"typeString": "literal_string \"Hello \ud83d\ude03\""
|
||||
},
|
||||
"value": "Hello \ud83d\ude03"
|
||||
},
|
||||
"nodeType": "VariableDeclarationStatement",
|
||||
"src": "35:37:1"
|
||||
}
|
||||
]
|
||||
},
|
||||
"functionSelector": "26121ff0",
|
||||
"id": 8,
|
||||
"implemented": true,
|
||||
"kind": "function",
|
||||
"modifiers": [],
|
||||
"name": "f",
|
||||
"nodeType": "FunctionDefinition",
|
||||
"parameters":
|
||||
{
|
||||
"id": 1,
|
||||
"nodeType": "ParameterList",
|
||||
"parameters": [],
|
||||
"src": "23:2:1"
|
||||
},
|
||||
"returnParameters":
|
||||
{
|
||||
"id": 2,
|
||||
"nodeType": "ParameterList",
|
||||
"parameters": [],
|
||||
"src": "33:0:1"
|
||||
},
|
||||
"scope": 9,
|
||||
"src": "13:62:1",
|
||||
"stateMutability": "nonpayable",
|
||||
"virtual": false,
|
||||
"visibility": "public"
|
||||
}
|
||||
],
|
||||
"scope": 10,
|
||||
"src": "0:77:1"
|
||||
}
|
||||
],
|
||||
"src": "0:78:1"
|
||||
}
|
3
test/libsolidity/ASTJSON/unicode.sol
Normal file
3
test/libsolidity/ASTJSON/unicode.sol
Normal file
@ -0,0 +1,3 @@
|
||||
contract C { function f() public { string memory x = unicode"Hello 😃"; } }
|
||||
|
||||
// ----
|
165
test/libsolidity/ASTJSON/unicode_legacy.json
Normal file
165
test/libsolidity/ASTJSON/unicode_legacy.json
Normal file
@ -0,0 +1,165 @@
|
||||
{
|
||||
"attributes":
|
||||
{
|
||||
"absolutePath": "a",
|
||||
"exportedSymbols":
|
||||
{
|
||||
"C":
|
||||
[
|
||||
9
|
||||
]
|
||||
}
|
||||
},
|
||||
"children":
|
||||
[
|
||||
{
|
||||
"attributes":
|
||||
{
|
||||
"abstract": false,
|
||||
"baseContracts":
|
||||
[
|
||||
null
|
||||
],
|
||||
"contractDependencies":
|
||||
[
|
||||
null
|
||||
],
|
||||
"contractKind": "contract",
|
||||
"fullyImplemented": true,
|
||||
"linearizedBaseContracts":
|
||||
[
|
||||
9
|
||||
],
|
||||
"name": "C",
|
||||
"scope": 10
|
||||
},
|
||||
"children":
|
||||
[
|
||||
{
|
||||
"attributes":
|
||||
{
|
||||
"functionSelector": "26121ff0",
|
||||
"implemented": true,
|
||||
"isConstructor": false,
|
||||
"kind": "function",
|
||||
"modifiers":
|
||||
[
|
||||
null
|
||||
],
|
||||
"name": "f",
|
||||
"scope": 9,
|
||||
"stateMutability": "nonpayable",
|
||||
"virtual": false,
|
||||
"visibility": "public"
|
||||
},
|
||||
"children":
|
||||
[
|
||||
{
|
||||
"attributes":
|
||||
{
|
||||
"parameters":
|
||||
[
|
||||
null
|
||||
]
|
||||
},
|
||||
"children": [],
|
||||
"id": 1,
|
||||
"name": "ParameterList",
|
||||
"src": "23:2:1"
|
||||
},
|
||||
{
|
||||
"attributes":
|
||||
{
|
||||
"parameters":
|
||||
[
|
||||
null
|
||||
]
|
||||
},
|
||||
"children": [],
|
||||
"id": 2,
|
||||
"name": "ParameterList",
|
||||
"src": "33:0:1"
|
||||
},
|
||||
{
|
||||
"children":
|
||||
[
|
||||
{
|
||||
"attributes":
|
||||
{
|
||||
"assignments":
|
||||
[
|
||||
4
|
||||
]
|
||||
},
|
||||
"children":
|
||||
[
|
||||
{
|
||||
"attributes":
|
||||
{
|
||||
"constant": false,
|
||||
"mutability": "mutable",
|
||||
"name": "x",
|
||||
"scope": 7,
|
||||
"stateVariable": false,
|
||||
"storageLocation": "memory",
|
||||
"type": "string",
|
||||
"visibility": "internal"
|
||||
},
|
||||
"children":
|
||||
[
|
||||
{
|
||||
"attributes":
|
||||
{
|
||||
"name": "string",
|
||||
"type": "string"
|
||||
},
|
||||
"id": 3,
|
||||
"name": "ElementaryTypeName",
|
||||
"src": "35:6:1"
|
||||
}
|
||||
],
|
||||
"id": 4,
|
||||
"name": "VariableDeclaration",
|
||||
"src": "35:15:1"
|
||||
},
|
||||
{
|
||||
"attributes":
|
||||
{
|
||||
"hexvalue": "48656c6c6f20f09f9883",
|
||||
"isConstant": false,
|
||||
"isLValue": false,
|
||||
"isPure": true,
|
||||
"lValueRequested": false,
|
||||
"token": "unicodeString",
|
||||
"type": "literal_string \"Hello \ud83d\ude03\"",
|
||||
"value": "Hello \ud83d\ude03"
|
||||
},
|
||||
"id": 5,
|
||||
"name": "Literal",
|
||||
"src": "53:19:1"
|
||||
}
|
||||
],
|
||||
"id": 6,
|
||||
"name": "VariableDeclarationStatement",
|
||||
"src": "35:37:1"
|
||||
}
|
||||
],
|
||||
"id": 7,
|
||||
"name": "Block",
|
||||
"src": "33:42:1"
|
||||
}
|
||||
],
|
||||
"id": 8,
|
||||
"name": "FunctionDefinition",
|
||||
"src": "13:62:1"
|
||||
}
|
||||
],
|
||||
"id": 9,
|
||||
"name": "ContractDefinition",
|
||||
"src": "0:77:1"
|
||||
}
|
||||
],
|
||||
"id": 10,
|
||||
"name": "SourceUnit",
|
||||
"src": "0:78:1"
|
||||
}
|
@ -81,6 +81,45 @@ BOOST_AUTO_TEST_CASE(assembly_multiple_assign)
|
||||
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
|
||||
}
|
||||
|
||||
/// Checks that every printable ASCII character (0x20 through 0x7e, both inclusive)
/// is scanned as a regular string literal whose value is exactly that character.
BOOST_AUTO_TEST_CASE(string_printable)
{
	// The upper bound is inclusive: '~' (0x7e) is the last printable character
	// and was previously left out by an exclusive `v < 0x7e` bound.
	for (unsigned v = 0x20; v <= 0x7e; v++)
	{
		string lit{static_cast<char>(v)};
		// Escape \ and " (since we are quoting with ")
		if (v == '\\' || v == '"')
			lit = string{'\\'} + lit;
		Scanner scanner(CharStream(" { \"" + lit + "\"", ""));
		BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace);
		BOOST_CHECK_EQUAL(scanner.next(), Token::StringLiteral);
		// The literal value is the raw character, without the escaping backslash.
		BOOST_CHECK_EQUAL(scanner.currentLiteral(), string{static_cast<char>(v)});
		BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
	}
	// Special case of unescaped " for strings quoted with '
	Scanner scanner(CharStream(" { '\"'", ""));
	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace);
	BOOST_CHECK_EQUAL(scanner.next(), Token::StringLiteral);
	BOOST_CHECK_EQUAL(scanner.currentLiteral(), "\"");
	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
|
||||
|
||||
/// Checks that every single byte outside the printable ASCII range 0x20..0x7e
/// is rejected when it appears unescaped inside a regular string literal.
BOOST_AUTO_TEST_CASE(string_nonprintable)
{
	// The upper bound is inclusive so that byte 0xff is exercised as well;
	// an exclusive `v < 0xff` bound silently skipped it.
	for (unsigned v = 0; v <= 0xff; v++)
	{
		// Skip the valid ones
		if (v >= 0x20 && v <= 0x7e)
			continue;
		string lit{static_cast<char>(v)};
		Scanner scanner(CharStream(" { \"" + lit + "\"", ""));
		BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace);
		BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
		// These four line terminators are expected to be reported as a missing
		// end quote; every other byte is reported as an invalid character.
		if (v == '\n' || v == '\v' || v == '\f' || v == '\r')
			BOOST_CHECK_EQUAL(scanner.currentError(), ScannerError::IllegalStringEndQuote);
		else
			BOOST_CHECK_EQUAL(scanner.currentError(), ScannerError::IllegalCharacterInString);
		BOOST_CHECK_EQUAL(scanner.currentLiteral(), "");
	}
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(string_escapes)
|
||||
{
|
||||
Scanner scanner(CharStream(" { \"a\\x61\"", ""));
|
||||
@ -506,6 +545,8 @@ BOOST_AUTO_TEST_CASE(empty_comment)
|
||||
|
||||
}
|
||||
|
||||
// Unicode string escapes
|
||||
|
||||
BOOST_AUTO_TEST_CASE(valid_unicode_string_escape)
|
||||
{
|
||||
Scanner scanner(CharStream("{ \"\\u00DAnicode\"", ""));
|
||||
@ -545,6 +586,25 @@ BOOST_AUTO_TEST_CASE(invalid_short_unicode_string_escape)
|
||||
BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
|
||||
}
|
||||
|
||||
// Unicode string literal
|
||||
|
||||
/// A `unicode"..."` literal containing a multi-byte UTF-8 character is scanned
/// as a UnicodeStringLiteral and its raw bytes are kept unchanged.
BOOST_AUTO_TEST_CASE(valid_unicode_literal)
{
	// "Hello " (6 bytes) followed by the 4 UTF-8 bytes of the emoji: 10 bytes in total.
	std::string const expectedLiteral("Hello \xf0\x9f\x98\x83", 10);

	Scanner scanner(CharStream("{ unicode\"Hello 😃\"", ""));
	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace);
	BOOST_CHECK_EQUAL(scanner.next(), Token::UnicodeStringLiteral);
	BOOST_CHECK_EQUAL(scanner.currentLiteral(), expectedLiteral);
}
|
||||
|
||||
/// Unlike regular string literals, a `unicode"..."` literal may contain a
/// non-printable character (here the byte 0x07) at the scanner level.
BOOST_AUTO_TEST_CASE(valid_nonprintable_in_unicode_literal)
{
	// "Hello " (6 bytes), the byte 0x07, then the 4 UTF-8 bytes of the emoji: 11 bytes in total.
	std::string const expectedLiteral("Hello \x07\xf0\x9f\x98\x83", 11);

	Scanner scanner(CharStream("{ unicode\"Hello \007😃\"", ""));
	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace);
	BOOST_CHECK_EQUAL(scanner.next(), Token::UnicodeStringLiteral);
	BOOST_CHECK_EQUAL(scanner.currentLiteral(), expectedLiteral);
}
|
||||
|
||||
// HEX STRING LITERAL
|
||||
|
||||
BOOST_AUTO_TEST_CASE(valid_hex_literal)
|
||||
|
@ -1,9 +1,14 @@
|
||||
contract C {
|
||||
function f() public pure returns (string memory) {
|
||||
return "😃, 😭, and 😈";
|
||||
return unicode"😃, 😭, and 😈";
|
||||
}
|
||||
function g() public pure returns (string memory) {
|
||||
return unicode"😃, 😭,\
|
||||
and 😈";
|
||||
}
|
||||
}
|
||||
// ====
|
||||
// compileViaYul: also
|
||||
// ----
|
||||
// f() -> 0x20, 0x14, "\xf0\x9f\x98\x83, \xf0\x9f\x98\xad, and \xf0\x9f\x98\x88"
|
||||
// g() -> 0x20, 0x14, "\xf0\x9f\x98\x83, \xf0\x9f\x98\xad, and \xf0\x9f\x98\x88"
|
||||
|
@ -1,4 +1,7 @@
|
||||
contract test {
|
||||
function fixedBytesHex() public returns(bytes32 ret) {
|
||||
return hex"aabb00ff";
|
||||
}
|
||||
function fixedBytes() public returns(bytes32 ret) {
|
||||
return "abc\x00\xff__";
|
||||
}
|
||||
@ -11,5 +14,6 @@ contract test {
|
||||
// ====
|
||||
// compileViaYul: also
|
||||
// ----
|
||||
// fixedBytesHex() -> "\xaa\xbb\0\xff"
|
||||
// fixedBytes() -> "abc\0\xff__"
|
||||
// pipeThrough(bytes2, bool): "\0\x02", true -> "\0\x2", true
|
||||
|
@ -1,3 +1,4 @@
|
||||
contract C {
|
||||
string s = "\xf0\x9f\xa6\x84";
|
||||
}
|
||||
// ----
|
||||
|
9
test/libsolidity/syntaxTests/string/string_ascii.sol
Normal file
9
test/libsolidity/syntaxTests/string/string_ascii.sol
Normal file
@ -0,0 +1,9 @@
|
||||
contract test {
|
||||
function f() public pure returns (string memory) {
|
||||
return "hello world";
|
||||
}
|
||||
function g() public pure returns (string memory) {
|
||||
return unicode"hello world";
|
||||
}
|
||||
}
|
||||
// ----
|
@ -0,0 +1,7 @@
|
||||
contract test {
|
||||
function f() public pure returns (bytes32) {
|
||||
bytes32 escapeCharacters = unicode"foo" unicode"😃, 😭, and 😈" unicode"!";
|
||||
return escapeCharacters;
|
||||
}
|
||||
}
|
||||
// ----
|
@ -0,0 +1,8 @@
|
||||
contract test {
|
||||
function f() public pure returns (bytes32) {
|
||||
bytes32 escapeCharacters = "foo" hex"aa" unicode"😃, 😭, and 😈" "!" hex"00";
|
||||
return escapeCharacters;
|
||||
}
|
||||
}
|
||||
// ----
|
||||
// ParserError 2314: (106-113): Expected ';' but got 'HexStringLiteral'
|
@ -1,6 +1,6 @@
|
||||
contract test {
|
||||
function f() public pure returns (string memory) {
|
||||
return "😃, 😭, and 😈";
|
||||
return unicode"😃, 😭, and 😈";
|
||||
}
|
||||
}
|
||||
// ----
|
||||
|
@ -0,0 +1,7 @@
|
||||
contract test {
|
||||
function f() public pure returns (string memory) {
|
||||
return "😃, 😭, and 😈";
|
||||
}
|
||||
}
|
||||
// ----
|
||||
// ParserError 8936: (86-88): Invalid character in string.
|
@ -19,3 +19,4 @@ contract test {
|
||||
return res;
|
||||
}
|
||||
}
|
||||
// ----
|
||||
|
Loading…
Reference in New Issue
Block a user