Update stringutils to upstream 3c63f18

This commit is contained in:
Alex Beregszaszi 2018-07-23 15:56:54 +01:00
parent afccf15f03
commit 2ecb7a2f72

View File

@ -33,13 +33,16 @@
* `s.splitNew('.')` leaves s unmodified, and returns two values * `s.splitNew('.')` leaves s unmodified, and returns two values
* corresponding to the left and right parts of the string. * corresponding to the left and right parts of the string.
*/ */
pragma solidity ^0.4.14;
library strings { library strings {
struct slice { struct slice {
uint _len; uint _len;
uint _ptr; uint _ptr;
} }
function memcpy(uint dest, uint src, uint len) private { function memcpy(uint dest, uint src, uint len) private pure {
// Copy word-length chunks while possible // Copy word-length chunks while possible
for(; len >= 32; len -= 32) { for(; len >= 32; len -= 32) {
assembly { assembly {
@ -63,7 +66,7 @@ library strings {
* @param self The string to make a slice from. * @param self The string to make a slice from.
* @return A newly allocated slice containing the entire string. * @return A newly allocated slice containing the entire string.
*/ */
function toSlice(string memory self) internal returns (slice memory) { function toSlice(string memory self) internal pure returns (slice memory) {
uint ptr; uint ptr;
assembly { assembly {
ptr := add(self, 0x20) ptr := add(self, 0x20)
@ -76,7 +79,7 @@ library strings {
* @param self The value to find the length of. * @param self The value to find the length of.
* @return The length of the string, from 0 to 32. * @return The length of the string, from 0 to 32.
*/ */
function len(bytes32 self) internal returns (uint) { function len(bytes32 self) internal pure returns (uint) {
uint ret; uint ret;
if (self == 0) if (self == 0)
return 0; return 0;
@ -104,12 +107,12 @@ library strings {
/* /*
* @dev Returns a slice containing the entire bytes32, interpreted as a * @dev Returns a slice containing the entire bytes32, interpreted as a
* null-termintaed utf-8 string. * null-terminated utf-8 string.
* @param self The bytes32 value to convert to a slice. * @param self The bytes32 value to convert to a slice.
* @return A new slice containing the value of the input argument up to the * @return A new slice containing the value of the input argument up to the
* first null. * first null.
*/ */
function toSliceB32(bytes32 self) internal returns (slice memory ret) { function toSliceB32(bytes32 self) internal pure returns (slice memory ret) {
// Allocate space for `self` in memory, copy it there, and point ret at it // Allocate space for `self` in memory, copy it there, and point ret at it
assembly { assembly {
let ptr := mload(0x40) let ptr := mload(0x40)
@ -125,7 +128,7 @@ library strings {
* @param self The slice to copy. * @param self The slice to copy.
* @return A new slice containing the same data as `self`. * @return A new slice containing the same data as `self`.
*/ */
function copy(slice memory self) internal returns (slice memory) { function copy(slice memory self) internal pure returns (slice memory) {
return slice(self._len, self._ptr); return slice(self._len, self._ptr);
} }
@ -134,7 +137,7 @@ library strings {
* @param self The slice to copy. * @param self The slice to copy.
* @return A newly allocated string containing the slice's text. * @return A newly allocated string containing the slice's text.
*/ */
function toString(slice memory self) internal returns (string memory) { function toString(slice memory self) internal pure returns (string memory) {
string memory ret = new string(self._len); string memory ret = new string(self._len);
uint retptr; uint retptr;
assembly { retptr := add(ret, 32) } assembly { retptr := add(ret, 32) }
@ -151,12 +154,11 @@ library strings {
* @param self The slice to operate on. * @param self The slice to operate on.
* @return The length of the slice in runes. * @return The length of the slice in runes.
*/ */
function len(slice memory self) internal returns (uint) { function len(slice memory self) internal pure returns (uint l) {
// Starting at ptr-31 means the LSB will be the byte we care about // Starting at ptr-31 means the LSB will be the byte we care about
uint ptr = self._ptr - 31; uint ptr = self._ptr - 31;
uint end = ptr + self._len; uint end = ptr + self._len;
uint len; for (l = 0; ptr < end; l++) {
for (len = 0; ptr < end; len++) {
uint8 b; uint8 b;
assembly { b := and(mload(ptr), 0xFF) } assembly { b := and(mload(ptr), 0xFF) }
if (b < 0x80) { if (b < 0x80) {
@ -173,7 +175,6 @@ library strings {
ptr += 6; ptr += 6;
} }
} }
return len;
} }
/* /*
@ -181,7 +182,7 @@ library strings {
* @param self The slice to operate on. * @param self The slice to operate on.
* @return True if the slice is empty, False otherwise. * @return True if the slice is empty, False otherwise.
*/ */
function empty(slice memory self) internal returns (bool) { function empty(slice memory self) internal pure returns (bool) {
return self._len == 0; return self._len == 0;
} }
@ -194,7 +195,7 @@ library strings {
* @param other The second slice to compare. * @param other The second slice to compare.
* @return The result of the comparison. * @return The result of the comparison.
*/ */
function compare(slice memory self, slice memory other) internal returns (int) { function compare(slice memory self, slice memory other) internal pure returns (int) {
uint shortest = self._len; uint shortest = self._len;
if (other._len < self._len) if (other._len < self._len)
shortest = other._len; shortest = other._len;
@ -210,8 +211,11 @@ library strings {
} }
if (a != b) { if (a != b) {
// Mask out irrelevant bytes and check again // Mask out irrelevant bytes and check again
uint mask = ~(2 ** (8 * (32 - shortest + idx)) - 1); uint256 mask = uint256(-1); // 0xffff...
uint diff = (a & mask) - (b & mask); if(shortest < 32) {
mask = ~(2 ** (8 * (32 - shortest + idx)) - 1);
}
uint256 diff = (a & mask) - (b & mask);
if (diff != 0) if (diff != 0)
return int(diff); return int(diff);
} }
@ -227,7 +231,7 @@ library strings {
* @param other The second slice to compare. * @param other The second slice to compare.
* @return True if the slices are equal, false otherwise. * @return True if the slices are equal, false otherwise.
*/ */
function equals(slice memory self, slice memory other) internal returns (bool) { function equals(slice memory self, slice memory other) internal pure returns (bool) {
return compare(self, other) == 0; return compare(self, other) == 0;
} }
@ -238,7 +242,7 @@ library strings {
* @param rune The slice that will contain the first rune. * @param rune The slice that will contain the first rune.
* @return `rune`. * @return `rune`.
*/ */
function nextRune(slice memory self, slice memory rune) internal returns (slice memory) { function nextRune(slice memory self, slice memory rune) internal pure returns (slice memory) {
rune._ptr = self._ptr; rune._ptr = self._ptr;
if (self._len == 0) { if (self._len == 0) {
@ -246,31 +250,31 @@ library strings {
return rune; return rune;
} }
uint len; uint l;
uint b; uint b;
// Load the first byte of the rune into the LSBs of b // Load the first byte of the rune into the LSBs of b
assembly { b := and(mload(sub(mload(add(self, 32)), 31)), 0xFF) } assembly { b := and(mload(sub(mload(add(self, 32)), 31)), 0xFF) }
if (b < 0x80) { if (b < 0x80) {
len = 1; l = 1;
} else if(b < 0xE0) { } else if(b < 0xE0) {
len = 2; l = 2;
} else if(b < 0xF0) { } else if(b < 0xF0) {
len = 3; l = 3;
} else { } else {
len = 4; l = 4;
} }
// Check for truncated codepoints // Check for truncated codepoints
if (len > self._len) { if (l > self._len) {
rune._len = self._len; rune._len = self._len;
self._ptr += self._len; self._ptr += self._len;
self._len = 0; self._len = 0;
return rune; return rune;
} }
self._ptr += len; self._ptr += l;
self._len -= len; self._len -= l;
rune._len = len; rune._len = l;
return rune; return rune;
} }
@ -280,7 +284,7 @@ library strings {
* @param self The slice to operate on. * @param self The slice to operate on.
* @return A slice containing only the first rune from `self`. * @return A slice containing only the first rune from `self`.
*/ */
function nextRune(slice memory self) internal returns (slice memory ret) { function nextRune(slice memory self) internal pure returns (slice memory ret) {
nextRune(self, ret); nextRune(self, ret);
} }
@ -289,40 +293,40 @@ library strings {
* @param self The slice to operate on. * @param self The slice to operate on.
* @return The number of the first codepoint in the slice. * @return The number of the first codepoint in the slice.
*/ */
function ord(slice memory self) internal returns (uint ret) { function ord(slice memory self) internal pure returns (uint ret) {
if (self._len == 0) { if (self._len == 0) {
return 0; return 0;
} }
uint word; uint word;
uint len; uint length;
uint div = 2 ** 248; uint divisor = 2 ** 248;
// Load the rune into the MSBs of word // Load the rune into the MSBs of word
assembly { word:= mload(mload(add(self, 32))) } assembly { word:= mload(mload(add(self, 32))) }
uint b = word / div; uint b = word / divisor;
if (b < 0x80) { if (b < 0x80) {
ret = b; ret = b;
len = 1; length = 1;
} else if(b < 0xE0) { } else if(b < 0xE0) {
ret = b & 0x1F; ret = b & 0x1F;
len = 2; length = 2;
} else if(b < 0xF0) { } else if(b < 0xF0) {
ret = b & 0x0F; ret = b & 0x0F;
len = 3; length = 3;
} else { } else {
ret = b & 0x07; ret = b & 0x07;
len = 4; length = 4;
} }
// Check for truncated codepoints // Check for truncated codepoints
if (len > self._len) { if (length > self._len) {
return 0; return 0;
} }
for (uint i = 1; i < len; i++) { for (uint i = 1; i < length; i++) {
div = div / 256; divisor = divisor / 256;
b = (word / div) & 0xFF; b = (word / divisor) & 0xFF;
if (b & 0xC0 != 0x80) { if (b & 0xC0 != 0x80) {
// Invalid UTF-8 sequence // Invalid UTF-8 sequence
return 0; return 0;
@ -338,7 +342,7 @@ library strings {
* @param self The slice to hash. * @param self The slice to hash.
* @return The hash of the slice. * @return The hash of the slice.
*/ */
function keccak(slice memory self) internal returns (bytes32 ret) { function keccak(slice memory self) internal pure returns (bytes32 ret) {
assembly { assembly {
ret := keccak256(mload(add(self, 32)), mload(self)) ret := keccak256(mload(add(self, 32)), mload(self))
} }
@ -350,7 +354,7 @@ library strings {
* @param needle The slice to search for. * @param needle The slice to search for.
* @return True if the slice starts with the provided text, false otherwise. * @return True if the slice starts with the provided text, false otherwise.
*/ */
function startsWith(slice memory self, slice memory needle) internal returns (bool) { function startsWith(slice memory self, slice memory needle) internal pure returns (bool) {
if (self._len < needle._len) { if (self._len < needle._len) {
return false; return false;
} }
@ -361,10 +365,10 @@ library strings {
bool equal; bool equal;
assembly { assembly {
let len := mload(needle) let length := mload(needle)
let selfptr := mload(add(self, 0x20)) let selfptr := mload(add(self, 0x20))
let needleptr := mload(add(needle, 0x20)) let needleptr := mload(add(needle, 0x20))
equal := eq(keccak256(selfptr, len), keccak256(needleptr, len)) equal := eq(keccak256(selfptr, length), keccak256(needleptr, length))
} }
return equal; return equal;
} }
@ -376,7 +380,7 @@ library strings {
* @param needle The slice to search for. * @param needle The slice to search for.
* @return `self` * @return `self`
*/ */
function beyond(slice memory self, slice memory needle) internal returns (slice memory) { function beyond(slice memory self, slice memory needle) internal pure returns (slice memory) {
if (self._len < needle._len) { if (self._len < needle._len) {
return self; return self;
} }
@ -384,10 +388,10 @@ library strings {
bool equal = true; bool equal = true;
if (self._ptr != needle._ptr) { if (self._ptr != needle._ptr) {
assembly { assembly {
let len := mload(needle) let length := mload(needle)
let selfptr := mload(add(self, 0x20)) let selfptr := mload(add(self, 0x20))
let needleptr := mload(add(needle, 0x20)) let needleptr := mload(add(needle, 0x20))
equal := eq(keccak256(selfptr, len), keccak256(needleptr, len)) equal := eq(keccak256(selfptr, length), keccak256(needleptr, length))
} }
} }
@ -405,7 +409,7 @@ library strings {
* @param needle The slice to search for. * @param needle The slice to search for.
* @return True if the slice ends with the provided text, false otherwise. * @return True if the slice ends with the provided text, false otherwise.
*/ */
function endsWith(slice memory self, slice memory needle) internal returns (bool) { function endsWith(slice memory self, slice memory needle) internal pure returns (bool) {
if (self._len < needle._len) { if (self._len < needle._len) {
return false; return false;
} }
@ -418,9 +422,9 @@ library strings {
bool equal; bool equal;
assembly { assembly {
let len := mload(needle) let length := mload(needle)
let needleptr := mload(add(needle, 0x20)) let needleptr := mload(add(needle, 0x20))
equal := eq(keccak256(selfptr, len), keccak256(needleptr, len)) equal := eq(keccak256(selfptr, length), keccak256(needleptr, length))
} }
return equal; return equal;
@ -433,7 +437,7 @@ library strings {
* @param needle The slice to search for. * @param needle The slice to search for.
* @return `self` * @return `self`
*/ */
function until(slice memory self, slice memory needle) internal returns (slice memory) { function until(slice memory self, slice memory needle) internal pure returns (slice memory) {
if (self._len < needle._len) { if (self._len < needle._len) {
return self; return self;
} }
@ -442,9 +446,9 @@ library strings {
bool equal = true; bool equal = true;
if (selfptr != needle._ptr) { if (selfptr != needle._ptr) {
assembly { assembly {
let len := mload(needle) let length := mload(needle)
let needleptr := mload(add(needle, 0x20)) let needleptr := mload(add(needle, 0x20))
equal := eq(keccak256(selfptr, len), keccak256(needleptr, len)) equal := eq(keccak256(selfptr, length), keccak256(needleptr, length))
} }
} }
@ -457,31 +461,33 @@ library strings {
// Returns the memory address of the first byte of the first occurrence of // Returns the memory address of the first byte of the first occurrence of
// `needle` in `self`, or the first byte after `self` if not found. // `needle` in `self`, or the first byte after `self` if not found.
function findPtr(uint selflen, uint selfptr, uint needlelen, uint needleptr) private returns (uint) { function findPtr(uint selflen, uint selfptr, uint needlelen, uint needleptr) private pure returns (uint) {
uint ptr; uint ptr = selfptr;
uint idx; uint idx;
if (needlelen <= selflen) { if (needlelen <= selflen) {
if (needlelen <= 32) { if (needlelen <= 32) {
// Optimized assembly for 68 gas per byte on short strings bytes32 mask = bytes32(~(2 ** (8 * (32 - needlelen)) - 1));
assembly {
let mask := not(sub(exp(2, mul(8, sub(32, needlelen))), 1)) bytes32 needledata;
let needledata := and(mload(needleptr), mask) assembly { needledata := and(mload(needleptr), mask) }
let end := add(selfptr, sub(selflen, needlelen))
ptr := selfptr uint end = selfptr + selflen - needlelen;
loop: bytes32 ptrdata;
jumpi(exit, eq(and(mload(ptr), mask), needledata)) assembly { ptrdata := and(mload(ptr), mask) }
ptr := add(ptr, 1)
jumpi(loop, lt(sub(ptr, 1), end)) while (ptrdata != needledata) {
ptr := add(selfptr, selflen) if (ptr >= end)
exit: return selfptr + selflen;
ptr++;
assembly { ptrdata := and(mload(ptr), mask) }
} }
return ptr; return ptr;
} else { } else {
// For long needles, use hashing // For long needles, use hashing
bytes32 hash; bytes32 hash;
assembly { hash := keccak256(needleptr, needlelen) } assembly { hash := keccak256(needleptr, needlelen) }
ptr = selfptr;
for (idx = 0; idx <= selflen - needlelen; idx++) { for (idx = 0; idx <= selflen - needlelen; idx++) {
bytes32 testHash; bytes32 testHash;
assembly { testHash := keccak256(ptr, needlelen) } assembly { testHash := keccak256(ptr, needlelen) }
@ -496,27 +502,27 @@ library strings {
// Returns the memory address of the first byte after the last occurrence of // Returns the memory address of the first byte after the last occurrence of
// `needle` in `self`, or the address of `self` if not found. // `needle` in `self`, or the address of `self` if not found.
function rfindPtr(uint selflen, uint selfptr, uint needlelen, uint needleptr) private returns (uint) { function rfindPtr(uint selflen, uint selfptr, uint needlelen, uint needleptr) private pure returns (uint) {
uint ptr; uint ptr;
if (needlelen <= selflen) { if (needlelen <= selflen) {
if (needlelen <= 32) { if (needlelen <= 32) {
// Optimized assembly for 69 gas per byte on short strings bytes32 mask = bytes32(~(2 ** (8 * (32 - needlelen)) - 1));
assembly {
let mask := not(sub(exp(2, mul(8, sub(32, needlelen))), 1)) bytes32 needledata;
let needledata := and(mload(needleptr), mask) assembly { needledata := and(mload(needleptr), mask) }
ptr := add(selfptr, sub(selflen, needlelen))
loop: ptr = selfptr + selflen - needlelen;
jumpi(ret, eq(and(mload(ptr), mask), needledata)) bytes32 ptrdata;
ptr := sub(ptr, 1) assembly { ptrdata := and(mload(ptr), mask) }
jumpi(loop, gt(add(ptr, 1), selfptr))
ptr := selfptr while (ptrdata != needledata) {
jump(exit) if (ptr <= selfptr)
ret: return selfptr;
ptr := add(ptr, needlelen) ptr--;
exit: assembly { ptrdata := and(mload(ptr), mask) }
} }
return ptr; return ptr + needlelen;
} else { } else {
// For long needles, use hashing // For long needles, use hashing
bytes32 hash; bytes32 hash;
@ -542,7 +548,7 @@ library strings {
* @param needle The text to search for. * @param needle The text to search for.
* @return `self`. * @return `self`.
*/ */
function find(slice memory self, slice memory needle) internal returns (slice memory) { function find(slice memory self, slice memory needle) internal pure returns (slice memory) {
uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr); uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr);
self._len -= ptr - self._ptr; self._len -= ptr - self._ptr;
self._ptr = ptr; self._ptr = ptr;
@ -557,7 +563,7 @@ library strings {
* @param needle The text to search for. * @param needle The text to search for.
* @return `self`. * @return `self`.
*/ */
function rfind(slice memory self, slice memory needle) internal returns (slice memory) { function rfind(slice memory self, slice memory needle) internal pure returns (slice memory) {
uint ptr = rfindPtr(self._len, self._ptr, needle._len, needle._ptr); uint ptr = rfindPtr(self._len, self._ptr, needle._len, needle._ptr);
self._len = ptr - self._ptr; self._len = ptr - self._ptr;
return self; return self;
@ -573,7 +579,7 @@ library strings {
* @param token An output parameter to which the first token is written. * @param token An output parameter to which the first token is written.
* @return `token`. * @return `token`.
*/ */
function split(slice memory self, slice memory needle, slice memory token) internal returns (slice memory) { function split(slice memory self, slice memory needle, slice memory token) internal pure returns (slice memory) {
uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr); uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr);
token._ptr = self._ptr; token._ptr = self._ptr;
token._len = ptr - self._ptr; token._len = ptr - self._ptr;
@ -596,7 +602,7 @@ library strings {
* @param needle The text to search for in `self`. * @param needle The text to search for in `self`.
* @return The part of `self` up to the first occurrence of `needle`. * @return The part of `self` up to the first occurrence of `needle`.
*/ */
function split(slice memory self, slice memory needle) internal returns (slice memory token) { function split(slice memory self, slice memory needle) internal pure returns (slice memory token) {
split(self, needle, token); split(self, needle, token);
} }
@ -610,7 +616,7 @@ library strings {
* @param token An output parameter to which the first token is written. * @param token An output parameter to which the first token is written.
* @return `token`. * @return `token`.
*/ */
function rsplit(slice memory self, slice memory needle, slice memory token) internal returns (slice memory) { function rsplit(slice memory self, slice memory needle, slice memory token) internal pure returns (slice memory) {
uint ptr = rfindPtr(self._len, self._ptr, needle._len, needle._ptr); uint ptr = rfindPtr(self._len, self._ptr, needle._len, needle._ptr);
token._ptr = ptr; token._ptr = ptr;
token._len = self._len - (ptr - self._ptr); token._len = self._len - (ptr - self._ptr);
@ -632,7 +638,7 @@ library strings {
* @param needle The text to search for in `self`. * @param needle The text to search for in `self`.
* @return The part of `self` after the last occurrence of `needle`. * @return The part of `self` after the last occurrence of `needle`.
*/ */
function rsplit(slice memory self, slice memory needle) internal returns (slice memory token) { function rsplit(slice memory self, slice memory needle) internal pure returns (slice memory token) {
rsplit(self, needle, token); rsplit(self, needle, token);
} }
@ -642,10 +648,10 @@ library strings {
* @param needle The text to search for in `self`. * @param needle The text to search for in `self`.
* @return The number of occurrences of `needle` found in `self`. * @return The number of occurrences of `needle` found in `self`.
*/ */
function count(slice memory self, slice memory needle) internal returns (uint count) { function count(slice memory self, slice memory needle) internal pure returns (uint cnt) {
uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr) + needle._len; uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr) + needle._len;
while (ptr <= self._ptr + self._len) { while (ptr <= self._ptr + self._len) {
count++; cnt++;
ptr = findPtr(self._len - (ptr - self._ptr), ptr, needle._len, needle._ptr) + needle._len; ptr = findPtr(self._len - (ptr - self._ptr), ptr, needle._len, needle._ptr) + needle._len;
} }
} }
@ -656,7 +662,7 @@ library strings {
* @param needle The text to search for in `self`. * @param needle The text to search for in `self`.
* @return True if `needle` is found in `self`, false otherwise. * @return True if `needle` is found in `self`, false otherwise.
*/ */
function contains(slice memory self, slice memory needle) internal returns (bool) { function contains(slice memory self, slice memory needle) internal pure returns (bool) {
return rfindPtr(self._len, self._ptr, needle._len, needle._ptr) != self._ptr; return rfindPtr(self._len, self._ptr, needle._len, needle._ptr) != self._ptr;
} }
@ -667,7 +673,7 @@ library strings {
* @param other The second slice to concatenate. * @param other The second slice to concatenate.
* @return The concatenation of the two strings. * @return The concatenation of the two strings.
*/ */
function concat(slice memory self, slice memory other) internal returns (string memory) { function concat(slice memory self, slice memory other) internal pure returns (string memory) {
string memory ret = new string(self._len + other._len); string memory ret = new string(self._len + other._len);
uint retptr; uint retptr;
assembly { retptr := add(ret, 32) } assembly { retptr := add(ret, 32) }
@ -684,19 +690,19 @@ library strings {
* @return A newly allocated string containing all the slices in `parts`, * @return A newly allocated string containing all the slices in `parts`,
* joined with `self`. * joined with `self`.
*/ */
function join(slice memory self, slice[] memory parts) internal returns (string memory) { function join(slice memory self, slice[] memory parts) internal pure returns (string memory) {
if (parts.length == 0) if (parts.length == 0)
return ""; return "";
uint len = self._len * (parts.length - 1); uint length = self._len * (parts.length - 1);
for(uint i = 0; i < parts.length; i++) for(uint i = 0; i < parts.length; i++)
len += parts[i]._len; length += parts[i]._len;
string memory ret = new string(len); string memory ret = new string(length);
uint retptr; uint retptr;
assembly { retptr := add(ret, 32) } assembly { retptr := add(ret, 32) }
for(uint i = 0; i < parts.length; i++) { for(i = 0; i < parts.length; i++) {
memcpy(retptr, parts[i]._ptr, parts[i]._len); memcpy(retptr, parts[i]._ptr, parts[i]._len);
retptr += parts[i]._len; retptr += parts[i]._len;
if (i < parts.length - 1) { if (i < parts.length - 1) {