From 17fa85a2fbe27d97d080b17d89b064ba3dc078fe Mon Sep 17 00:00:00 2001 From: Paarth Madan Date: Fri, 11 Feb 2022 22:10:36 -0500 Subject: [PATCH] Add Base64 test cases with and without inline assembly Adds two implementations of Base64 encoding as specified in RFC4648. Implementation (1) uses inline assembly, while Implementation (2) is written purely in Solidity. Assertions are added to replicate the test vectors specified in the RFC for Base64 to ensure both implementations to specification. --- .../_base64/base64_inline_asm.sol | 96 +++++++++++++++++++ .../_base64/base64_no_inline_asm.sol | 39 ++++++++ .../externalContracts/base64.sol | 61 ++++++++++++ 3 files changed, 196 insertions(+) create mode 100644 test/libsolidity/semanticTests/externalContracts/_base64/base64_inline_asm.sol create mode 100644 test/libsolidity/semanticTests/externalContracts/_base64/base64_no_inline_asm.sol create mode 100644 test/libsolidity/semanticTests/externalContracts/base64.sol diff --git a/test/libsolidity/semanticTests/externalContracts/_base64/base64_inline_asm.sol b/test/libsolidity/semanticTests/externalContracts/_base64/base64_inline_asm.sol new file mode 100644 index 000000000..7df0c902d --- /dev/null +++ b/test/libsolidity/semanticTests/externalContracts/_base64/base64_inline_asm.sol @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: MIT + +pragma solidity ^0.8.0; + +/** + * @dev Provides a set of functions to operate with Base64 strings. + */ +library InlineAsmBase64 { + /** + * @dev Base64 Encoding/Decoding Table + */ + string internal constant _TABLE = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + + /** + * @dev Converts a `bytes` to its Base64 `string` representation. 
+ */ + function encode(bytes memory data) internal pure returns (string memory) { + /** + * Inspired by OpenZeppelin Base64 implementation + * https://github.com/OpenZeppelin/openzeppelin-contracts/pull/2884/commits/157c32b65a15cb0b58257543643cafa1cebf883a + */ + if (data.length == 0) return ""; + + // Loads the table into memory + string memory table = _TABLE; + + // Encoding takes 3-byte chunks of binary data from the `bytes` data parameter + // and splits each chunk into 4 numbers of 6 bits. + // The final Base64 length should be `bytes` data length multiplied by 4/3 rounded up + // - `data.length + 2` -> Round up + // - `/ 3` -> Number of 3-bytes chunks + // - `4 *` -> 4 characters for each chunk + uint256 encodedLen = 4 * ((data.length + 2) / 3); + + // Allocate exactly `encodedLen` bytes; characters are written with `mstore8`, so no extra buffer is needed + string memory result = new string(encodedLen); + + assembly { + // Store the actual result length in memory + mstore(result, encodedLen) + + // Prepare the lookup table; offset by 1 so that `mload` leaves the table character in the lowest byte + let tablePtr := add(table, 1) + + // Prepare input pointer + let dataPtr := data + let endPtr := add(dataPtr, mload(data)) + + // Prepare result pointer, jump over length + let resultPtr := add(result, 32) + + // Run over the input, 3 bytes at a time + for { + + } lt(dataPtr, endPtr) { + + } { + // Advance 3 bytes; the loaded word then holds the current chunk in its lowest 3 bytes + dataPtr := add(dataPtr, 3) + let input := mload(dataPtr) + + // To write each character, shift the 3 bytes (24 bits) chunk 4 + // times in blocks of 6 bits for each character (18, 12, 6, 0) + // and apply logical AND with 0x3F to extract the 6-bit group. + // Add the 6-bit group with the table ptr to index into the + // table and acquire the character to write. Finally, write + // the character to the result pointer. 
+ + mstore8(resultPtr, mload(add(tablePtr, and(shr(18, input), 0x3F)))) + resultPtr := add(resultPtr, 1) // Advance + + mstore8(resultPtr, mload(add(tablePtr, and(shr(12, input), 0x3F)))) + resultPtr := add(resultPtr, 1) // Advance + + mstore8(resultPtr, mload(add(tablePtr, and(shr(6, input), 0x3F)))) + resultPtr := add(resultPtr, 1) // Advance + + mstore8(resultPtr, mload(add(tablePtr, and(input, 0x3F)))) + resultPtr := add(resultPtr, 1) // Advance + } + + // When data `bytes` is not a multiple of 3 bytes long + // it is padded with `=` characters at the end. NOTE(review): in that case the last `mload` above also covers bytes past the end of `data`; presumably they are zero - confirm + switch mod(mload(data), 3) + case 1 { + mstore8(sub(resultPtr, 1), 0x3d) + mstore8(sub(resultPtr, 2), 0x3d) + } + case 2 { + mstore8(sub(resultPtr, 1), 0x3d) + } + } + + return result; + } +} diff --git a/test/libsolidity/semanticTests/externalContracts/_base64/base64_no_inline_asm.sol b/test/libsolidity/semanticTests/externalContracts/_base64/base64_no_inline_asm.sol new file mode 100644 index 000000000..f54d2f925 --- /dev/null +++ b/test/libsolidity/semanticTests/externalContracts/_base64/base64_no_inline_asm.sol @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT + +pragma solidity ^0.8.0; + +/** + * @dev Provides a set of functions to operate with Base64 strings. + */ +library NoAsmBase64 { + bytes private constant TABLE = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + + function encode(bytes memory data) internal pure returns (string memory) { + if (data.length == 0) return ""; + + bytes memory table = TABLE; + bytes memory result = new bytes(4 * ((data.length + 2) / 3)); + uint256 resultPtr = 0; + + for (uint256 dataPtr = 0; dataPtr < data.length; dataPtr += 3) { + uint24 chunk = ( (uint24(uint8(data[dataPtr + 0])) << 16)) + (dataPtr + 1 < data.length ? (uint24(uint8(data[dataPtr + 1])) << 8) : 0) + (dataPtr + 2 < data.length ? 
(uint24(uint8(data[dataPtr + 2])) ) : 0); + + result[resultPtr++] = table[uint8(chunk >> 18) & 0x3f]; + result[resultPtr++] = table[uint8(chunk >> 12) & 0x3f]; + result[resultPtr++] = table[uint8(chunk >> 6) & 0x3f]; + result[resultPtr++] = table[uint8(chunk ) & 0x3f]; + } + + if (data.length % 3 == 1) { + result[--resultPtr] = 0x3d; + result[--resultPtr] = 0x3d; + } + else if (data.length % 3 == 2) { + result[--resultPtr] = 0x3d; + } + + return (string(result)); + } +} diff --git a/test/libsolidity/semanticTests/externalContracts/base64.sol b/test/libsolidity/semanticTests/externalContracts/base64.sol new file mode 100644 index 000000000..fd17c5d78 --- /dev/null +++ b/test/libsolidity/semanticTests/externalContracts/base64.sol @@ -0,0 +1,61 @@ +==== ExternalSource: _base64/base64_inline_asm.sol ==== +==== ExternalSource: _base64/base64_no_inline_asm.sol ==== +==== Source: base64.sol ==== + +import "_base64/base64_inline_asm.sol"; +import "_base64/base64_no_inline_asm.sol"; + +contract test { + function encode_inline_asm(bytes memory data) external pure returns (string memory) { + return InlineAsmBase64.encode(data); + } + + function encode_no_asm(bytes memory data) external pure returns (string memory) { + return NoAsmBase64.encode(data); + } + + function encode_inline_asm_large() external { + for (uint i = 0; i < 1000; i++) { + InlineAsmBase64.encode("foo"); + } + } + + function encode_no_asm_large() external { + for (uint i = 0; i < 1000; i++) { + NoAsmBase64.encode("foo"); + } + } +} +// Test cases derived from Base64 specification: RFC4648 +// https://datatracker.ietf.org/doc/html/rfc4648#section-10 +// +// ==== +// EVMVersion: >=constantinople +// compileViaYul: also +// ---- +// constructor() +// gas irOptimized: 450044 +// gas legacy: 766936 +// gas legacyOptimized: 543094 +// encode_inline_asm(bytes): 0x20, 0 -> 0x20, 0 +// encode_inline_asm(bytes): 0x20, 1, "f" -> 0x20, 4, "Zg==" +// encode_inline_asm(bytes): 0x20, 2, "fo" -> 0x20, 4, "Zm8=" +// 
encode_inline_asm(bytes): 0x20, 3, "foo" -> 0x20, 4, "Zm9v" +// encode_inline_asm(bytes): 0x20, 4, "foob" -> 0x20, 8, "Zm9vYg==" +// encode_inline_asm(bytes): 0x20, 5, "fooba" -> 0x20, 8, "Zm9vYmE=" +// encode_inline_asm(bytes): 0x20, 6, "foobar" -> 0x20, 8, "Zm9vYmFy" +// encode_no_asm(bytes): 0x20, 0 -> 0x20, 0 +// encode_no_asm(bytes): 0x20, 1, "f" -> 0x20, 4, "Zg==" +// encode_no_asm(bytes): 0x20, 2, "fo" -> 0x20, 4, "Zm8=" +// encode_no_asm(bytes): 0x20, 3, "foo" -> 0x20, 4, "Zm9v" +// encode_no_asm(bytes): 0x20, 4, "foob" -> 0x20, 8, "Zm9vYg==" +// encode_no_asm(bytes): 0x20, 5, "fooba" -> 0x20, 8, "Zm9vYmE=" +// encode_no_asm(bytes): 0x20, 6, "foobar" -> 0x20, 8, "Zm9vYmFy" +// encode_inline_asm_large() +// gas irOptimized: 1385047 +// gas legacy: 1658033 +// gas legacyOptimized: 1210033 +// encode_no_asm_large() +// gas irOptimized: 3335101 +// gas legacy: 4801077 +// gas legacyOptimized: 2929077