solidity/scripts/isolate_tests.py

#!/usr/bin/env python3
#
# This script reads Solidity, C++ or RST source files and writes the
# Solidity and Yul code found in them into individual files:
# whole .sol files, C++ raw string literals and documentation code blocks.
# This can be used to extract the Solidity test cases
# into files for e.g. fuzz testing as
# scripts/isolate_tests.py test/libsolidity/

import re
import os
import hashlib
from os.path import join, isfile, basename
from argparse import ArgumentParser
from textwrap import indent, dedent

def extract_test_cases(path):
    """Return the bodies of the C++ raw string literals found in a file.

    A literal is recognised when a line (ignoring surrounding whitespace)
    ends with ``R"<delimiter>(``; it is closed by the first following line
    that (again ignoring surrounding whitespace) ends with
    ``)<delimiter>";``. The opening and closing lines are not part of the
    result. Every line in between is kept unmodified and terminated with a
    single ``"\\n"``.

    Undecodable bytes in the file are silently dropped (``errors='ignore'``).

    :param path: path of the file to scan.
    :return: list with one string per raw string literal, in file order.
    """
    with open(path, encoding="utf8", errors='ignore', mode='r', newline='') as file:
        source_lines = file.read().splitlines()

    opening = re.compile(r'R"([^(]*)\($')
    tests = []
    # Closing sequence of the literal currently being collected,
    # or None while we are outside of any raw string literal.
    terminator = None

    for raw_line in source_lines:
        trimmed = raw_line.strip()

        if terminator is None:
            found = opening.search(trimmed)
            if found:
                # The custom delimiter has to be repeated in the closing sequence.
                terminator = ')' + found.group(1) + '";'
                tests.append('')
        elif trimmed.endswith(terminator):
            terminator = None
        else:
            tests[-1] += raw_line + '\n'

    return tests

def extract_solidity_docs_cases(path):
    """Return the Solidity snippets of an RST file that look compilable.

    Snippets are the blocks following a ``.. code-block:: solidity`` line or
    a line starting with ``::``. A snippet is kept only if one of its lines
    starts with four whitespace characters followed by an SPDX license
    comment, a ``pragma solidity`` or something that looks like the opening
    of a contract, library or interface. Leading newlines are removed from
    the snippets that are kept.

    :param path: path of the RST file.
    :return: list of snippet strings, in file order.
    """
    codeStart = "(// SPDX-License-Identifier:|pragma solidity|contract.*{|library.*{|interface.*{)"
    looks_compilable = re.compile(r'^\s{4}' + codeStart, re.MULTILINE)

    compilable = []
    for snippet in extract_docs_cases(path, [".. code-block:: solidity", '::']):
        # Skip snippets that are not supposed to be compilable.
        if looks_compilable.search(snippet) is None:
            continue
        compilable.append(snippet.lstrip("\n"))

    return compilable

def extract_yul_docs_cases(path):
    """Return the Yul snippets of an RST file, each usable as a standalone source.

    Snippets are the blocks following a ``.. code-block:: yul`` line.
    Snippets consisting only of whitespace are dropped. A snippet whose
    first line that is not a ``//`` comment starts with neither ``object``
    nor ``{`` is wrapped in a pair of braces.

    :param path: path of the RST file.
    :return: list of snippet strings, in file order.
    """
    def wrap_in_object(code):
        # First line (without its indentation) that is not a '//' comment;
        # None when the snippet consists of such comments only.
        stripped_lines = (text.lstrip() for text in code.splitlines())
        first_code_line = next(
            (text for text in stripped_lines if not text.startswith("//")),
            None
        )

        if first_code_line is None or first_code_line.startswith(("object", "{")):
            return code

        # Bare statements: enclose them in a block, indented like the snippet itself.
        return indent(f"{{\n{code.rstrip()}\n}}\n\n", "    ")

    wrapped = []
    for snippet in extract_docs_cases(path, [".. code-block:: yul"]):
        if snippet.strip() == "":
            continue
        wrapped.append(wrap_in_object(snippet))

    return wrapped

def extract_docs_cases(path, beginMarkers):
    """Collect the code blocks that follow any of the given marker lines.

    A block starts after a line that, once lowercased, starts with one of
    ``beginMarkers`` (so the markers are expected to be lowercase). Directly
    after the marker, empty lines and lines whose first non-blank character
    is ``:`` (Sphinx options) are skipped. From the first other line on,
    every line that is empty or starts with a space is added to the block,
    unmodified and terminated with ``"\\n"``. The block ends at EOF or at
    the first line that is neither empty nor starts with a space; that line
    may itself be a marker opening the next block.

    Undecodable bytes in the file are silently dropped (``errors='ignore'``).

    :param path: path of the file to scan.
    :param beginMarkers: iterable of line prefixes that open a code block.
    :return: list with one string per marker found, in file order; the
        string is empty when no block content followed the marker.
    """
    with open(path, mode='r', errors='ignore', encoding='utf8', newline='') as f:
        doc_lines = f.read().splitlines()

    snippets = []
    collecting = False
    # True from the marker line until the first line of actual content.
    awaiting_content = False

    for current in doc_lines:
        if collecting:
            is_blank = current == ''

            # Skip Sphinx instructions and empty lines between them
            if awaiting_content and (is_blank or current.lstrip().startswith(":")):
                continue

            if is_blank or current.startswith(" "):
                snippets[-1] += current + "\n"
                awaiting_content = False
                continue

            # Unindented text: the block is over, but this very line
            # still has to be checked for being the next marker.
            collecting = False

        lowered = current.lower()
        if any(lowered.startswith(marker) for marker in beginMarkers):
            snippets.append('')
            collecting = True
            awaiting_content = True

    return snippets

def write_cases(f, solidityTests, yulTests):
    """Write every test case into its own file in the current directory.

    Files are named ``test_<sha256>_<origin>.<ext>`` where ``<sha256>`` is
    the hash of the test case as it was passed in (before dedenting),
    ``<origin>`` is ``f`` lowercased with ``.``, ``-`` and spaces replaced
    by ``_`` and ``<ext>`` is ``sol`` or ``yul``.

    :param f: name of the file the test cases originate from.
    :param solidityTests: test cases to be written as ``.sol`` files.
    :param yulTests: test cases to be written as ``.yul`` files.
    """
    cleaned_filename = f.translate(str.maketrans(".- ", "___")).lower()

    for extension, snippets in (("sol", solidityTests), ("yul", yulTests)):
        for snippet in snippets:
            source_code_hash = hashlib.sha256(snippet.encode("utf-8")).hexdigest()
            target_name = f'test_{source_code_hash}_{cleaned_filename}.{extension}'

            # Extracted code examples carry the indentation they had in their
            # source, which violates the style guide, so the indentation common
            # to all lines is removed before writing.
            with open(target_name, mode='w', encoding='utf8', newline='') as output:
                output.write(dedent(snippet))

def extract_and_write(path, language):
    """Extract the test cases of a single file and write them out.

    The kind of extraction depends on the file name:

    - ``.rst`` (any letter case): Solidity and/or Yul code blocks of the
      documentation, depending on ``language``.
    - ``.sol``: the whole file content as a single Solidity case, unless
      only Yul was requested.
    - anything else: the file is scanned for C++ raw string literals,
      regardless of ``language``.

    :param path: path of the file to process.
    :param language: ``"solidity"``, ``"yul"`` or ``""`` for both.
    """
    assert language in ["solidity", "yul", ""]

    wants_solidity = language in ("solidity", "")
    wants_yul = language in ("yul", "")

    cases, yulCases = [], []

    if path.lower().endswith('.rst'):
        if wants_solidity:
            cases = extract_solidity_docs_cases(path)
        if wants_yul:
            yulCases = extract_yul_docs_cases(path)
    elif path.endswith('.sol'):
        if wants_solidity:
            with open(path, mode='r', encoding='utf8', newline='') as source:
                cases = [source.read()]
    else:
        cases = extract_test_cases(path)

    write_cases(basename(path), cases, yulCases)

if __name__ == '__main__':
    # Command line interface: one positional path plus an optional language filter.
    arg_parser = ArgumentParser(description=(
        "Reads Solidity, C++ or RST source files and extracts compilable solidity and yul code blocks from them. "
        "Can be used to generate test cases to validate code examples. "
    ))
    arg_parser.add_argument(dest='path', help='Path to file or directory to look for code in.')
    arg_parser.add_argument(
        '-l', '--language',
        dest='language',
        choices=["yul", "solidity"],
        default="",
        action='store',
        help="Extract only code blocks in the given language"
    )
    options = arg_parser.parse_args()

    if isfile(options.path):
        extract_and_write(options.path, options.language)
    else:
        # Directories that must not be searched for test cases.
        skipped_dirs = ('_build', 'compilationTests')

        for root, subdirs, files in os.walk(options.path):
            # Prune the list in place so that os.walk() does not descend into them.
            subdirs[:] = [name for name in subdirs if name not in skipped_dirs]

            for file_name in files:
                if basename(file_name) == "invalid_utf8_sequence.sol":
                    continue  # ignore the test with broken utf-8 encoding
                extract_and_write(join(root, file_name), options.language)
Use Python 3 instead of Python 2 (EOL'd) 2020-01-13 15:14:18 +00:00			`#!/usr/bin/env python3`
Add script to extract test cases. 2016-09-30 11:09:45 +00:00			`#`
Upgrade isolate_tests.py to support extracting code from docs 2017-07-10 21:52:47 +00:00			`# This script reads C++ or RST source files and writes all`
Add script to extract test cases. 2016-09-30 11:09:45 +00:00			`# multi-line strings into individual files.`
			`# This can be used to extract the Solidity test cases`
Extend comment. 2016-10-10 20:04:11 +00:00			`# into files for e.g. fuzz testing as`
Cleanup and fix scripts/isolate_tests.py 2016-12-06 22:21:38 +00:00			`# scripts/isolate_tests.py test/libsolidity/*`
Add script to extract test cases. 2016-09-30 11:09:45 +00:00
Correctly find contracts with other delimiters. 2017-03-15 11:07:59 +00:00			`import re`
Isolate test cases from all tests and store under hash. 2017-03-22 19:19:20 +00:00			`import os`
			`import hashlib`
Test yul code blocks in documentation. 2021-07-05 17:38:41 +00:00			`from os.path import join, isfile, basename`
isolate_tests: Extract code blocks from documentation using code block header 2021-07-01 13:19:38 +00:00			`from argparse import ArgumentParser`
Test yul code blocks in documentation. 2021-07-05 17:38:41 +00:00			`from textwrap import indent, dedent`
Cleanup and fix scripts/isolate_tests.py 2016-12-06 22:21:38 +00:00
Upgrade isolate_tests.py to support extracting code from docs 2017-07-10 21:52:47 +00:00			`def extract_test_cases(path):`
Fix pylint issues 2021-06-30 08:21:41 +00:00			`with open(path, encoding="utf8", errors='ignore', mode='r', newline='') as file:`
			`lines = file.read().splitlines()`
Cleanup and fix scripts/isolate_tests.py 2016-12-06 22:21:38 +00:00
			`inside = False`
Correctly find contracts with other delimiters. 2017-03-15 11:07:59 +00:00			`delimiter = ''`
Cleanup and fix scripts/isolate_tests.py 2016-12-06 22:21:38 +00:00			`tests = []`

			`for l in lines:`
Fix inconsistent indentation in scripts/ - NOT reindenting all files. Just choosing one style in files that were using multiple. 2020-04-17 12:32:38 +00:00			`if inside:`
			`if l.strip().endswith(')' + delimiter + '";'):`
			`inside = False`
			`else:`
			`tests[-1] += l + '\n'`
Cleanup and fix scripts/isolate_tests.py 2016-12-06 22:21:38 +00:00			`else:`
Fix inconsistent indentation in scripts/ - NOT reindenting all files. Just choosing one style in files that were using multiple. 2020-04-17 12:32:38 +00:00			`m = re.search(r'R"([^(]*)\($', l.strip())`
			`if m:`
			`inside = True`
			`delimiter = m.group(1)`
			`tests += ['']`
Cleanup and fix scripts/isolate_tests.py 2016-12-06 22:21:38 +00:00
			`return tests`

Test yul code blocks in documentation. 2021-07-05 17:38:41 +00:00			`def extract_solidity_docs_cases(path):`
			`tests = extract_docs_cases(path, [".. code-block:: solidity", '::'])`

			`codeStart = "(// SPDX-License-Identifier:\|pragma solidity\|contract.{\|library.{\|interface.*{)"`

			`# Filter out tests that are not supposed to be compilable.`
			`return [`
			`test.lstrip("\n")`
			`for test in tests`
			`if re.search(r'^\s{4}' + codeStart, test, re.MULTILINE) is not None`
			`]`

			`def extract_yul_docs_cases(path):`
			`tests = extract_docs_cases(path, [".. code-block:: yul"])`

			`def wrap_in_object(code):`
			`for line in code.splitlines():`
			`line = line.lstrip()`
			`if line.startswith("//"):`
			`continue`
			`if not line.startswith("object") and not line.startswith("{"):`
Fix pylint warnings about the usage of f-strings 2021-09-16 17:22:23 +00:00			`return indent(f"{{\n{code.rstrip()}\n}}\n\n", " ")`
Test yul code blocks in documentation. 2021-07-05 17:38:41 +00:00			`break`

			`return code`

			`return [`
			`wrap_in_object(test)`
			`for test in tests`
			`if test.strip() != ""`
			`]`

			`# Extract code examples based on the 'beginMarker' parameter`
isolate_tests: Extract code blocks from documentation using code block header 2021-07-01 13:19:38 +00:00			`# up until we reach EOF or a line that is not empty and doesn't start with 4`
			`# spaces.`
Test yul code blocks in documentation. 2021-07-05 17:38:41 +00:00			`def extract_docs_cases(path, beginMarkers):`
isolate_tests: Extract code blocks from documentation using code block header 2021-07-01 13:19:38 +00:00			`immediatelyAfterMarker = False`
isolate_tests.py: Parse Sphinx block parameters correctly 2021-06-21 18:53:21 +00:00			`insideBlock = False`
Upgrade isolate_tests.py to support extracting code from docs 2017-07-10 21:52:47 +00:00			`tests = []`

Also extract tests that do not start with a pragma. 2018-08-09 18:48:41 +00:00			`# Collect all snippets of indented blocks`
Fix pylint issues 2021-06-30 08:21:41 +00:00			`with open(path, mode='r', errors='ignore', encoding='utf8', newline='') as f:`
			`lines = f.read().splitlines()`
isolate_tests: Extract code blocks from documentation using code block header 2021-07-01 13:19:38 +00:00
			`for line in lines:`
isolate_tests.py: Parse Sphinx block parameters correctly 2021-06-21 18:53:21 +00:00			`if insideBlock:`
isolate_tests: Extract code blocks from documentation using code block header 2021-07-01 13:19:38 +00:00			`if immediatelyAfterMarker:`
			`# Skip Sphinx instructions and empty lines between them`
			`if line == '' or line.lstrip().startswith(":"):`
			`continue`

			`if line == '' or line.startswith(" "):`
			`tests[-1] += line + "\n"`
			`immediatelyAfterMarker = False`
Test yul code blocks in documentation. 2021-07-05 17:38:41 +00:00			`continue`

			`insideBlock = False`
			`if any(map(line.lower().startswith, beginMarkers)):`
isolate_tests: Extract code blocks from documentation using code block header 2021-07-01 13:19:38 +00:00			`insideBlock = True`
			`immediatelyAfterMarker = True`
			`tests += ['']`
Fixes source extraction from docs. 2019-10-23 20:13:17 +00:00
Test yul code blocks in documentation. 2021-07-05 17:38:41 +00:00			`return tests`
Cleanup and fix scripts/isolate_tests.py 2016-12-06 22:21:38 +00:00
Test yul code blocks in documentation. 2021-07-05 17:38:41 +00:00			`def write_cases(f, solidityTests, yulTests):`
Include origin filename in the filenames generated by isolate_tests.py. 2018-09-06 09:37:44 +00:00			`cleaned_filename = f.replace(".","_").replace("-","_").replace(" ","_").lower()`
Test yul code blocks in documentation. 2021-07-05 17:38:41 +00:00			`for language, test in [("sol", t) for t in solidityTests] + [("yul", t) for t in yulTests]:`
Fix typos in isolate_tests.py 2021-03-10 19:40:18 +00:00			`# When code examples are extracted they are indented by 8 spaces, which violates the style guide,`
Add style checker Rename files Changes from review Update test/docsCodeStyle.sh Co-Authored-By: chriseth <chris@ethereum.org> Update test/docsCodeStyle.sh Co-Authored-By: chriseth <chris@ethereum.org> Remove extraneous brackets 2019-08-05 11:00:30 +00:00			`# so before checking remove 4 spaces from each line.`
Test yul code blocks in documentation. 2021-07-05 17:38:41 +00:00			`remainder = dedent(test)`
pylint: Enable and fix redefined-builtin warnings 2021-12-21 14:26:54 +00:00			`source_code_hash = hashlib.sha256(test.encode("utf-8")).hexdigest()`
			`sol_filename = f'test_{source_code_hash}_{cleaned_filename}.{language}'`
Fix pylint issues 2021-06-30 08:21:41 +00:00			`with open(sol_filename, mode='w', encoding='utf8', newline='') as fi:`
			`fi.write(remainder)`
Change isolate_tests to support single files as an input On the documentation the examples for the usage of isolate_tests.py are shown with single files, and it's currently not working. It only works for folders or wildcards that return more than one file, since that's how os.walk works within a loop for that cases. Proposed an simple and easy fix. I extracted the core functionality for extracting tests from files, and made another function called `extract_and_write` If the program receives a single file the function `extract_and_write` is called once, it even works for `docs` when specified. If the program receives a path or a wildcard, works as used to. 2018-07-05 00:20:17 +00:00
Test yul code blocks in documentation. 2021-07-05 17:38:41 +00:00			`def extract_and_write(path, language):`
			`assert language in ["solidity", "yul", ""]`
			`yulCases = []`
			`cases = []`

isolate_tests: Extract code blocks from documentation using code block header 2021-07-01 13:19:38 +00:00			`if path.lower().endswith('.rst'):`
Test yul code blocks in documentation. 2021-07-05 17:38:41 +00:00			`if language in ("solidity", ""):`
			`cases = extract_solidity_docs_cases(path)`

			`if language in ("yul", ""):`
			`yulCases = extract_yul_docs_cases(path)`
isolate_tests: Extract code blocks from documentation using code block header 2021-07-01 13:19:38 +00:00			`elif path.endswith('.sol'):`
Test yul code blocks in documentation. 2021-07-05 17:38:41 +00:00			`if language in ("solidity", ""):`
			`with open(path, mode='r', encoding='utf8', newline='') as f:`
			`cases = [f.read()]`
Fix inconsistent indentation in scripts/ - NOT reindenting all files. Just choosing one style in files that were using multiple. 2020-04-17 12:32:38 +00:00			`else:`
isolate_tests: Extract code blocks from documentation using code block header 2021-07-01 13:19:38 +00:00			`cases = extract_test_cases(path)`

Test yul code blocks in documentation. 2021-07-05 17:38:41 +00:00			`write_cases(basename(path), cases, yulCases)`
Change isolate_tests to support single files as an input On the documentation the examples for the usage of isolate_tests.py are shown with single files, and it's currently not working. It only works for folders or wildcards that return more than one file, since that's how os.walk works within a loop for that cases. Proposed an simple and easy fix. I extracted the core functionality for extracting tests from files, and made another function called `extract_and_write` If the program receives a single file the function `extract_and_write` is called once, it even works for `docs` when specified. If the program receives a path or a wildcard, works as used to. 2018-07-05 00:20:17 +00:00
Cleanup and fix scripts/isolate_tests.py 2016-12-06 22:21:38 +00:00			`if __name__ == '__main__':`
isolate_tests: Extract code blocks from documentation using code block header 2021-07-01 13:19:38 +00:00			`script_description = (`
			`"Reads Solidity, C++ or RST source files and extracts compilable solidity and yul code blocks from them. "`
isolate_tests: Fix typo in script description 2021-10-28 15:34:57 +00:00			`"Can be used to generate test cases to validate code examples. "`
isolate_tests: Extract code blocks from documentation using code block header 2021-07-01 13:19:38 +00:00			`)`
Fix isolate_tests when run with single files and add usage information 2021-07-01 13:18:32 +00:00
isolate_tests: Extract code blocks from documentation using code block header 2021-07-01 13:19:38 +00:00			`parser = ArgumentParser(description=script_description)`
			`parser.add_argument(dest='path', help='Path to file or directory to look for code in.')`
Test yul code blocks in documentation. 2021-07-05 17:38:41 +00:00			`parser.add_argument(`
			`'-l', '--language',`
			`dest='language',`
			`choices=["yul", "solidity"],`
			`default="",`
			`action='store',`
			`help="Extract only code blocks in the given language"`
			`)`
isolate_tests: Extract code blocks from documentation using code block header 2021-07-01 13:19:38 +00:00			`options = parser.parse_args()`
			`path = options.path`
Cleanup and fix scripts/isolate_tests.py 2016-12-06 22:21:38 +00:00
Change isolate_tests to support single files as an input On the documentation the examples for the usage of isolate_tests.py are shown with single files, and it's currently not working. It only works for folders or wildcards that return more than one file, since that's how os.walk works within a loop for that cases. Proposed an simple and easy fix. I extracted the core functionality for extracting tests from files, and made another function called `extract_and_write` If the program receives a single file the function `extract_and_write` is called once, it even works for `docs` when specified. If the program receives a path or a wildcard, works as used to. 2018-07-05 00:20:17 +00:00			`if isfile(path):`
Test yul code blocks in documentation. 2021-07-05 17:38:41 +00:00			`extract_and_write(path, options.language)`
Remove trailing whitespace for all files in the repository. 2018-09-03 13:01:15 +00:00			`else:`
Change isolate_tests to support single files as an input On the documentation the examples for the usage of isolate_tests.py are shown with single files, and it's currently not working. It only works for folders or wildcards that return more than one file, since that's how os.walk works within a loop for that cases. Proposed an simple and easy fix. I extracted the core functionality for extracting tests from files, and made another function called `extract_and_write` If the program receives a single file the function `extract_and_write` is called once, it even works for `docs` when specified. If the program receives a path or a wildcard, works as used to. 2018-07-05 00:20:17 +00:00			`for root, subdirs, files in os.walk(path):`
			`if '_build' in subdirs:`
			`subdirs.remove('_build')`
			`if 'compilationTests' in subdirs:`
			`subdirs.remove('compilationTests')`
			`for f in files:`
isolate_tests: Extract code blocks from documentation using code block header 2021-07-01 13:19:38 +00:00			`if basename(f) == "invalid_utf8_sequence.sol":`
Improve error coverage of syntax checker 2020-08-20 00:35:15 +00:00			`continue # ignore the test with broken utf-8 encoding`
Change isolate_tests to support single files as an input On the documentation the examples for the usage of isolate_tests.py are shown with single files, and it's currently not working. It only works for folders or wildcards that return more than one file, since that's how os.walk works within a loop for that cases. Proposed an simple and easy fix. I extracted the core functionality for extracting tests from files, and made another function called `extract_and_write` If the program receives a single file the function `extract_and_write` is called once, it even works for `docs` when specified. If the program receives a path or a wildcard, works as used to. 2018-07-05 00:20:17 +00:00			`path = join(root, f)`
Test yul code blocks in documentation. 2021-07-05 17:38:41 +00:00			`extract_and_write(path, options.language)`