solidity/scripts/isolate_tests.py

#!/usr/bin/env python3
#
# This script reads C++ or RST source files and writes all
# multi-line strings into individual files.
# This can be used to extract the Solidity test cases
# into files for e.g. fuzz testing as
# scripts/isolate_tests.py test/libsolidity/*

import sys
import re
import os
import hashlib
from os.path import join, isfile, split

def extract_test_cases(path):
    """Extract the bodies of C++ raw string literals from a source file.

    A literal starts on a line whose stripped text ends with ``R"<delim>(``
    and ends on the first following line whose stripped text ends with
    ``)<delim>";``. The lines strictly between those two marker lines form
    one test case; each collected line is terminated by a newline. Anything
    else on the opening or closing line is discarded.

    Args:
        path: Path of the file to scan. It is decoded as UTF-8 and
            undecodable bytes are ignored.

    Returns:
        A list of strings, one per raw string literal, in file order.
    """
    # newline='' disables newline translation on read. The context manager
    # closes the handle deterministically instead of leaving it to the
    # garbage collector.
    with open(path, encoding="utf8", errors='ignore', mode='r', newline='') as source:
        lines = source.read().splitlines()

    inside = False
    delimiter = ''
    tests = []

    for line in lines:
        stripped = line.strip()
        if inside:
            if stripped.endswith(')' + delimiter + '";'):
                inside = False
            else:
                # Keep the line unstripped so the test's own indentation survives.
                tests[-1] += line + '\n'
        else:
            m = re.search(r'R"([^(]*)\($', stripped)
            if m:
                inside = True
                # Remember the custom delimiter so only the matching
                # terminator closes this literal.
                delimiter = m.group(1)
                tests.append('')

    return tests

# Contract sources are indented by 4 spaces.
# Look for `// SPDX-License-Identifier:`, `pragma solidity`, `contract`,
# `library` or `interface` and abort if such a line is not indented properly.
def extract_docs_cases(path):
    """Extract Solidity snippets from the indented blocks of a docs file.

    Consecutive lines that start with a space form one block; empty lines
    do not end a block, and a non-empty line that does not start with a
    space does. A block is kept when some line in it has one of the code
    markers (SPDX license comment, ``pragma solidity``, or a
    ``contract``/``library``/``interface`` header containing ``{``)
    preceded by exactly four whitespace characters.

    Args:
        path: Path of the file to scan. It is decoded as UTF-8 and
            undecodable bytes are ignored.

    Returns:
        A list of the kept blocks, still indented, in file order.

    Raises:
        SystemExit: With status 1, after printing the offending block, when
            a code marker is preceded by fewer than four whitespace
            characters at the start of a line.
    """
    # Read everything up front so the handle is closed before parsing.
    with open(path, mode='r', errors='ignore', encoding='utf8', newline='') as source:
        lines = source.read().splitlines()

    inside = False
    extractedLines = []

    # Collect all snippets of indented blocks
    for line in lines:
        if line != '':
            indented = line.startswith(' ')
            if indented and not inside:
                # start new test
                extractedLines.append('')
            inside = indented
        # Empty lines leave `inside` untouched, so they stay part of the
        # block that is currently open.
        if inside:
            extractedLines[-1] += line + '\n'

    codeStart = "(// SPDX-License-Identifier:|pragma solidity|contract.*{|library.*{|interface.*{)"

    # Filter all tests that do not contain Solidity or are indented incorrectly.
    tests = []
    for snippet in extractedLines:
        if re.search(r'^\s{0,3}' + codeStart, snippet, re.MULTILINE):
            print("Indentation error in " + path + ":")
            print(snippet)
            # sys.exit rather than the site-provided exit(): the latter is an
            # interactive helper and is not guaranteed to be defined.
            sys.exit(1)
        if re.search(r'^\s{4}' + codeStart, snippet, re.MULTILINE):
            tests.append(snippet)

    return tests

def write_cases(f, tests):
    """Write each test case to its own ``.sol`` file in the current directory.

    Files are named ``test_<sha256 of the test text>_<cleaned name>.sol``,
    where the cleaned name is the final path component of ``f``, lower-cased,
    with dots, dashes and spaces replaced by underscores.

    Args:
        f: Name (or path) of the file the tests came from. Only its final
            component is used, so a path with directories cannot inject
            separators into the output file name.
        tests: Iterable of test case strings.
    """
    origin = os.path.basename(f)
    cleaned_filename = origin.replace(".", "_").replace("-", "_").replace(" ", "_").lower()
    for test in tests:
        # When code examples are extracted they are indented by 8 spaces, which violates the style guide,
        # so before checking remove 4 spaces from each line.
        # `flags` is passed by keyword: positional count/flags are deprecated.
        remainder = re.sub(r'^ {4}', '', test, flags=re.MULTILINE)
        # The hash is taken over the text as extracted, before de-indenting.
        digest = hashlib.sha256(test.encode("utf-8")).hexdigest()
        sol_filename = 'test_%s_%s.sol' % (digest, cleaned_filename)
        # newline='' writes line endings exactly as they are in the string;
        # the context manager guarantees the data is flushed and closed.
        with open(sol_filename, mode='w', encoding='utf8', newline='') as out:
            out.write(remainder)

def extract_and_write(f, path):
    """Extract the test cases from one file and write them out.

    The extraction strategy depends on the module-level ``docs`` flag set by
    the script entry point and on the name ``f``:

    * ``docs`` truthy: indented code blocks via ``extract_docs_cases``;
    * ``f`` ends with ``.sol``: the whole file is a single case;
    * otherwise: raw string literals via ``extract_test_cases``.

    Args:
        f: Name used for the ``.sol`` check and passed on to ``write_cases``
            to build the output file names.
        path: Path of the file to read.
    """
    if docs:
        cases = extract_docs_cases(path)
    elif f.endswith('.sol'):
        # Strict UTF-8 decoding here (no errors='ignore'); the context
        # manager closes the handle as soon as the contents are read.
        with open(path, mode='r', encoding='utf8', newline='') as source:
            cases = [source.read()]
    else:
        cases = extract_test_cases(path)
    write_cases(f, cases)

if __name__ == '__main__':
    # Usage: isolate_tests.py <file-or-directory> [docs]
    if len(sys.argv) < 2:
        # Fail with a readable message instead of an IndexError traceback.
        sys.exit("Usage: " + sys.argv[0] + " <file-or-directory> [docs]")

    path = sys.argv[1]
    # `docs` must stay a module-level name: extract_and_write() reads it as a global.
    docs = len(sys.argv) > 2 and sys.argv[2] == 'docs'

    if isfile(path):
        # Pass only the file name as the display name so that directory
        # components never end up in the generated output file names.
        extract_and_write(split(path)[1], path)
    else:
        for root, subdirs, files in os.walk(path):
            # Removing entries from `subdirs` in place keeps os.walk from
            # descending into those directories.
            for skipped in ('_build', 'compilationTests'):
                if skipped in subdirs:
                    subdirs.remove(skipped)
            for f in files:
                # os.walk yields bare file names, so `f` can be compared directly.
                if f == "invalid_utf8_sequence.sol":
                    continue  # ignore the test with broken utf-8 encoding
                extract_and_write(f, join(root, f))
Use Python 3 instead of Python 2 (EOL'd) 2020-01-13 15:14:18 +00:00			`#!/usr/bin/env python3`
Add script to extract test cases. 2016-09-30 11:09:45 +00:00			`#`
Upgrade isolate_tests.py to support extracting code from docs 2017-07-10 21:52:47 +00:00			`# This script reads C++ or RST source files and writes all`
Add script to extract test cases. 2016-09-30 11:09:45 +00:00			`# multi-line strings into individual files.`
			`# This can be used to extract the Solidity test cases`
Extend comment. 2016-10-10 20:04:11 +00:00			`# into files for e.g. fuzz testing as`
Cleanup and fix scripts/isolate_tests.py 2016-12-06 22:21:38 +00:00			`# scripts/isolate_tests.py test/libsolidity/*`
Add script to extract test cases. 2016-09-30 11:09:45 +00:00
			`import sys`
Correctly find contracts with other delimiters. 2017-03-15 11:07:59 +00:00			`import re`
Isolate test cases from all tests and store under hash. 2017-03-22 19:19:20 +00:00			`import os`
			`import hashlib`
Improve error coverage of syntax checker 2020-08-20 00:35:15 +00:00			`from os.path import join, isfile, split`
Cleanup and fix scripts/isolate_tests.py 2016-12-06 22:21:38 +00:00
Upgrade isolate_tests.py to support extracting code from docs 2017-07-10 21:52:47 +00:00			`def extract_test_cases(path):`
Read/write files in python with newline='' option to preserve newlines as \n on Windows 2021-01-19 15:56:27 +00:00			`lines = open(path, encoding="utf8", errors='ignore', mode='r', newline='').read().splitlines()`
Cleanup and fix scripts/isolate_tests.py 2016-12-06 22:21:38 +00:00
			`inside = False`
Correctly find contracts with other delimiters. 2017-03-15 11:07:59 +00:00			`delimiter = ''`
Cleanup and fix scripts/isolate_tests.py 2016-12-06 22:21:38 +00:00			`tests = []`

			`for l in lines:`
Fix inconsistent indentation in scripts/ - NOT reindenting all files. Just choosing one style in files that were using multiple. 2020-04-17 12:32:38 +00:00			`if inside:`
			`if l.strip().endswith(')' + delimiter + '";'):`
			`inside = False`
			`else:`
			`tests[-1] += l + '\n'`
Cleanup and fix scripts/isolate_tests.py 2016-12-06 22:21:38 +00:00			`else:`
Fix inconsistent indentation in scripts/ - NOT reindenting all files. Just choosing one style in files that were using multiple. 2020-04-17 12:32:38 +00:00			`m = re.search(r'R"([^(]*)\($', l.strip())`
			`if m:`
			`inside = True`
			`delimiter = m.group(1)`
			`tests += ['']`
Cleanup and fix scripts/isolate_tests.py 2016-12-06 22:21:38 +00:00
			`return tests`

Upgrade isolate_tests.py to support extracting code from docs 2017-07-10 21:52:47 +00:00			`# Contract sources are indented by 4 spaces.`
Also extract tests that do not start with a pragma. 2018-08-09 18:48:41 +00:00			# Look for `pragma solidity`, `contract`, `library` or `interface`
			`# and abort a line not indented properly.`
Upgrade isolate_tests.py to support extracting code from docs 2017-07-10 21:52:47 +00:00			`def extract_docs_cases(path):`
			`inside = False`
Fixes source extraction from docs. 2019-10-23 20:13:17 +00:00			`extractedLines = []`
Upgrade isolate_tests.py to support extracting code from docs 2017-07-10 21:52:47 +00:00			`tests = []`

Also extract tests that do not start with a pragma. 2018-08-09 18:48:41 +00:00			`# Collect all snippets of indented blocks`
Read/write files in python with newline='' option to preserve newlines as \n on Windows 2021-01-19 15:56:27 +00:00			`for l in open(path, mode='r', errors='ignore', encoding='utf8', newline='').read().splitlines():`
Also extract tests that do not start with a pragma. 2018-08-09 18:48:41 +00:00			`if l != '':`
			`if not inside and l.startswith(' '):`
			`# start new test`
Fixes source extraction from docs. 2019-10-23 20:13:17 +00:00			`extractedLines += ['']`
Also extract tests that do not start with a pragma. 2018-08-09 18:48:41 +00:00			`inside = l.startswith(' ')`
			`if inside:`
Fixes source extraction from docs. 2019-10-23 20:13:17 +00:00			`extractedLines[-1] += l + '\n'`

Update test extraction script to recognize license identifier. 2020-05-13 15:41:56 +00:00			`codeStart = "(// SPDX-License-Identifier:\|pragma solidity\|contract.{\|library.{\|interface.*{)"`
Fixes source extraction from docs. 2019-10-23 20:13:17 +00:00
Fix typos in isolate_tests.py 2021-03-10 19:40:18 +00:00			`# Filter all tests that do not contain Solidity or are indented incorrectly.`
Fixes source extraction from docs. 2019-10-23 20:13:17 +00:00			`for lines in extractedLines:`
			`if re.search(r'^\s{0,3}' + codeStart, lines, re.MULTILINE):`
Fix typos in isolate_tests.py 2021-03-10 19:40:18 +00:00			`print("Indentation error in " + path + ":")`
Fixes source extraction from docs. 2019-10-23 20:13:17 +00:00			`print(lines)`
			`exit(1)`
			`if re.search(r'^\s{4}' + codeStart, lines, re.MULTILINE):`
			`tests.append(lines)`

			`return tests`
Cleanup and fix scripts/isolate_tests.py 2016-12-06 22:21:38 +00:00
Include origin filename in the filenames generated by isolate_tests.py. 2018-09-06 09:37:44 +00:00			`def write_cases(f, tests):`
			`cleaned_filename = f.replace(".","_").replace("-","_").replace(" ","_").lower()`
Isolate test cases from all tests and store under hash. 2017-03-22 19:19:20 +00:00			`for test in tests:`
Fix typos in isolate_tests.py 2021-03-10 19:40:18 +00:00			`# When code examples are extracted they are indented by 8 spaces, which violates the style guide,`
Add style checker Rename files Changes from review Update test/docsCodeStyle.sh Co-Authored-By: chriseth <chris@ethereum.org> Update test/docsCodeStyle.sh Co-Authored-By: chriseth <chris@ethereum.org> Remove extraneous brackets 2019-08-05 11:00:30 +00:00			`# so before checking remove 4 spaces from each line.`
			`remainder = re.sub(r'^ {4}', '', test, 0, re.MULTILINE)`
Use Python 3 instead of Python 2 (EOL'd) 2020-01-13 15:14:18 +00:00			`sol_filename = 'test_%s_%s.sol' % (hashlib.sha256(test.encode("utf-8")).hexdigest(), cleaned_filename)`
Read/write files in python with newline='' option to preserve newlines as \n on Windows 2021-01-19 15:56:27 +00:00			`open(sol_filename, mode='w', encoding='utf8', newline='').write(remainder)`
Change isolate_tests to support single files as an input On the documentation the examples for the usage of isolate_tests.py are shown with single files, and it's currently not working. It only works for folders or wildcards that return more than one file, since that's how os.walk works within a loop for that cases. Proposed an simple and easy fix. I extracted the core functionality for extracting tests from files, and made another function called `extract_and_write` If the program receives a single file the function `extract_and_write` is called once, it even works for `docs` when specified. If the program receives a path or a wildcard, works as used to. 2018-07-05 00:20:17 +00:00
			`def extract_and_write(f, path):`
Fix inconsistent indentation in scripts/ - NOT reindenting all files. Just choosing one style in files that were using multiple. 2020-04-17 12:32:38 +00:00			`if docs:`
			`cases = extract_docs_cases(path)`
			`else:`
			`if f.endswith('.sol'):`
Read/write files in python with newline='' option to preserve newlines as \n on Windows 2021-01-19 15:56:27 +00:00			`cases = [open(path, mode='r', encoding='utf8', newline='').read()]`
Change isolate_tests to support single files as an input On the documentation the examples for the usage of isolate_tests.py are shown with single files, and it's currently not working. It only works for folders or wildcards that return more than one file, since that's how os.walk works within a loop for that cases. Proposed an simple and easy fix. I extracted the core functionality for extracting tests from files, and made another function called `extract_and_write` If the program receives a single file the function `extract_and_write` is called once, it even works for `docs` when specified. If the program receives a path or a wildcard, works as used to. 2018-07-05 00:20:17 +00:00			`else:`
Fix inconsistent indentation in scripts/ - NOT reindenting all files. Just choosing one style in files that were using multiple. 2020-04-17 12:32:38 +00:00			`cases = extract_test_cases(path)`
			`write_cases(f, cases)`
Change isolate_tests to support single files as an input On the documentation the examples for the usage of isolate_tests.py are shown with single files, and it's currently not working. It only works for folders or wildcards that return more than one file, since that's how os.walk works within a loop for that cases. Proposed an simple and easy fix. I extracted the core functionality for extracting tests from files, and made another function called `extract_and_write` If the program receives a single file the function `extract_and_write` is called once, it even works for `docs` when specified. If the program receives a path or a wildcard, works as used to. 2018-07-05 00:20:17 +00:00
Cleanup and fix scripts/isolate_tests.py 2016-12-06 22:21:38 +00:00			`if __name__ == '__main__':`
Isolate test cases from all tests and store under hash. 2017-03-22 19:19:20 +00:00			`path = sys.argv[1]`
Upgrade isolate_tests.py to support extracting code from docs 2017-07-10 21:52:47 +00:00			`docs = False`
			`if len(sys.argv) > 2 and sys.argv[2] == 'docs':`
Fix inconsistent indentation in scripts/ - NOT reindenting all files. Just choosing one style in files that were using multiple. 2020-04-17 12:32:38 +00:00			`docs = True`
Cleanup and fix scripts/isolate_tests.py 2016-12-06 22:21:38 +00:00
Change isolate_tests to support single files as an input On the documentation the examples for the usage of isolate_tests.py are shown with single files, and it's currently not working. It only works for folders or wildcards that return more than one file, since that's how os.walk works within a loop for that cases. Proposed an simple and easy fix. I extracted the core functionality for extracting tests from files, and made another function called `extract_and_write` If the program receives a single file the function `extract_and_write` is called once, it even works for `docs` when specified. If the program receives a path or a wildcard, works as used to. 2018-07-05 00:20:17 +00:00			`if isfile(path):`
			`extract_and_write(path, path)`
Remove trailing whitespace for all files in the repository. 2018-09-03 13:01:15 +00:00			`else:`
Change isolate_tests to support single files as an input On the documentation the examples for the usage of isolate_tests.py are shown with single files, and it's currently not working. It only works for folders or wildcards that return more than one file, since that's how os.walk works within a loop for that cases. Proposed an simple and easy fix. I extracted the core functionality for extracting tests from files, and made another function called `extract_and_write` If the program receives a single file the function `extract_and_write` is called once, it even works for `docs` when specified. If the program receives a path or a wildcard, works as used to. 2018-07-05 00:20:17 +00:00			`for root, subdirs, files in os.walk(path):`
			`if '_build' in subdirs:`
			`subdirs.remove('_build')`
			`if 'compilationTests' in subdirs:`
			`subdirs.remove('compilationTests')`
			`for f in files:`
Improve error coverage of syntax checker 2020-08-20 00:35:15 +00:00			`_, tail = split(f)`
			`if tail == "invalid_utf8_sequence.sol":`
			`continue # ignore the test with broken utf-8 encoding`
Change isolate_tests to support single files as an input On the documentation the examples for the usage of isolate_tests.py are shown with single files, and it's currently not working. It only works for folders or wildcards that return more than one file, since that's how os.walk works within a loop for that cases. Proposed an simple and easy fix. I extracted the core functionality for extracting tests from files, and made another function called `extract_and_write` If the program receives a single file the function `extract_and_write` is called once, it even works for `docs` when specified. If the program receives a path or a wildcard, works as used to. 2018-07-05 00:20:17 +00:00			`path = join(root, f)`
			`extract_and_write(f, path)`