Merge pull request #11605 from ethereum/issue-8191

isolate_tests: Extract code blocks from documentation using code block header
2023-10-03 13:03:40 +00:00 · 2021-07-08 14:28:47 +02:00 · 2021-07-08 14:28:47 +02:00 · 62934b60ca
commit 62934b60ca
parent 3f70acfd74 5291ca2dd4
8 changed files with 62 additions and 58 deletions
--- a/docs/contracts/events.rst
+++ b/docs/contracts/events.rst
@@ -73,7 +73,7 @@ four indexed arguments rather than three.
    In particular, it is possible to "fake" the signature of another event
    using an anonymous event.
-::
+.. code-block:: solidity
    // SPDX-License-Identifier: GPL-3.0
    pragma solidity >=0.4.21 <0.9.0;
--- a/docs/contributing.rst
+++ b/docs/contributing.rst
@@ -324,7 +324,7 @@ from the documentation or the other tests:
    # extract from tests:
    path/to/solidity/scripts/isolate_tests.py path/to/solidity/test/libsolidity/SolidityEndToEndTest.cpp
    # extract from documentation:
-    path/to/solidity/scripts/isolate_tests.py path/to/solidity/docs docs
+    path/to/solidity/scripts/isolate_tests.py path/to/solidity/docs
 The AFL documentation states that the corpus (the initial input files) should not be
 too large. The files themselves should not be larger than 1 kB and there should be
--- a/docs/natspec-format.rst
+++ b/docs/natspec-format.rst
@@ -58,7 +58,7 @@ The following example shows a contract and a function using all available tags.
  This may change in the future.
-.. code:: Solidity
+.. code-block:: Solidity
    // SPDX-License-Identifier: GPL-3.0
    pragma solidity >=0.8.2 < 0.9.0;
--- a/scripts/docs_version_pragma_check.sh
+++ b/scripts/docs_version_pragma_check.sh
@@ -136,7 +136,7 @@ SOLTMPDIR=$(mktemp -d)
 (
    set -e
    cd "$SOLTMPDIR"
-    "$REPO_ROOT"/scripts/isolate_tests.py "$REPO_ROOT"/docs/ docs
+    "$REPO_ROOT"/scripts/isolate_tests.py "$REPO_ROOT"/docs/
    getAllAvailableVersions
--- a/scripts/isolate_tests.py
+++ b/scripts/isolate_tests.py
@@ -10,7 +10,8 @@ import sys
 import re
 import os
 import hashlib
-from os.path import join, isfile, split
+from os.path import join, isfile, split, basename
 from argparse import ArgumentParser
 def extract_test_cases(path):
    with open(path, encoding="utf8", errors='ignore', mode='r', newline='') as file:
@@ -35,54 +36,50 @@ def extract_test_cases(path):
    return tests
-# Contract sources are indented by 4 spaces.
+# Extract code examples based on a start marker
-# Look for `pragma solidity`, `contract`, `library` or `interface`
+# up until we reach EOF or a line that is not empty and doesn't start with 4
-# and abort a line not indented properly.
+# spaces.
 def extract_docs_cases(path):
    beginMarkers = ['.. code-block:: solidity', '::']
    immediatelyAfterMarker = False
    insideBlock = False
    insideBlockParameters = False
    pastBlockParameters = False
    extractedLines = []
    tests = []
    # Collect all snippets of indented blocks
    with open(path, mode='r', errors='ignore', encoding='utf8', newline='') as f:
        lines = f.read().splitlines()
    for l in lines:
        if l != '':
            if not insideBlock and l.startswith(' '):
                # start new test
                extractedLines += ['']
                insideBlockParameters = False
                pastBlockParameters = False
            insideBlock = l.startswith(' ')
        if insideBlock:
            if not pastBlockParameters:
                # NOTE: For simplicity this allows blank lines between block parameters even
                # though Sphinx does not. This does not matter since the first non-empty line in
                # a Solidity file cannot start with a colon anyway.
                if not l.strip().startswith(':') and (l != '' or not insideBlockParameters):
                    insideBlockParameters = False
                    pastBlockParameters = True
                else:
                    insideBlockParameters = True
-            if not insideBlockParameters:
+    for line in lines:
-                extractedLines[-1] += l + '\n'
+        if insideBlock:
            if immediatelyAfterMarker:
                # Skip Sphinx instructions and empty lines between them
                if line == '' or line.lstrip().startswith(":"):
                    continue
            if line == '' or line.startswith(" "):
                tests[-1] += line + "\n"
                immediatelyAfterMarker = False
            else:
                insideBlock = False
        elif any(map(line.lower().startswith, beginMarkers)):
            insideBlock = True
            immediatelyAfterMarker = True
            tests += ['']
    codeStart = "(// SPDX-License-Identifier:|pragma solidity|contract.*{|library.*{|interface.*{)"
-    # Filter all tests that do not contain Solidity or are indented incorrectly.
+    for test in tests:
-    for lines in extractedLines:
+        if re.search(r'^\s{0,3}' + codeStart, test, re.MULTILINE):
        if re.search(r'^\s{0,3}' + codeStart, lines, re.MULTILINE):
            print("Indentation error in " + path + ":")
-            print(lines)
+            print(test)
            exit(1)
        if re.search(r'^\s{4}' + codeStart, lines, re.MULTILINE):
            tests.append(lines)
-    return tests
+    # Filter out tests that are not supposed to be compilable.
    return [
        test.lstrip("\n")
        for test in tests
        if re.search(r'^\s{4}' + codeStart, test, re.MULTILINE) is not None
    ]
 def write_cases(f, tests):
    cleaned_filename = f.replace(".","_").replace("-","_").replace(" ","_").lower()
@@ -94,25 +91,30 @@ def write_cases(f, tests):
        with open(sol_filename, mode='w', encoding='utf8', newline='') as fi:
            fi.write(remainder)
-def extract_and_write(f, path):
+def extract_and_write(path):
-    if docs:
+    if path.lower().endswith('.rst'):
        cases = extract_docs_cases(path)
    elif path.endswith('.sol'):
        with open(path, mode='r', encoding='utf8', newline='') as f:
            cases = [f.read()]
    else:
-        if f.endswith('.sol'):
+        cases = extract_test_cases(path)
-            with open(path, mode='r', encoding='utf8', newline='') as _f:
+
-                cases = [_f.read()]
+    write_cases(basename(path), cases)
        else:
            cases = extract_test_cases(path)
    write_cases(f, cases)
 if __name__ == '__main__':
-    path = sys.argv[1]
+    script_description = (
-    docs = False
+        "Reads Solidity, C++ or RST source files and extracts compilable solidity and yul code blocks from them. "
-    if len(sys.argv) > 2 and sys.argv[2] == 'docs':
+        "Can be used to generate test cases to validade code examples. "
-        docs = True
+    )
    parser = ArgumentParser(description=script_description)
    parser.add_argument(dest='path', help='Path to file or directory to look for code in.')
    options = parser.parse_args()
    path = options.path
    if isfile(path):
-        extract_and_write(path, path)
+        extract_and_write(path)
    else:
        for root, subdirs, files in os.walk(path):
            if '_build' in subdirs:
@@ -120,8 +122,7 @@ if __name__ == '__main__':
            if 'compilationTests' in subdirs:
                subdirs.remove('compilationTests')
            for f in files:
-                _, tail = split(f)
+                if basename(f) == "invalid_utf8_sequence.sol":
                if tail == "invalid_utf8_sequence.sol":
                    continue  # ignore the test with broken utf-8 encoding
                path = join(root, f)
-                extract_and_write(f, path)
+                extract_and_write(path)
--- a/scripts/wasm-rebuild/docker-scripts/isolate_tests.py
+++ b/scripts/wasm-rebuild/docker-scripts/isolate_tests.py
@@ -1,4 +1,7 @@
 #!/usr/bin/env python2
 #
 # Not actively tested or maintained. Exists in case we want to rebuild an
 # ancient release.
 import sys
 import re
--- a/test/cmdlineTests.sh
+++ b/test/cmdlineTests.sh
@@ -361,7 +361,7 @@ SOLTMPDIR=$(mktemp -d)
 (
    set -e
    cd "$SOLTMPDIR"
-    "$REPO_ROOT"/scripts/isolate_tests.py "$REPO_ROOT"/docs/ docs
+    "$REPO_ROOT"/scripts/isolate_tests.py "$REPO_ROOT"/docs/
    developmentVersion=$("$REPO_ROOT/scripts/get_version.sh")
    for f in *.sol
@@ -510,7 +510,7 @@ SOLTMPDIR=$(mktemp -d)
    set -e
    cd "$SOLTMPDIR"
    "$REPO_ROOT"/scripts/isolate_tests.py "$REPO_ROOT"/test/
-    "$REPO_ROOT"/scripts/isolate_tests.py "$REPO_ROOT"/docs/ docs
+    "$REPO_ROOT"/scripts/isolate_tests.py "$REPO_ROOT"/docs/
    echo ./*.sol | xargs -P 4 -n 50 "${SOLIDITY_BUILD_DIR}/test/tools/solfuzzer" --quiet --input-files
    echo ./*.sol | xargs -P 4 -n 50 "${SOLIDITY_BUILD_DIR}/test/tools/solfuzzer" --without-optimizer --quiet --input-files
--- a/test/docsCodeStyle.sh
+++ b/test/docsCodeStyle.sh
@@ -22,7 +22,7 @@ SOLTMPDIR=$(mktemp -d)
 (
    set -e
    cd "$SOLTMPDIR"
-    "$REPO_ROOT"/scripts/isolate_tests.py "$REPO_ROOT"/docs/ docs
+    "$REPO_ROOT"/scripts/isolate_tests.py "$REPO_ROOT"/docs/
    if npm -v >/dev/null 2>&1; then
        if npm list -g | grep solhint >/dev/null 2>&1; then