From 7f65f1cc02861077f84465d92b561dd1c0f1f758 Mon Sep 17 00:00:00 2001
From: Marenz <github@supradigital.org>
Date: Thu, 1 Jul 2021 15:17:42 +0200
Subject: [PATCH 1/3] Fix code-start headers in documentation

---
 docs/contracts/events.rst | 2 +-
 docs/natspec-format.rst   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/contracts/events.rst b/docs/contracts/events.rst
index aae16fb8b..d282499dc 100644
--- a/docs/contracts/events.rst
+++ b/docs/contracts/events.rst
@@ -73,7 +73,7 @@ four indexed arguments rather than three.
     In particular, it is possible to "fake" the signature of another event
     using an anonymous event.
 
-::
+.. code-block:: solidity
 
     // SPDX-License-Identifier: GPL-3.0
     pragma solidity >=0.4.21 <0.9.0;
diff --git a/docs/natspec-format.rst b/docs/natspec-format.rst
index 7007fe4a1..a49ddd895 100644
--- a/docs/natspec-format.rst
+++ b/docs/natspec-format.rst
@@ -58,7 +58,7 @@ The following example shows a contract and a function using all available tags.
 
   This may change in the future.
 
-.. code:: Solidity
+.. code-block:: Solidity
 
     // SPDX-License-Identifier: GPL-3.0
     pragma solidity >=0.8.2 < 0.9.0;

From f62b80530bf2b5f1df053ea61fb1ac1067ef783c Mon Sep 17 00:00:00 2001
From: Marenz <github@supradigital.org>
Date: Thu, 1 Jul 2021 15:18:32 +0200
Subject: [PATCH 2/3] Fix isolate_tests when run with single files and add
 usage information

---
 scripts/isolate_tests.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/scripts/isolate_tests.py b/scripts/isolate_tests.py
index 494c2a7aa..57d1c991a 100755
--- a/scripts/isolate_tests.py
+++ b/scripts/isolate_tests.py
@@ -106,13 +106,18 @@ def extract_and_write(f, path):
     write_cases(f, cases)
 
 if __name__ == '__main__':
+    if len(sys.argv) == 1:
+        print("Usage: " + sys.argv[0] + " path-to-file-or-folder-to-extract-code-from [docs]")
+        exit(1)
+
     path = sys.argv[1]
     docs = False
     if len(sys.argv) > 2 and sys.argv[2] == 'docs':
         docs = True
 
     if isfile(path):
-        extract_and_write(path, path)
+        _, tail = split(path)
+        extract_and_write(tail, path)
     else:
         for root, subdirs, files in os.walk(path):
             if '_build' in subdirs:

From 5291ca2dd4ee6b9ce9ebef54a44610df848ec8b1 Mon Sep 17 00:00:00 2001
From: Marenz <github@supradigital.org>
Date: Thu, 1 Jul 2021 15:19:38 +0200
Subject: [PATCH 3/3] isolate_tests: Extract code blocks from documentation
 using code block header

---
 docs/contributing.rst                         |   2 +-
 scripts/docs_version_pragma_check.sh          |   2 +-
 scripts/isolate_tests.py                      | 106 +++++++++---------
 .../docker-scripts/isolate_tests.py           |   3 +
 test/cmdlineTests.sh                          |   4 +-
 test/docsCodeStyle.sh                         |   2 +-
 6 files changed, 59 insertions(+), 60 deletions(-)

diff --git a/docs/contributing.rst b/docs/contributing.rst
index eaae6db3e..62d5bad1f 100644
--- a/docs/contributing.rst
+++ b/docs/contributing.rst
@@ -324,7 +324,7 @@ from the documentation or the other tests:
     # extract from tests:
     path/to/solidity/scripts/isolate_tests.py path/to/solidity/test/libsolidity/SolidityEndToEndTest.cpp
     # extract from documentation:
-    path/to/solidity/scripts/isolate_tests.py path/to/solidity/docs docs
+    path/to/solidity/scripts/isolate_tests.py path/to/solidity/docs
 
 The AFL documentation states that the corpus (the initial input files) should not be
 too large. The files themselves should not be larger than 1 kB and there should be
diff --git a/scripts/docs_version_pragma_check.sh b/scripts/docs_version_pragma_check.sh
index 8ffd0afb6..d7ca3f2ad 100755
--- a/scripts/docs_version_pragma_check.sh
+++ b/scripts/docs_version_pragma_check.sh
@@ -136,7 +136,7 @@ SOLTMPDIR=$(mktemp -d)
 (
     set -e
     cd "$SOLTMPDIR"
-    "$REPO_ROOT"/scripts/isolate_tests.py "$REPO_ROOT"/docs/ docs
+    "$REPO_ROOT"/scripts/isolate_tests.py "$REPO_ROOT"/docs/
 
     getAllAvailableVersions
 
diff --git a/scripts/isolate_tests.py b/scripts/isolate_tests.py
index 57d1c991a..fdb398c4b 100755
--- a/scripts/isolate_tests.py
+++ b/scripts/isolate_tests.py
@@ -10,7 +10,8 @@ import sys
 import re
 import os
 import hashlib
-from os.path import join, isfile, split
+from os.path import join, isfile, split, basename
+from argparse import ArgumentParser
 
 def extract_test_cases(path):
     with open(path, encoding="utf8", errors='ignore', mode='r', newline='') as file:
@@ -35,54 +36,50 @@ def extract_test_cases(path):
 
     return tests
 
-# Contract sources are indented by 4 spaces.
-# Look for `pragma solidity`, `contract`, `library` or `interface`
-# and abort a line not indented properly.
+# Extract code examples following a start marker. A block continues
+# up until we reach EOF or a line that is neither empty nor indented
+# (i.e. does not start with a space).
 def extract_docs_cases(path):
+    beginMarkers = ['.. code-block:: solidity', '::']
+    immediatelyAfterMarker = False
     insideBlock = False
-    insideBlockParameters = False
-    pastBlockParameters = False
-    extractedLines = []
     tests = []
 
     # Collect all snippets of indented blocks
-
     with open(path, mode='r', errors='ignore', encoding='utf8', newline='') as f:
         lines = f.read().splitlines()
-    for l in lines:
-        if l != '':
-            if not insideBlock and l.startswith(' '):
-                # start new test
-                extractedLines += ['']
-                insideBlockParameters = False
-                pastBlockParameters = False
-            insideBlock = l.startswith(' ')
-        if insideBlock:
-            if not pastBlockParameters:
-                # NOTE: For simplicity this allows blank lines between block parameters even
-                # though Sphinx does not. This does not matter since the first non-empty line in
-                # a Solidity file cannot start with a colon anyway.
-                if not l.strip().startswith(':') and (l != '' or not insideBlockParameters):
-                    insideBlockParameters = False
-                    pastBlockParameters = True
-                else:
-                    insideBlockParameters = True
 
-            if not insideBlockParameters:
-                extractedLines[-1] += l + '\n'
+    for line in lines:
+        if insideBlock:
+            if immediatelyAfterMarker:
+                # Skip Sphinx instructions and empty lines between them
+                if line == '' or line.lstrip().startswith(":"):
+                    continue
+
+            if line == '' or line.startswith(" "):
+                tests[-1] += line + "\n"
+                immediatelyAfterMarker = False
+                continue
+            insideBlock = False  # fall through: this line may open the next block
+        if any(map(line.lower().startswith, beginMarkers)):
+            insideBlock = True
+            immediatelyAfterMarker = True
+            tests += ['']
 
     codeStart = "(// SPDX-License-Identifier:|pragma solidity|contract.*{|library.*{|interface.*{)"
 
-    # Filter all tests that do not contain Solidity or are indented incorrectly.
-    for lines in extractedLines:
-        if re.search(r'^\s{0,3}' + codeStart, lines, re.MULTILINE):
+    for test in tests:
+        if re.search(r'^\s{0,3}' + codeStart, test, re.MULTILINE):
             print("Indentation error in " + path + ":")
-            print(lines)
+            print(test)
             exit(1)
-        if re.search(r'^\s{4}' + codeStart, lines, re.MULTILINE):
-            tests.append(lines)
 
-    return tests
+    # Filter out tests that are not supposed to be compilable.
+    return [
+        test.lstrip("\n")
+        for test in tests
+        if re.search(r'^\s{4}' + codeStart, test, re.MULTILINE) is not None
+    ]
 
 def write_cases(f, tests):
     cleaned_filename = f.replace(".","_").replace("-","_").replace(" ","_").lower()
@@ -94,30 +91,30 @@ def write_cases(f, tests):
         with open(sol_filename, mode='w', encoding='utf8', newline='') as fi:
             fi.write(remainder)
 
-def extract_and_write(f, path):
-    if docs:
+def extract_and_write(path):
+    if path.lower().endswith('.rst'):
         cases = extract_docs_cases(path)
+    elif path.endswith('.sol'):
+        with open(path, mode='r', encoding='utf8', newline='') as f:
+            cases = [f.read()]
     else:
-        if f.endswith('.sol'):
-            with open(path, mode='r', encoding='utf8', newline='') as _f:
-                cases = [_f.read()]
-        else:
-            cases = extract_test_cases(path)
-    write_cases(f, cases)
+        cases = extract_test_cases(path)
+
+    write_cases(basename(path), cases)
 
 if __name__ == '__main__':
-    if len(sys.argv) == 1:
-        print("Usage: " + sys.argv[0] + " path-to-file-or-folder-to-extract-code-from [docs]")
-        exit(1)
+    script_description = (
+        "Reads Solidity, C++ or RST source files and extracts compilable solidity and yul code blocks from them. "
+        "Can be used to generate test cases to validate code examples. "
+    )
 
-    path = sys.argv[1]
-    docs = False
-    if len(sys.argv) > 2 and sys.argv[2] == 'docs':
-        docs = True
+    parser = ArgumentParser(description=script_description)
+    parser.add_argument(dest='path', help='Path to file or directory to look for code in.')
+    options = parser.parse_args()
+    path = options.path
 
     if isfile(path):
-        _, tail = split(path)
-        extract_and_write(tail, path)
+        extract_and_write(path)
     else:
         for root, subdirs, files in os.walk(path):
             if '_build' in subdirs:
@@ -125,8 +122,7 @@ if __name__ == '__main__':
             if 'compilationTests' in subdirs:
                 subdirs.remove('compilationTests')
             for f in files:
-                _, tail = split(f)
-                if tail == "invalid_utf8_sequence.sol":
+                if basename(f) == "invalid_utf8_sequence.sol":
                     continue  # ignore the test with broken utf-8 encoding
                 path = join(root, f)
-                extract_and_write(f, path)
+                extract_and_write(path)
diff --git a/scripts/wasm-rebuild/docker-scripts/isolate_tests.py b/scripts/wasm-rebuild/docker-scripts/isolate_tests.py
index 973140e51..568c61423 100755
--- a/scripts/wasm-rebuild/docker-scripts/isolate_tests.py
+++ b/scripts/wasm-rebuild/docker-scripts/isolate_tests.py
@@ -1,4 +1,7 @@
 #!/usr/bin/env python2
+#
+# Not actively tested or maintained. Exists in case we want to rebuild an
+# ancient release.
 
 import sys
 import re
diff --git a/test/cmdlineTests.sh b/test/cmdlineTests.sh
index 4d99961ca..7a14fe6c8 100755
--- a/test/cmdlineTests.sh
+++ b/test/cmdlineTests.sh
@@ -361,7 +361,7 @@ SOLTMPDIR=$(mktemp -d)
 (
     set -e
     cd "$SOLTMPDIR"
-    "$REPO_ROOT"/scripts/isolate_tests.py "$REPO_ROOT"/docs/ docs
+    "$REPO_ROOT"/scripts/isolate_tests.py "$REPO_ROOT"/docs/
     developmentVersion=$("$REPO_ROOT/scripts/get_version.sh")
 
     for f in *.sol
@@ -510,7 +510,7 @@ SOLTMPDIR=$(mktemp -d)
     set -e
     cd "$SOLTMPDIR"
     "$REPO_ROOT"/scripts/isolate_tests.py "$REPO_ROOT"/test/
-    "$REPO_ROOT"/scripts/isolate_tests.py "$REPO_ROOT"/docs/ docs
+    "$REPO_ROOT"/scripts/isolate_tests.py "$REPO_ROOT"/docs/
 
     echo ./*.sol | xargs -P 4 -n 50 "${SOLIDITY_BUILD_DIR}/test/tools/solfuzzer" --quiet --input-files
     echo ./*.sol | xargs -P 4 -n 50 "${SOLIDITY_BUILD_DIR}/test/tools/solfuzzer" --without-optimizer --quiet --input-files
diff --git a/test/docsCodeStyle.sh b/test/docsCodeStyle.sh
index 9e082b7fb..33b787e25 100755
--- a/test/docsCodeStyle.sh
+++ b/test/docsCodeStyle.sh
@@ -22,7 +22,7 @@ SOLTMPDIR=$(mktemp -d)
 (
     set -e
     cd "$SOLTMPDIR"
-    "$REPO_ROOT"/scripts/isolate_tests.py "$REPO_ROOT"/docs/ docs
+    "$REPO_ROOT"/scripts/isolate_tests.py "$REPO_ROOT"/docs/
 
     if npm -v >/dev/null 2>&1; then
         if npm list -g | grep solhint >/dev/null 2>&1; then