solidity/scripts/error_codes.py

#! /usr/bin/env python3
import random
import re
import os
import getopt
import sys
from os import path

# Text encoding used for every file this script reads or writes.
ENCODING = "utf-8"
# Matches an error-ID literal in source text: a run of digits immediately
# followed by "_error", delimited by word boundaries (e.g. "1234_error").
SOURCE_FILE_PATTERN = r"\b\d+_error\b"


def read_file(file_name):
    """Return the whole content of ``file_name`` decoded with ``ENCODING``.

    If the file cannot be opened, read or decoded, a one-line diagnostic
    naming the file is printed and the original exception is re-raised, so
    this function never returns ``None``.
    """
    try:
        with open(file_name, "r", encoding=ENCODING) as f:
            return f.read()
    except Exception:
        # Name the offending file, then let the real error propagate.
        print(f"Error reading: {file_name}")
        raise


def write_file(file_name, content):
    """Overwrite ``file_name`` with the text ``content``, encoded with ``ENCODING``."""
    with open(file_name, mode="w", encoding=ENCODING) as output:
        output.write(content)


def in_comment(source, pos):
    """Heuristically tell whether offset ``pos`` of ``source`` lies in a comment.

    Only the text before ``pos`` is inspected. The position counts as commented
    when a ``//`` occurs earlier on the same line, or when the most recent
    ``/*`` before it has not been followed by a ``*/``. String literals are not
    taken into account.
    """
    # Line comment: look for "//" between the start of the current line and pos.
    line_start = source.rfind("\n", 0, pos) + 1
    if "//" in source[line_start:pos]:
        return True

    # Block comment: the last opener must come after the last closer.
    last_opener = source.rfind("/*", 0, pos)
    last_closer = source.rfind("*/", 0, pos)
    return last_closer < last_opener


def find_ids_in_source_file(file_name, ids):
    """Tally the error IDs found in ``file_name`` into the dict ``ids``.

    Every match of ``SOURCE_FILE_PATTERN`` that ``in_comment`` does not report
    as commented increments ``ids[<digits>]``, where ``<digits>`` is the part
    of the match before the underscore. ``ids`` is updated in place.
    """
    text = read_file(file_name)
    for match in re.finditer(SOURCE_FILE_PATTERN, text):
        if not in_comment(text, match.start()):
            # The match looks like "<digits>_error"; keep only the digits.
            error_id = match.group(0).partition("_")[0]
            ids[error_id] = ids.get(error_id, 0) + 1


def get_used_ids(file_names):
    """Return a dict mapping each error ID to its occurrence count across ``file_names``."""
    occurrences = {}
    for source_path in file_names:
        # The helper accumulates into the shared dict.
        find_ids_in_source_file(source_path, occurrences)
    return occurrences


def get_id(available_ids, used_ids):
    """Pick a random ID from ``available_ids`` that is not in ``used_ids``.

    ``available_ids`` is a dict whose keys are the candidate IDs. Every
    candidate that is drawn, whether accepted or rejected, is removed from it,
    so no candidate is offered twice.

    Raises:
        AssertionError: when the pool runs out before a free ID is found.
    """
    while available_ids:
        k = random.randrange(len(available_ids))
        candidate = list(available_ids)[k]
        del available_ids[candidate]
        if candidate not in used_ids:
            return candidate
    # Raised explicitly: an `assert` statement would vanish under `python -O`
    # and the function would then silently return None.
    raise AssertionError("Out of IDs")


def fix_ids_in_file(file_name, available_ids, used_ids):
    """Replace incorrect or duplicated error IDs in ``file_name``.

    An ID outside a comment is replaced with a fresh one from ``get_id`` when
    it is not exactly four characters long, starts with "0", or is still
    counted more than once in ``used_ids``. The count of the replaced ID is
    decremented, so the last remaining occurrence of a duplicated ID keeps its
    value. IDs that ``in_comment`` reports as commented are copied unchanged.
    The file is rewritten, and its name printed, only if something changed.

    Args:
        file_name: path of the source file to process.
        available_ids: dict whose keys are candidate replacement IDs; consumed
            by ``get_id``.
        used_ids: dict mapping ID to remaining occurrence count; updated in
            place.

    Raises:
        AssertionError: if a non-commented ID is missing from ``used_ids``,
            which means the file no longer matches the earlier scan.
    """
    source = read_file(file_name)

    pieces = []
    copied_up_to = 0
    for match in re.finditer(SOURCE_FILE_PATTERN, source):
        # Text between the previous match and this one is copied verbatim.
        pieces.append(source[copied_up_to:match.start()])

        old_id = match.group(0).partition("_")[0]
        new_id = old_id

        if not in_comment(source, match.start()):
            # Check consistency before the count is looked up.
            if old_id not in used_ids:
                raise AssertionError(f"ID {old_id} in {file_name} was not found by the scan")
            malformed = len(old_id) != 4 or old_id[0] == "0"
            # incorrect id or id has a duplicate somewhere
            if malformed or used_ids[old_id] > 1:
                new_id = get_id(available_ids, used_ids)
                used_ids[old_id] -= 1

        pieces.append(new_id + "_error")
        copied_up_to = match.end()

    pieces.append(source[copied_up_to:])

    destination = "".join(pieces)
    if source != destination:
        write_file(file_name, destination)
        print(f"Fixed file: {file_name}")


def fix_ids(used_ids, file_names):
    """Fix the IDs of every file in ``file_names``, drawing replacements from one shared pool.

    The pool holds every four-digit number from 1000 to 9999 as a string key.
    """
    pool = dict.fromkeys(str(number) for number in range(1000, 10000))
    for source_path in file_names:
        fix_ids_in_file(source_path, pool, used_ids)


def find_files(top_dir, sub_dirs, extensions):
    """Builds a list of files with given extensions in specified subdirectories

    Each entry of ``sub_dirs`` is walked recursively below ``top_dir``. A file
    is included when its extension as returned by ``path.splitext`` (leading
    dot included) is in ``extensions``. Any error reported by ``os.walk``,
    such as a missing directory, terminates the program with a
    "Walk error: ..." message.
    """

    def abort_walk(error):
        # sys.exit, unlike the interactive `exit` helper installed by `site`,
        # is always available and does not close stdin.
        sys.exit(f"Walk error: {error}")

    source_file_names = []
    for sub_dir in sub_dirs:
        for root, _, file_names in os.walk(path.join(top_dir, sub_dir), onerror=abort_walk):
            source_file_names.extend(
                path.join(root, file_name)
                for file_name in file_names
                if path.splitext(file_name)[1] in extensions
            )

    return source_file_names


def find_ids_in_test_file(file_name):
    """Return the set of four-digit IDs mentioned in ``file_name``.

    A line contributes an ID when it starts with ``// ``, followed by either
    any text ending in ``Error`` or the word ``Warning``, a space, four digits
    and a colon.
    """
    text = read_file(file_name)
    pattern = r"^// (.*Error|Warning) \d\d\d\d:"
    found = set()
    for match in re.finditer(pattern, text, flags=re.MULTILINE):
        # The match ends with "DDDD:"; take the four digits before the colon.
        found.add(match.group(0)[-5:-1])
    return found


def find_ids_in_test_files(file_names):
    """Return the union of the ID sets found in each of ``file_names``."""
    per_file_ids = [find_ids_in_test_file(test_path) for test_path in file_names]
    return set().union(*per_file_ids)


def print_ids(ids):
    """Print ``ids`` in sorted order, ten per line and separated by single spaces.

    No newline is written after the last ID.
    """
    ordered = [str(item) for item in sorted(ids)]
    rows = [" ".join(ordered[start:start + 10]) for start in range(0, len(ordered), 10)]
    print("\n".join(rows), end="")


def examine_id_coverage(top_dir, used_ids):
    """Compare the error IDs used in sources with those mentioned in test files.

    ``.sol`` files below ``test/libsolidity/{errorRecoveryTests,
    smtCheckerTests, syntaxTests}`` under ``top_dir`` are scanned for IDs. A
    short summary is printed, followed by the list of IDs that appear only in
    tests or, if there are none, only in sources.

    ``used_ids`` must support set difference with a ``set`` in both operand
    positions.

    Returns:
        0 when both ID collections match, 1 otherwise.
    """
    test_roots = [
        path.join("test", "libsolidity", name)
        for name in ("errorRecoveryTests", "smtCheckerTests", "syntaxTests")
    ]
    covered_ids = find_ids_in_test_files(find_files(top_dir, test_roots, [".sol"]))

    source_count = len(used_ids)
    test_count = len(covered_ids)
    print(f"IDs in source files: {source_count}")
    print(f"IDs in test files  : {test_count} ({test_count - source_count})")
    print()

    # IDs that only tests know about are reported first.
    only_in_tests = covered_ids - used_ids
    if only_in_tests:
        print("Error. The following error codes found in tests, but not in sources:")
        print_ids(only_in_tests)
        return 1

    only_in_sources = used_ids - covered_ids
    if only_in_sources:
        print("The following error codes found in sources, but not in tests:")
        print_ids(only_in_sources)
        print("\n\nPlease make sure to add appropriate tests.")
        return 1

    return 0


def main(argv):
    """Command-line entry point; always terminates through ``sys.exit``.

    ``argv`` is the argument list without the program name. Supported options:

    * ``--check``: report incorrect IDs.
    * ``--fix``: rewrite incorrect IDs, asking for confirmation unless
      ``--noconfirm`` is given.
    * ``--examine-coverage``: compare IDs in sources with IDs in test files.

    Source files are searched below the current working directory.

    Exit status: 0 when no incorrect IDs are found (or coverage is complete),
    1 on a usage error, on incorrect IDs in check or coverage mode, on
    incomplete coverage, or when the user declines the fix, and 2 after IDs
    have been fixed.
    """
    # pylint: disable=too-many-branches, too-many-locals

    usage = "usage: python error_codes.py --check | --fix [--noconfirm] | --examine-coverage"
    try:
        opts, _ = getopt.getopt(argv, "", ["check", "fix", "noconfirm", "examine-coverage"])
    except getopt.GetoptError as err:
        # An unknown option is a usage error, not a crash.
        print(err)
        print(usage)
        sys.exit(1)

    selected = {opt for opt, _ in opts}
    check = "--check" in selected
    fix = "--fix" in selected
    noconfirm = "--noconfirm" in selected
    examine_coverage = "--examine-coverage" in selected

    if not check and not fix and not examine_coverage:
        print(usage)
        sys.exit(1)

    cwd = os.getcwd()

    source_file_names = find_files(
        cwd,
        ["libevmasm", "liblangutil", "libsolc", "libsolidity", "libsolutil", "libyul", "solc"],
        [".h", ".cpp"]
    )
    used_ids = get_used_ids(source_file_names)

    ok = True
    for error_id in sorted(used_ids):
        if len(error_id) != 4:
            print(f"ID {error_id} length != 4")
            ok = False
        if error_id[0] == "0":
            print(f"ID {error_id} starts with zero")
            ok = False
        if used_ids[error_id] > 1:
            print(f"ID {error_id} appears {used_ids[error_id]} times")
            ok = False

    if examine_coverage:
        if not ok:
            print("Incorrect IDs has to be fixed before applying --examine-coverage")
            # Coverage results would be meaningless with broken IDs, so stop
            # here as the message says.
            sys.exit(1)
        res = examine_id_coverage(cwd, used_ids.keys())
        sys.exit(res)

    if ok:
        print("No incorrect IDs found")
        sys.exit(0)

    if check:
        sys.exit(1)

    assert fix, "Unexpected state, should not come here without --fix"

    if not noconfirm:
        answer = input(
            "\nDo you want to fix incorrect IDs?\n"
            "Please commit current changes first, and review the results when the script finishes.\n"
            "[Y/N]? "
        ).strip().lower()
        # Accept exactly "y" or "n" (any case); a substring test against
        # "YNyn" would also accept inputs such as "Ny".
        while answer not in ("y", "n"):
            answer = input("[Y/N]? ").strip().lower()
        if answer != "y":
            sys.exit(1)

    random.seed()
    fix_ids(used_ids, source_file_names)
    print("Fixing completed")
    # Exit status 2 signals that files were modified.
    sys.exit(2)


# Run the tool only when this file is executed as a script; the program name
# (sys.argv[0]) is dropped before the arguments are handed to main().
if __name__ == "__main__":
    main(sys.argv[1:])
Check for wrong error codes in the CI 2020-05-27 10:13:37 +00:00			`#! /usr/bin/env python3`
Add a script to correct IDs 2020-05-06 12:25:13 +00:00			`import random`
			`import re`
			`import os`
Check for wrong error codes in the CI 2020-05-27 10:13:37 +00:00			`import getopt`
			`import sys`
Add a script to correct IDs 2020-05-06 12:25:13 +00:00			`from os import path`

			`ENCODING = "utf-8"`
Add --examine-coverage to fix_error_ids.py 2020-06-27 23:46:42 +00:00			`SOURCE_FILE_PATTERN = r"\b\d+_error\b"`
Add a script to correct IDs 2020-05-06 12:25:13 +00:00

			`def read_file(file_name):`
			`content = None`
			`try:`
			`with open(file_name, "r", encoding=ENCODING) as f:`
			`content = f.read()`
			`finally:`
			`if content == None:`
			`print(f"Error reading: {file_name}")`
			`return content`


			`def write_file(file_name, content):`
			`with open(file_name, "w", encoding=ENCODING) as f:`
			`f.write(content)`


			`def in_comment(source, pos):`
			`slash_slash_pos = source.rfind("//", 0, pos)`
			`lf_pos = source.rfind("\n", 0, pos)`
			`if slash_slash_pos > lf_pos:`
			`return True`
			`slash_star_pos = source.rfind("/*", 0, pos)`
			`star_slash_pos = source.rfind("*/", 0, pos)`
			`return slash_star_pos > star_slash_pos`


Add --examine-coverage to fix_error_ids.py 2020-06-27 23:46:42 +00:00			`def find_ids_in_source_file(file_name, ids):`
Add a script to correct IDs 2020-05-06 12:25:13 +00:00			`source = read_file(file_name)`
Add --examine-coverage to fix_error_ids.py 2020-06-27 23:46:42 +00:00			`for m in re.finditer(SOURCE_FILE_PATTERN, source):`
Add a script to correct IDs 2020-05-06 12:25:13 +00:00			`if in_comment(source, m.start()):`
			`continue`
			`underscore_pos = m.group(0).index("_")`
			`id = m.group(0)[0:underscore_pos]`
			`if id in ids:`
			`ids[id] += 1`
			`else:`
			`ids[id] = 1`


			`def get_used_ids(file_names):`
			`used_ids = {}`
			`for file_name in file_names:`
Add --examine-coverage to fix_error_ids.py 2020-06-27 23:46:42 +00:00			`find_ids_in_source_file(file_name, used_ids)`
Add a script to correct IDs 2020-05-06 12:25:13 +00:00			`return used_ids`


			`def get_id(available_ids, used_ids):`
			`while len(available_ids) > 0:`
			`k = random.randrange(len(available_ids))`
			`id = list(available_ids.keys())[k]`
			`del available_ids[id]`
			`if id not in used_ids:`
			`return id`
			`assert False, "Out of IDs"`


			`def fix_ids_in_file(file_name, available_ids, used_ids):`
			`source = read_file(file_name)`

			`k = 0`
			`destination = []`
Add --examine-coverage to fix_error_ids.py 2020-06-27 23:46:42 +00:00			`for m in re.finditer(SOURCE_FILE_PATTERN, source):`
Add a script to correct IDs 2020-05-06 12:25:13 +00:00			`destination.extend(source[k:m.start()])`

			`underscore_pos = m.group(0).index("_")`
			`id = m.group(0)[0:underscore_pos]`

			`# incorrect id or id has a duplicate somewhere`
			`if not in_comment(source, m.start()) and (len(id) != 4 or id[0] == "0" or used_ids[id] > 1):`
			`assert id in used_ids`
			`new_id = get_id(available_ids, used_ids)`
			`used_ids[id] -= 1`
			`else:`
			`new_id = id`

			`destination.extend(new_id + "_error")`
			`k = m.end()`

			`destination.extend(source[k:])`

			`destination = ''.join(destination)`
			`if source != destination:`
			`write_file(file_name, destination)`
			`print(f"Fixed file: {file_name}")`


			`def fix_ids(used_ids, file_names):`
			`available_ids = {str(id): None for id in range(1000, 10000)}`
			`for file_name in file_names:`
			`fix_ids_in_file(file_name, available_ids, used_ids)`


Add --examine-coverage to fix_error_ids.py 2020-06-27 23:46:42 +00:00			`def find_files(top_dir, sub_dirs, extensions):`
			`"""Builds a list of files with given extensions in specified subdirectories"""`
Add a script to correct IDs 2020-05-06 12:25:13 +00:00
			`source_file_names = []`
Add --examine-coverage to fix_error_ids.py 2020-06-27 23:46:42 +00:00			`for dir in sub_dirs:`
Error ids only walk into a given set of directories 2020-05-12 07:30:29 +00:00			`for root, _, file_names in os.walk(os.path.join(top_dir, dir), onerror=lambda e: exit(f"Walk error: {e}")):`
			`for file_name in file_names:`
			`_, ext = path.splitext(file_name)`
Add --examine-coverage to fix_error_ids.py 2020-06-27 23:46:42 +00:00			`if ext in extensions:`
Error ids only walk into a given set of directories 2020-05-12 07:30:29 +00:00			`source_file_names.append(path.join(root, file_name))`
Add a script to correct IDs 2020-05-06 12:25:13 +00:00
			`return source_file_names`


Add --examine-coverage to fix_error_ids.py 2020-06-27 23:46:42 +00:00			`def find_ids_in_test_file(file_name):`
			`source = read_file(file_name)`
			`pattern = r"^// (.*Error\|Warning) \d\d\d\d:"`
			`return {m.group(0)[-5:-1] for m in re.finditer(pattern, source, flags=re.MULTILINE)}`


			`def find_ids_in_test_files(file_names):`
			`used_ids = set()`
			`for file_name in file_names:`
			`used_ids \|= find_ids_in_test_file(file_name)`
			`return used_ids`


			`def print_ids(ids):`
			`for k, id in enumerate(sorted(ids)):`
			`if k % 10 > 0:`
			`print(" ", end="")`
			`elif k > 0:`
			`print()`
			`print(id, end="")`


			`def examine_id_coverage(top_dir, used_ids):`
			`test_sub_dirs = [`
			`path.join("test", "libsolidity", "errorRecoveryTests"),`
			`path.join("test", "libsolidity", "smtCheckerTests"),`
			`path.join("test", "libsolidity", "syntaxTests")`
			`]`
			`test_file_names = find_files(`
			`top_dir,`
			`test_sub_dirs,`
			`[".sol"]`
			`)`
			`covered_ids = find_ids_in_test_files(test_file_names)`

			`print(f"IDs in source files: {len(used_ids)}")`
			`print(f"IDs in test files : {len(covered_ids)} ({len(covered_ids) - len(used_ids)})")`
			`print()`

			`unused_covered_ids = covered_ids - used_ids`
			`if len(unused_covered_ids) != 0:`
			`print("Error. The following error codes found in tests, but not in sources:")`
			`print_ids(unused_covered_ids)`
			`return 1`

			`used_uncovered_ids = used_ids - covered_ids`
			`if len(used_uncovered_ids) != 0:`
			`print("The following error codes found in sources, but not in tests:")`
			`print_ids(used_uncovered_ids)`
			`print("\n\nPlease make sure to add appropriate tests.")`
			`return 1`

			`return 0`


Check for wrong error codes in the CI 2020-05-27 10:13:37 +00:00			`def main(argv):`
Add --examine-coverage to fix_error_ids.py 2020-06-27 23:46:42 +00:00			`# pylint: disable=too-many-branches, too-many-locals`

			`check = False`
			`fix = False`
Check for wrong error codes in the CI 2020-05-27 10:13:37 +00:00			`noconfirm = False`
Add --examine-coverage to fix_error_ids.py 2020-06-27 23:46:42 +00:00			`examine_coverage = False`
			`opts, args = getopt.getopt(argv, "", ["check", "fix", "noconfirm", "examine-coverage"])`
Check for wrong error codes in the CI 2020-05-27 10:13:37 +00:00
			`for opt, arg in opts:`
Add --examine-coverage to fix_error_ids.py 2020-06-27 23:46:42 +00:00			`if opt == '--check':`
			`check = True`
			`elif opt == "--fix":`
			`fix = True`
Refactor error ID checker script 2020-06-02 00:52:29 +00:00			`elif opt == '--noconfirm':`
Check for wrong error codes in the CI 2020-05-27 10:13:37 +00:00			`noconfirm = True`
Add --examine-coverage to fix_error_ids.py 2020-06-27 23:46:42 +00:00			`elif opt == '--examine-coverage':`
			`examine_coverage = True`
Check for wrong error codes in the CI 2020-05-27 10:13:37 +00:00
Add --examine-coverage to fix_error_ids.py 2020-06-27 23:46:42 +00:00			`if not check and not fix and not examine_coverage:`
			`print("usage: python error_codes.py --check \| --fix [--noconfirm] \| --examine-coverage")`
			`exit(1)`
Check for wrong error codes in the CI 2020-05-27 10:13:37 +00:00
Add --examine-coverage to fix_error_ids.py 2020-06-27 23:46:42 +00:00			`cwd = os.getcwd()`
Add a script to correct IDs 2020-05-06 12:25:13 +00:00
Add --examine-coverage to fix_error_ids.py 2020-06-27 23:46:42 +00:00			`source_file_names = find_files(`
			`cwd,`
			`["libevmasm", "liblangutil", "libsolc", "libsolidity", "libsolutil", "libyul", "solc"],`
			`[".h", ".cpp"]`
			`)`
Add a script to correct IDs 2020-05-06 12:25:13 +00:00			`used_ids = get_used_ids(source_file_names)`

			`ok = True`
			`for id in sorted(used_ids):`
			`if len(id) != 4:`
			`print(f"ID {id} length != 4")`
			`ok = False`
			`if id[0] == "0":`
			`print(f"ID {id} starts with zero")`
			`ok = False`
			`if used_ids[id] > 1:`
			`print(f"ID {id} appears {used_ids[id]} times")`
			`ok = False`

Add --examine-coverage to fix_error_ids.py 2020-06-27 23:46:42 +00:00			`if examine_coverage:`
			`if not ok:`
			`print("Incorrect IDs has to be fixed before applying --examine-coverage")`
			`res = examine_id_coverage(cwd, used_ids.keys())`
			`exit(res)`

Add a script to correct IDs 2020-05-06 12:25:13 +00:00			`if ok:`
			`print("No incorrect IDs found")`
Check for wrong error codes in the CI 2020-05-27 10:13:37 +00:00			`exit(0)`
Refactor error ID checker script 2020-06-02 00:52:29 +00:00
Add --examine-coverage to fix_error_ids.py 2020-06-27 23:46:42 +00:00			`if check:`
Check for wrong error codes in the CI 2020-05-27 10:13:37 +00:00			`exit(1)`
Add a script to correct IDs 2020-05-06 12:25:13 +00:00
Add --examine-coverage to fix_error_ids.py 2020-06-27 23:46:42 +00:00			`assert fix, "Unexpected state, should not come here without --fix"`

Refactor error ID checker script 2020-06-02 00:52:29 +00:00			`if not noconfirm:`
			`answer = input(`
			`"\nDo you want to fix incorrect IDs?\n"`
			`"Please commit current changes first, and review the results when the script finishes.\n"`
			`"[Y/N]? "`
			`)`
			`while len(answer) == 0 or answer not in "YNyn":`
			`answer = input("[Y/N]? ")`
			`if answer not in "yY":`
			`exit(1)`

Add --examine-coverage to fix_error_ids.py 2020-06-27 23:46:42 +00:00			`random.seed()`
Refactor error ID checker script 2020-06-02 00:52:29 +00:00			`fix_ids(used_ids, source_file_names)`
			`print("Fixing completed")`
			`exit(2)`

Add a script to correct IDs 2020-05-06 12:25:13 +00:00
			`if __name__ == "__main__":`
Check for wrong error codes in the CI 2020-05-27 10:13:37 +00:00			`main(sys.argv[1:])`