From 919572d6ecf85b0883c2f281ee660d5dd0b56773 Mon Sep 17 00:00:00 2001
From: a3d4 <a3d4@pm.me>
Date: Sun, 28 Jun 2020 01:46:42 +0200
Subject: [PATCH] Add --examine-coverage to fix_error_ids.py

---
 .circleci/config.yml                         |   2 +-
 liblangutil/Exceptions.h                     |   2 +-
 scripts/{fix_error_ids.py => error_codes.py} | 114 +++++++++++++++----
 3 files changed, 97 insertions(+), 21 deletions(-)
 rename scripts/{fix_error_ids.py => error_codes.py} (56%)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 4bebea97f..5157f0661 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -323,7 +323,7 @@ jobs:
       - checkout
       - run:
           name: Check for error codes
-          command: ./scripts/fix_error_ids.py --check-only
+          command: ./scripts/error_codes.py --check
 
   chk_pylint:
     docker:
diff --git a/liblangutil/Exceptions.h b/liblangutil/Exceptions.h
index 083a5e3cd..bd786fc3f 100644
--- a/liblangutil/Exceptions.h
+++ b/liblangutil/Exceptions.h
@@ -61,7 +61,7 @@ struct InvalidAstError: virtual util::Exception {};
  * They are passed as the first parameter of error reporting functions.
  * Suffix _error helps to find them in the sources.
  * The struct ErrorId prevents incidental calls like typeError(3141) instead of typeError(3141_error).
- * To create a new ID, one can add 0000_error and then run "python ./scripts/fix_error_ids.py"
+ * To create a new ID, one can add 0000_error and then run "python ./scripts/error_codes.py --fix"
  * from the root of the repo.
  */
 struct ErrorId
diff --git a/scripts/fix_error_ids.py b/scripts/error_codes.py
similarity index 56%
rename from scripts/fix_error_ids.py
rename to scripts/error_codes.py
index d9a201115..7be125c6e 100755
--- a/scripts/fix_error_ids.py
+++ b/scripts/error_codes.py
@@ -7,7 +7,7 @@ import sys
 from os import path
 
 ENCODING = "utf-8"
-PATTERN = r"\b\d+_error\b"
+SOURCE_FILE_PATTERN = r"\b\d+_error\b"
 
 
 def read_file(file_name):
@@ -36,9 +36,9 @@ def in_comment(source, pos):
     return slash_star_pos > star_slash_pos
 
 
-def find_ids_in_file(file_name, ids):
+def find_ids_in_source_file(file_name, ids):
     source = read_file(file_name)
-    for m in re.finditer(PATTERN, source):
+    for m in re.finditer(SOURCE_FILE_PATTERN, source):
         if in_comment(source, m.start()):
             continue
         underscore_pos = m.group(0).index("_")
@@ -52,7 +52,7 @@ def find_ids_in_file(file_name, ids):
 def get_used_ids(file_names):
     used_ids = {}
     for file_name in file_names:
-        find_ids_in_file(file_name, used_ids)
+        find_ids_in_source_file(file_name, used_ids)
     return used_ids
 
 
@@ -71,7 +71,7 @@ def fix_ids_in_file(file_name, available_ids, used_ids):
 
     k = 0
     destination = []
-    for m in re.finditer(PATTERN, source):
+    for m in re.finditer(SOURCE_FILE_PATTERN, source):
         destination.extend(source[k:m.start()])
 
         underscore_pos = m.group(0).index("_")
@@ -102,38 +102,105 @@ def fix_ids(used_ids, file_names):
         fix_ids_in_file(file_name, available_ids, used_ids)
 
 
-def find_source_files(top_dir):
-    """Builds the list of .h and .cpp files in top_dir directory"""
+def find_files(top_dir, sub_dirs, extensions):
+    """Builds a list of files with given extensions in specified subdirectories"""
 
     source_file_names = []
-    dirs = ['libevmasm', 'liblangutil', 'libsolc', 'libsolidity', 'libsolutil', 'libyul', 'solc']
-
-    for dir in dirs:
+    for dir in sub_dirs:
         for root, _, file_names in os.walk(os.path.join(top_dir, dir), onerror=lambda e: exit(f"Walk error: {e}")):
             for file_name in file_names:
                 _, ext = path.splitext(file_name)
-                if ext in [".h", ".cpp"]:
+                if ext in extensions:
                     source_file_names.append(path.join(root, file_name))
 
     return source_file_names
 
 
+def find_ids_in_test_file(file_name):
+    source = read_file(file_name)
+    pattern = r"^// (.*Error|Warning) \d\d\d\d:"
+    return {m.group(0)[-5:-1] for m in re.finditer(pattern, source, flags=re.MULTILINE)}
+
+
+def find_ids_in_test_files(file_names):
+    used_ids = set()
+    for file_name in file_names:
+        used_ids |= find_ids_in_test_file(file_name)
+    return used_ids
+
+
+def print_ids(ids):
+    for k, id in enumerate(sorted(ids)):
+        if k % 10 > 0:
+            print(" ", end="")
+        elif k > 0:
+            print()
+        print(id, end="")
+
+
+def examine_id_coverage(top_dir, used_ids):
+    test_sub_dirs = [
+        path.join("test", "libsolidity", "errorRecoveryTests"),
+        path.join("test", "libsolidity", "smtCheckerTests"),
+        path.join("test", "libsolidity", "syntaxTests")
+    ]
+    test_file_names = find_files(
+        top_dir,
+        test_sub_dirs,
+        [".sol"]
+    )
+    covered_ids = find_ids_in_test_files(test_file_names)
+
+    print(f"IDs in source files: {len(used_ids)}")
+    print(f"IDs in test files  : {len(covered_ids)} ({len(covered_ids) - len(used_ids)})")
+    print()
+
+    unused_covered_ids = covered_ids - used_ids
+    if len(unused_covered_ids) != 0:
+        print("Error. The following error codes found in tests, but not in sources:")
+        print_ids(unused_covered_ids)
+        return 1
+
+    used_uncovered_ids = used_ids - covered_ids
+    if len(used_uncovered_ids) != 0:
+        print("The following error codes found in sources, but not in tests:")
+        print_ids(used_uncovered_ids)
+        print("\n\nPlease make sure to add appropriate tests.")
+        return 1
+
+    return 0
+
+
 def main(argv):
-    check_only = False
+    # pylint: disable=too-many-branches, too-many-locals
+
+    check = False
+    fix = False
     noconfirm = False
-    opts, args = getopt.getopt(argv, "", ["check-only", "noconfirm"])
+    examine_coverage = False
+    opts, args = getopt.getopt(argv, "", ["check", "fix", "noconfirm", "examine-coverage"])
 
     for opt, arg in opts:
-        if opt == '--check-only':
-            check_only = True
+        if opt == '--check':
+            check = True
+        elif opt == "--fix":
+            fix = True
         elif opt == '--noconfirm':
             noconfirm = True
+        elif opt == '--examine-coverage':
+            examine_coverage = True
+
+    if not check and not fix and not examine_coverage:
+        print("usage: python error_codes.py --check | --fix [--noconfirm] | --examine-coverage")
+        exit(1)
 
-    random.seed()
     cwd = os.getcwd()
 
-    source_file_names = find_source_files(cwd)
-
+    source_file_names = find_files(
+        cwd,
+        ["libevmasm", "liblangutil", "libsolc", "libsolidity", "libsolutil", "libyul", "solc"],
+        [".h", ".cpp"]
+    )
     used_ids = get_used_ids(source_file_names)
 
     ok = True
@@ -148,13 +215,21 @@ def main(argv):
             print(f"ID {id} appears {used_ids[id]} times")
             ok = False
 
+    if examine_coverage:
+        if not ok:
+            print("Incorrect IDs has to be fixed before applying --examine-coverage")
+        res = examine_id_coverage(cwd, used_ids.keys())
+        exit(res)
+
     if ok:
         print("No incorrect IDs found")
         exit(0)
 
-    if check_only:
+    if check:
         exit(1)
 
+    assert fix, "Unexpected state, should not come here without --fix"
+
     if not noconfirm:
         answer = input(
             "\nDo you want to fix incorrect IDs?\n"
@@ -166,6 +241,7 @@ def main(argv):
         if answer not in "yY":
             exit(1)
 
+    random.seed()
     fix_ids(used_ids, source_file_names)
     print("Fixing completed")
     exit(2)