2020-05-27 10:13:37 +00:00
|
|
|
#! /usr/bin/env python3
|
2020-05-06 12:25:13 +00:00
|
|
|
import random
|
|
|
|
import re
|
|
|
|
import os
|
2020-05-27 10:13:37 +00:00
|
|
|
import getopt
|
|
|
|
import sys
|
2020-05-06 12:25:13 +00:00
|
|
|
from os import path
|
|
|
|
|
|
|
|
# Encoding used by read_file (except for its latin-1 special case) and by write_file.
ENCODING = "utf-8"
# Matches an error id as it appears in source files: a run of digits followed by "_error".
SOURCE_FILE_PATTERN = r"\b\d+_error\b"
|
2020-05-06 12:25:13 +00:00
|
|
|
|
|
|
|
|
|
|
|
def read_file(file_name):
    """Read a text file and return its entire content as a string.

    The file is decoded with ENCODING, except for a file whose base name is
    "invalid_utf8_sequence.sol", which is decoded as latin-1.

    If the content could not be read, "Error reading: <file_name>" is printed
    and the exception raised by open()/read() propagates to the caller.
    """
    content = None
    _, tail = path.split(file_name)
    is_latin = tail == "invalid_utf8_sequence.sol"
    try:
        with open(file_name, "r", encoding="latin-1" if is_latin else ENCODING) as f:
            content = f.read()
    finally:
        # content is still None only when open() or read() raised
        if content is None:
            print(f"Error reading: {file_name}")
    return content
|
|
|
|
|
|
|
|
|
|
|
|
def write_file(file_name, content):
    """Overwrite file_name with content, encoded using ENCODING."""
    with open(file_name, mode="w", encoding=ENCODING) as output:
        output.write(content)
|
|
|
|
|
|
|
|
|
|
|
|
def in_comment(source, pos):
    """Tell whether position pos of source appears to lie inside a comment.

    This is a textual heuristic looking only at source[:pos]. It returns True
    when either
    - a "//" occurs after the last line feed before pos, or
    - the last "/*" before pos comes after the last "*/" before pos.
    """
    last_line_comment = source.rfind("//", 0, pos)
    last_line_feed = source.rfind("\n", 0, pos)
    last_block_open = source.rfind("/*", 0, pos)
    last_block_close = source.rfind("*/", 0, pos)
    # rfind yields -1 for "not found", so the comparisons work without special cases
    return last_line_comment > last_line_feed or last_block_open > last_block_close
|
|
|
|
|
|
|
|
|
2020-07-16 20:06:55 +00:00
|
|
|
def find_ids_in_source_file(file_name, id_to_file_names):
    """Record every error id used in file_name into id_to_file_names.

    For each match of SOURCE_FILE_PATTERN that in_comment() does not classify
    as commented out, file_name is appended to the list stored under the id
    (the digits in front of "_error"). A file is listed once per occurrence,
    so the length of a list equals the number of appearances of that id.
    """
    text = read_file(file_name)
    for match in re.finditer(SOURCE_FILE_PATTERN, text):
        if in_comment(text, match.start()):
            continue
        # everything before the first underscore is the numeric id
        error_id = match.group(0).partition("_")[0]
        id_to_file_names.setdefault(error_id, []).append(file_name)
|
2020-05-06 12:25:13 +00:00
|
|
|
|
|
|
|
|
2020-07-16 20:06:55 +00:00
|
|
|
def find_ids_in_source_files(file_names):
    """Returns a dictionary with list of source files for every appearance of every id"""
    occurrences = {}
    for name in file_names:
        # each call adds the ids found in one file to the shared dictionary
        find_ids_in_source_file(name, occurrences)
    return occurrences
|
2020-05-06 12:25:13 +00:00
|
|
|
|
|
|
|
|
2020-07-02 23:26:26 +00:00
|
|
|
def get_next_id(available_ids):
    """Pick a random id from available_ids, remove it from the set and return it.

    available_ids is modified in place, so the same id is never handed out twice.

    Raises AssertionError("Out of IDs") when available_ids is empty.
    """
    if not available_ids:
        # Raised explicitly instead of via an assert statement so that the
        # check is not stripped when Python runs with -O.
        raise AssertionError("Out of IDs")
    next_id = random.choice(list(available_ids))
    available_ids.remove(next_id)
    return next_id
|
2020-05-06 12:25:13 +00:00
|
|
|
|
|
|
|
|
2020-07-16 20:06:55 +00:00
|
|
|
def fix_ids_in_source_file(file_name, id_to_count, available_ids):
    """Replace incorrect or duplicated error ids in one source file.

    An id outside of comments is replaced by a fresh one taken from
    available_ids when it is not exactly four characters long, starts with
    "0", or id_to_count says it still appears more than once. The count of a
    replaced id is decremented, so the last remaining occurrence of a
    duplicated id keeps its value.

    The file is rewritten (and "Fixed file: <file_name>" printed) only if the
    resulting text differs from the original. id_to_count and available_ids
    are modified in place.
    """
    source = read_file(file_name)

    k = 0
    # Output is collected as whole chunks and joined once at the end.
    destination = []
    for m in re.finditer(SOURCE_FILE_PATTERN, source):
        # copy the text between the previous match and this one unchanged
        destination.append(source[k:m.start()])

        old_id = m.group(0).partition("_")[0]

        # incorrect id or id has a duplicate somewhere
        if not in_comment(source, m.start()) and (
            len(old_id) != 4 or old_id[0] == "0" or id_to_count[old_id] > 1
        ):
            assert old_id in id_to_count
            new_id = get_next_id(available_ids)
            assert new_id not in id_to_count
            id_to_count[old_id] -= 1
        else:
            new_id = old_id

        destination.append(new_id + "_error")
        k = m.end()

    # trailing text after the last match
    destination.append(source[k:])

    destination = "".join(destination)
    if source != destination:
        write_file(file_name, destination)
        print(f"Fixed file: {file_name}")
|
|
|
|
|
|
|
|
|
2020-07-16 20:06:55 +00:00
|
|
|
def fix_ids_in_source_files(file_names, id_to_count):
    """
    Fixes ids in given source files;
    id_to_count contains number of appearances of every id in sources
    """
    # candidate replacements: every id from 1000 to 9999 that is not in use yet
    unused_ids = set(map(str, range(1000, 10000)))
    unused_ids.difference_update(id_to_count)
    for name in file_names:
        fix_ids_in_source_file(name, id_to_count, unused_ids)
|
2020-05-06 12:25:13 +00:00
|
|
|
|
|
|
|
|
2020-06-27 23:46:42 +00:00
|
|
|
def find_files(top_dir, sub_dirs, extensions):
    """Builds a list of files with given extensions in specified subdirectories

    Every directory top_dir/<sub_dir> is walked recursively; a file is
    included when its extension (as returned by path.splitext, i.e. with the
    leading dot) is contained in extensions. The returned paths are the
    walked directory joined with the file name.

    If walking a directory fails, the script terminates through sys.exit with
    the message "Walk error: <error>".
    """

    def abort_walk(error):
        # sys.exit rather than the site-provided exit(), which is meant for
        # interactive sessions and is not guaranteed to exist
        sys.exit(f"Walk error: {error}")

    source_file_names = []
    for sub_dir in sub_dirs:
        for root, _, file_names in os.walk(os.path.join(top_dir, sub_dir), onerror=abort_walk):
            for file_name in file_names:
                _, ext = path.splitext(file_name)
                if ext in extensions:
                    source_file_names.append(path.join(root, file_name))

    return source_file_names
|
|
|
|
|
|
|
|
|
2020-06-27 23:46:42 +00:00
|
|
|
def find_ids_in_test_file(file_name):
    """Return the set of four-digit ids found in the expectation lines of a test file.

    An expectation line starts with "// ", followed by something ending in
    "Error" or by "Warning", a space, four digits and a colon.
    """
    text = read_file(file_name)
    expectation = r"^// (.*Error|Warning) \d\d\d\d:"
    found = set()
    for match in re.finditer(expectation, text, flags=re.MULTILINE):
        # the match ends in "dddd:"; keep the four digits, drop the colon
        found.add(match.group(0)[-5:-1])
    return found
|
|
|
|
|
|
|
|
|
|
|
|
def find_ids_in_test_files(file_names):
    """Returns a set containing all ids in tests"""
    collected = set()
    for name in file_names:
        # merge the ids of one test file into the overall set
        collected.update(find_ids_in_test_file(name))
    return collected
|
2020-06-27 23:46:42 +00:00
|
|
|
|
|
|
|
|
2020-07-09 12:39:05 +00:00
|
|
|
def find_ids_in_cmdline_test_err(file_name):
    """Return the set of four-digit ids that appear as " (dddd):" in the given file."""
    text = read_file(file_name)
    code_in_parens = r' \(\d\d\d\d\):'
    found = set()
    for match in re.finditer(code_in_parens, text, flags=re.MULTILINE):
        # the match ends in "dddd):"; strip the closing parenthesis and the colon
        found.add(match.group(0)[-6:-2])
    return found
|
|
|
|
|
|
|
|
|
2020-06-27 23:46:42 +00:00
|
|
|
def print_ids(ids):
    """Print ids in sorted order, ten per line, separated by single spaces.

    No newline is written after the last id; nothing is printed for an empty
    collection.
    """
    ordered = sorted(ids)
    for row_start in range(0, len(ordered), 10):
        if row_start:
            # finish the previous row before starting the next one
            print()
        print(*ordered[row_start:row_start + 10], end="")
|
|
|
|
|
|
|
|
|
2020-07-16 20:06:55 +00:00
|
|
|
def print_ids_per_file(ids, id_to_file_names, top_dir):
    """Print the given ids grouped by the file they appear in.

    File names are taken from id_to_file_names, shown relative to top_dir and
    listed in sorted order. Each file name is followed by one line holding
    its ids in sorted order, each preceded by a space.
    """
    ids_by_file = {}
    for error_id in ids:
        for full_name in id_to_file_names[error_id]:
            relative_name = path.relpath(full_name, top_dir)
            ids_by_file.setdefault(relative_name, []).append(error_id)

    for relative_name in sorted(ids_by_file):
        print(relative_name)
        print("".join(f" {error_id}" for error_id in sorted(ids_by_file[relative_name])))
|
|
|
|
|
|
|
|
|
2020-08-03 02:28:35 +00:00
|
|
|
def examine_id_coverage(top_dir, source_id_to_file_names, new_ids_only=False):
    """Compare the error ids used in sources with the ids expected by tests.

    Test ids are collected from the .sol/.yul files below four test
    directories of top_dir and from test/cmdlineTests/error_codes/err; the
    ids in white_ids are then treated as if they were covered.

    With new_ids_only=False the id counts are printed, and False is returned
    when an id occurs only in tests or only in sources.
    In both modes False is returned when a source-only id is not listed in
    old_source_only_ids. Otherwise the result is True.
    """
    test_sub_dirs = [
        path.join("test", "libsolidity", name)
        for name in ("errorRecoveryTests", "smtCheckerTests", "syntaxTests")
    ]
    test_sub_dirs.append(path.join("test", "libyul", "yulSyntaxTests"))
    test_file_names = find_files(top_dir, test_sub_dirs, [".sol", ".yul"])

    source_ids = source_id_to_file_names.keys()
    test_ids = find_ids_in_test_files(test_file_names)

    # special case, we are interested in warnings which are ignored by regular tests:
    # Warning (1878): SPDX license identifier not provided in source file. ....
    # Warning (3420): Source file does not specify required compiler version!
    cmdline_err_file = path.join(top_dir, "test", "cmdlineTests", "error_codes", "err")
    test_ids |= find_ids_in_cmdline_test_err(cmdline_err_file)

    # white list of ids which are not covered by tests
    white_ids = {
        "9804",  # Tested in test/libyul/ObjectParser.cpp.
        "2674",
        "6367",
        "3805",  # "This is a pre-release compiler version, please do not use it in production."
                 # The warning may or may not exist in a compiler build.
        "4591",  # "There are more than 256 warnings. Ignoring the rest."
                 # Due to 3805, the warning lists look different for different compiler builds.
        "1834",  # Unimplemented feature error, as we do not test it anymore via cmdLineTests
    }
    assert test_ids.isdisjoint(white_ids), "The sets are not supposed to intersect"
    test_ids |= white_ids

    test_only_ids = test_ids - source_ids
    source_only_ids = source_ids - test_ids

    if not new_ids_only:
        print(f"IDs in source files: {len(source_ids)}")
        print(f"IDs in test files : {len(test_ids)} ({len(test_ids) - len(source_ids)})")
        print()

        if test_only_ids:
            print("Error. The following error codes found in tests, but not in sources:")
            print_ids(test_only_ids)
            return False

        if source_only_ids:
            print("The following error codes found in sources, but not in tests:")
            print_ids_per_file(source_only_ids, source_id_to_file_names, top_dir)
            print("\n\nPlease make sure to add appropriate tests.")
            return False

    # ids known to lack tests; only source-only ids outside of this set are reported below
    old_source_only_ids = {
        "1584", "1823",
        "1988", "2066", "3356",
        "3893", "3996", "4010", "4802",
        "5272", "5622", "7128", "7400",
        "7589", "7593", "7649", "7710",
        "8065", "8084", "8140",
        "8312", "8592", "9134", "9609",
    }

    new_source_only_ids = source_only_ids - old_source_only_ids
    if not new_source_only_ids:
        return True

    print("The following new error code(s), not covered by tests, found:")
    print_ids(new_source_only_ids)
    print(
        "\nYou can:\n"
        "- create appropriate test(s);\n"
        "- add the error code(s) to old_source_only_ids in error_codes.py\n"
        " (to silence the checking script, with a promise to add a test later);\n"
        "- add the error code(s) to white_ids in error_codes.py\n"
        " (for rare cases when the error is not supposed to be tested)"
    )
    return False
|
2020-06-27 23:46:42 +00:00
|
|
|
|
|
|
|
|
2020-05-27 10:13:37 +00:00
|
|
|
def main(argv):
    """Entry point of the script; argv is the argument list without the program name.

    Exactly one of --check, --fix, --examine-coverage or --next must be given
    (--no-confirm only affects --fix). The function always terminates through
    sys.exit:

    - wrong or unknown options: usage is printed, exit code 1;
    - --examine-coverage: 0 if examine_id_coverage() succeeds, otherwise 1;
    - --next: prints a random unused id and exits with 0 (1 if problems were found);
    - --check: 0 if no problems were found, otherwise 1;
    - --fix: 0 if there was nothing to fix, 1 if the user declined,
      2 after the source files have been rewritten.

    Source files are searched below the current working directory.
    """
    # pylint: disable=too-many-branches, too-many-locals, too-many-statements

    usage = "usage: python error_codes.py --check | --fix [--no-confirm] | --examine-coverage | --next"

    try:
        opts, _ = getopt.getopt(argv, "", ["check", "fix", "no-confirm", "examine-coverage", "next"])
    except getopt.GetoptError as err:
        # unknown option: report it like any other wrong invocation instead of a traceback
        print(f"error: {err}")
        print(usage)
        sys.exit(1)

    given = {opt for opt, _ in opts}
    check = "--check" in given
    fix = "--fix" in given
    no_confirm = "--no-confirm" in given
    examine_coverage = "--examine-coverage" in given
    want_next_id = "--next" in given

    if [check, fix, examine_coverage, want_next_id].count(True) != 1:
        print(usage)
        sys.exit(1)

    cwd = os.getcwd()

    source_file_names = find_files(
        cwd,
        ["libevmasm", "liblangutil", "libsolc", "libsolidity", "libsolutil", "libyul", "solc"],
        [".h", ".cpp"]
    )
    source_id_to_file_names = find_ids_in_source_files(source_file_names)

    # report every malformed or duplicated id
    ok = True
    for error_id in sorted(source_id_to_file_names):
        if len(error_id) != 4:
            print(f"ID {error_id} length != 4")
            ok = False
        if error_id[0] == "0":
            print(f"ID {error_id} starts with zero")
            ok = False
        if len(source_id_to_file_names[error_id]) > 1:
            print(f"ID {error_id} appears {len(source_id_to_file_names[error_id])} times")
            ok = False

    if examine_coverage:
        if not ok:
            print("Incorrect IDs have to be fixed before applying --examine-coverage")
            sys.exit(1)
        sys.exit(0 if examine_id_coverage(cwd, source_id_to_file_names) else 1)

    # always evaluated (it prints its findings), then combined with the id checks above
    coverage_ok = examine_id_coverage(cwd, source_id_to_file_names, new_ids_only=True)
    ok = ok and coverage_ok

    random.seed()

    if want_next_id:
        if not ok:
            print("Incorrect IDs have to be fixed before applying --next")
            sys.exit(1)
        available_ids = {str(number) for number in range(1000, 10000)} - source_id_to_file_names.keys()
        next_id = get_next_id(available_ids)
        print(f"Next ID: {next_id}")
        sys.exit(0)

    if ok:
        print("No incorrect IDs found")
        sys.exit(0)

    if check:
        sys.exit(1)

    assert fix, "Unexpected state, should not come here without --fix"

    if not no_confirm:
        answer = input(
            "\nDo you want to fix incorrect IDs?\n"
            "Please commit current changes first, and review the results when the script finishes.\n"
            "[Y/N]? "
        )
        # compare against the individual letters; a substring test on "YNyn"
        # would also accept inputs such as "YN" or "yn"
        while answer not in ("Y", "N", "y", "n"):
            answer = input("[Y/N]? ")
        if answer not in ("y", "Y"):
            sys.exit(1)

    # number of appearances for every id
    source_id_to_count = {
        error_id: len(file_names) for error_id, file_names in source_id_to_file_names.items()
    }

    fix_ids_in_source_files(source_file_names, source_id_to_count)
    print("Fixing completed")
    sys.exit(2)
|
|
|
|
|
2020-05-06 12:25:13 +00:00
|
|
|
|
|
|
|
# Run only when executed as a script; main() receives the arguments without the program name.
if __name__ == "__main__":
    main(sys.argv[1:])
|