#!/usr/bin/env python3
"""Unit tests for BenchmarkDiffer, DiffTableSet and DiffTableFormatter from externalTests.benchmark_diff."""

from textwrap import dedent
import json
import unittest

from unittest_helpers import FIXTURE_DIR, load_fixture

# NOTE: This test file only works with scripts/ added to PYTHONPATH so pylint can't find the imports
# pragma pylint: disable=import-error
from externalTests.benchmark_diff import BenchmarkDiffer, DifferenceStyle, DiffTableSet, DiffTableFormatter, OutputFormat
# pragma pylint: enable=import-error

SUMMARIZED_BENCHMARKS_DEVELOP_JSON_PATH = FIXTURE_DIR / 'summarized-benchmarks-develop.json'
SUMMARIZED_BENCHMARKS_BRANCH_JSON_PATH = FIXTURE_DIR / 'summarized-benchmarks-branch.json'
SUMMARIZED_DIFF_HUMANIZED_MD_PATH = FIXTURE_DIR / 'summarized-benchmark-diff-develop-branch-humanized.md'

SUMMARIZED_DIFF_HUMANIZED_MD = load_fixture(SUMMARIZED_DIFF_HUMANIZED_MD_PATH)


class TestBenchmarkDiff(unittest.TestCase):
    """Fixture-based test: diffs the two summarized benchmark JSON fixtures in ABSOLUTE style."""

    def setUp(self):
        # Expected and actual diffs are large nested dicts; raise the limit so unittest prints the full diff.
        self.maxDiff = 10000

    def test_benchmark_diff(self):
        """Diff the 'develop' fixture against the 'branch' fixture and compare with a hand-written result."""
        report_before = json.loads(load_fixture(SUMMARIZED_BENCHMARKS_DEVELOP_JSON_PATH))
        report_after = json.loads(load_fixture(SUMMARIZED_BENCHMARKS_BRANCH_JSON_PATH))
        expected_diff = {
            "bleeps": {
                "ir-optimize-evm+yul": {
                    # Numerical difference -> negative/positive/zero.
                    # Zeros are not skipped to differentiate them from missing values.
                    "bytecode_size": 132868 - 132165,
                    "deployment_gas": 0,
                    "method_gas": 39289198 - 39289935,
                },
                "legacy-optimize-evm+yul": {
                    # No differences within preset -> zeros still present.
                    "bytecode_size": 0,
                    "deployment_gas": 0,
                    "method_gas": 0,
                },
            },
            "colony": {
                # Preset missing on one side -> replace dict with string
                "ir-optimize-evm+yul": "!A",
                "legacy-no-optimize": "!B",
                "legacy-optimize-evm+yul": {
                    "bytecode_size": 0,
                    # Attribute missing on both sides -> skip
                    #"deployment_gas":
                    #"method_gas":
                },
            },
            "elementfi": {
                "legacy-no-optimize": {
                    # Attributes null on one side -> replace value with string
                    "bytecode_size": "!A",
                    "deployment_gas": "!B",
                    # Attribute null on both sides -> skip
                    #"method_gas":
                },
                "legacy-optimize-evm+yul": {
                    # Attributes missing on one side -> replace value with string
                    "bytecode_size": "!A",
                    "deployment_gas": "!B",
                    # Attribute missing on both sides -> skip
                    #"method_gas":
                },
                "ir-no-optimize": {
                    # Attributes missing on one side, null on the other -> skip
                    #"bytecode_size":
                    #"deployment_gas":
                    "method_gas": 0,
                },
                # Empty preset missing on one side -> replace dict with string
                "legacy-optimize-evm-only": "!A",
                "ir-optimize-evm-only": "!B",
            },
            "euler": {
                # Matching versions -> show attributes, skip version
                "ir-no-optimize": {
                    "bytecode_size": 328540 - 323909,
                    "deployment_gas": 0,
                    "method_gas": 3537419168 - 3452105184,
                },
                # Different versions, different values -> replace whole preset with string
                "legacy-no-optimize": "!V",
                # Different versions, same values -> replace whole preset with string
                "legacy-optimize-evm+yul": "!V",
                # Different versions (not a commit hash), different values -> replace whole preset with string
                "legacy-optimize-evm-only": "!V",
                # Version missing on one side -> replace whole preset with string
                "ir-optimize-evm-only": "!V",
                # Version missing on both sides -> assume same version
                "ir-optimize-evm+yul": {
                    "bytecode_size": 205211 - 182190,
                    "deployment_gas": 39459629 - 35236828,
                    "method_gas": 0,
                },
            },
            "zeppelin": {
                "legacy-optimize-evm+yul": {
                    # Whole project identical -> attributes still present, with zeros
                    "bytecode_size": 0,
                    "deployment_gas": 0,
                    # Field missing on both sides -> skip
                    #"method_gas":
                }
            },
            # Empty project missing on one side -> replace its dict with a string
            "gnosis": "!B",
            "ens": "!A",
        }
        differ = BenchmarkDiffer(DifferenceStyle.ABSOLUTE, None, OutputFormat.JSON)
        self.assertEqual(differ.run(report_before, report_after), expected_diff)


class TestBenchmarkDiffer(unittest.TestCase):
    """Table-driven tests of BenchmarkDiffer.run() on small hand-built inputs."""

    def setUp(self):
        self.maxDiff = 10000

    @staticmethod
    def _nest(value, levels):
        """Wrap `value` in one single-key dict per entry of `levels`.

        Keys are applied in iteration order, so the first entry of `levels` ends up innermost:
        `_nest(1, ['p', 's'])` gives `{'s': {'p': 1}}`. An empty `levels` returns `value` unchanged.
        """
        nested_value = value
        for level in levels:
            nested_value = {level: nested_value}
        return nested_value

    def _assert_single_value_diff_matches(self, differ, cases, nest_result=True, nestings=None):
        """Run `differ` on every `(before, after, expected_diff)` case at several nesting depths.

        :param differ: object whose `run(before, after)` result is compared with the expectation.
        :param cases: iterable of `(before, after, expected_diff)` triples.
        :param nest_result: when true, the expected diff is wrapped with `_nest()` in the same levels
            as the inputs; when false, it is compared as-is at every depth.
        :param nestings: list of key lists passed to `_nest()`; defaults to depths 0 through 3.

        Each case/nesting combination runs in its own subTest, so one failing combination does not
        hide failures in the remaining ones.
        """
        if nestings is None:
            nestings = [[], ['p'], ['p', 's'], ['p', 's', 'a']]

        for levels in nestings:
            for before, after, expected_diff in cases:
                with self.subTest(levels=levels, before=before, after=after):
                    # Message is built from fresh nested copies, independent of what run() receives.
                    message = f'Wrong diff for {self._nest(before, levels)} vs {self._nest(after, levels)}'
                    self.assertEqual(
                        differ.run(self._nest(before, levels), self._nest(after, levels)),
                        self._nest(expected_diff, levels) if nest_result else expected_diff,
                        message,
                    )

    def test_empty(self):
        """Two empty dicts produce an empty diff at every nesting depth, for every style."""
        for style in DifferenceStyle:
            with self.subTest(style=style):
                differ = BenchmarkDiffer(style, None, OutputFormat.JSON)
                self._assert_single_value_diff_matches(differ, [({}, {}, {})], nest_result=False)

    def test_null(self):
        """None on both sides produces an empty diff at every nesting depth, for every style."""
        for style in DifferenceStyle:
            with self.subTest(style=style):
                differ = BenchmarkDiffer(style, None, OutputFormat.JSON)
                self._assert_single_value_diff_matches(differ, [(None, None, {})], nest_result=False)

    def test_number_diff_absolute_json(self):
        """ABSOLUTE style yields `after - before`; checked for every OutputFormat, not only JSON."""
        for output_format in OutputFormat:
            with self.subTest(output_format=output_format):
                self._assert_single_value_diff_matches(
                    BenchmarkDiffer(DifferenceStyle.ABSOLUTE, 4, output_format),
                    [
                        (2,       2,       0),
                        (2,       5,       3),
                        (5,       2,       -3),
                        (2.0,     2.0,     0),
                        (2,       2.0,     0),
                        (2.0,     2,       0),
                        (2,       2.5,     2.5 - 2),
                        (2.5,     2,       2 - 2.5),

                        (0,       0,       0),
                        (0,       2,       2),
                        (0,       -2,      -2),

                        (-3,      -1,      2),
                        (-1,      -3,      -2),
                        (2,       0,       -2),
                        (-2,      0,       2),

                        (1.00006, 1,       1 - 1.00006),
                        (1,       1.00006, 1.00006 - 1),
                        (1.00004, 1,       1 - 1.00004),
                        (1,       1.00004, 1.00004 - 1),
                    ],
                )

    def test_number_diff_json(self):
        """RELATIVE style with precision argument 4; checked for every OutputFormat."""
        for output_format in OutputFormat:
            with self.subTest(output_format=output_format):
                self._assert_single_value_diff_matches(
                    BenchmarkDiffer(DifferenceStyle.RELATIVE, 4, output_format),
                    [
                        (2,        2,        0),
                        (2,        5,        (5 - 2) / 2),
                        (5,        2,        (2 - 5) / 5),
                        (2.0,      2.0,      0),
                        (2,        2.0,      0),
                        (2.0,      2,        0),
                        (2,        2.5,      (2.5 - 2) / 2),
                        (2.5,      2,        (2 - 2.5) / 2.5),

                        (0,        0,        0),
                        (0,        2,        '+INF'),
                        (0,        -2,       '-INF'),

                        (-3,       -1,       0.6667),
                        (-1,       -3,       -2),
                        (2,        0,        -1),
                        (-2,       0,        1),

                        (1.00006,  1,        -0.0001),
                        (1,        1.00006,  0.0001),
                        (1.000004, 1,        '-0'),
                        (1,        1.000004, '+0'),
                    ],
                )

    def test_number_diff_humanized_json_and_console(self):
        """HUMANIZED style renders plain percentage strings for the JSON and CONSOLE formats."""
        for output_format in [OutputFormat.JSON, OutputFormat.CONSOLE]:
            with self.subTest(output_format=output_format):
                self._assert_single_value_diff_matches(
                    BenchmarkDiffer(DifferenceStyle.HUMANIZED, 4, output_format),
                    [
                        (2,        2,        '0%'),
                        (2,        5,        '+150%'),
                        (5,        2,        '-60%'),
                        (2.0,      2.0,      '0%'),
                        (2,        2.0,      '0%'),
                        (2.0,      2,        '0%'),
                        (2,        2.5,      '+25%'),
                        (2.5,      2,        '-20%'),

                        (0,        0,        '0%'),
                        (0,        2,        '+INF%'),
                        (0,        -2,       '-INF%'),

                        (-3,       -1,       '+66.67%'),
                        (-1,       -3,       '-200%'),
                        (2,        0,        '-100%'),
                        (-2,       0,        '+100%'),

                        (1.00006,  1,        '-0.01%'),
                        (1,        1.00006,  '+0.01%'),
                        (1.000004, 1,        '-0%'),
                        (1,        1.000004, '+0%'),
                    ],
                )

    def test_number_diff_humanized_markdown(self):
        """HUMANIZED style in MARKDOWN format wraps values in backticks and marks non-zero changes."""
        self._assert_single_value_diff_matches(
            BenchmarkDiffer(DifferenceStyle.HUMANIZED, 4, OutputFormat.MARKDOWN),
            [
                (2,        2,        '`0%`'),
                (2,        5,        '**`+150% ❌`**'),
                (5,        2,        '**`-60% ✅`**'),
                (2.0,      2.0,      '`0%`'),
                (2,        2.0,      '`0%`'),
                (2.0,      2,        '`0%`'),
                (2,        2.5,      '**`+25% ❌`**'),
                (2.5,      2,        '**`-20% ✅`**'),

                (0,        0,        '`0%`'),
                (0,        2,        '`+INF%`'),
                (0,        -2,       '`-INF%`'),

                (-3,       -1,       '**`+66.67% ❌`**'),
                (-1,       -3,       '**`-200% ✅`**'),
                (2,        0,        '**`-100% ✅`**'),
                (-2,       0,        '**`+100% ❌`**'),

                (1.00006,  1,        '**`-0.01% ✅`**'),
                (1,        1.00006,  '**`+0.01% ❌`**'),
                (1.000004, 1,        '`-0%`'),
                (1,        1.000004, '`+0%`'),
            ],
        )

    def test_type_mismatch(self):
        """Values that are not both numbers (or both dicts) are reported as '!T' in every style."""
        for style in DifferenceStyle:
            with self.subTest(style=style):
                self._assert_single_value_diff_matches(
                    BenchmarkDiffer(style, 4, OutputFormat.JSON),
                    [
                        (1,   {},  '!T'),
                        ({},  1,   '!T'),
                        (1.5, {},  '!T'),
                        ({},  1.5, '!T'),
                        ('1', {},  '!T'),
                        ({},  '1', '!T'),
                        (1,   '1', '!T'),
                        ('1', 1,   '!T'),
                        (1.5, '1', '!T'),
                        ('1', 1.5, '!T'),
                        ('1', '1', '!T'),
                    ],
                )

    def test_version_mismatch(self):
        """Dicts whose 'version' entries differ collapse to '!V' regardless of their other content."""
        for style in DifferenceStyle:
            with self.subTest(style=style):
                self._assert_single_value_diff_matches(
                    BenchmarkDiffer(style, 4, OutputFormat.JSON),
                    [
                        ({'a': 123, 'version': 1},      {'a': 123, 'version': 2},      '!V'),
                        ({'a': 123, 'version': 2},      {'a': 123, 'version': 1},      '!V'),
                        ({'a': 123, 'version': 'a'},    {'a': 123, 'version': 'b'},    '!V'),
                        ({'a': 123, 'version': 'a'},    {'a': 123, 'version': 1},      '!V'),

                        ({'a': 'a', 'version': 1},      {'a': 'a', 'version': 2},      '!V'),
                        ({'a': {}, 'version': 1},       {'a': {}, 'version': 2},       '!V'),
                        ({'s': {'a': 1}, 'version': 1}, {'s': {'a': 1}, 'version': 2}, '!V'),

                        ({'a': 123, 'version': 1},      {'a': 456, 'version': 2},      '!V'),
                        ({'a': 'a', 'version': 1},      {'a': 'b', 'version': 2},      '!V'),
                        ({'s': {'a': 1}, 'version': 1}, {'s': {'a': 2}, 'version': 2}, '!V'),
                    ],
                )

    def test_missing(self):
        """A value present on only one side is reported as '!A' (gone after) or '!B' (absent before)."""
        for style in DifferenceStyle:
            with self.subTest(style=style):
                self._assert_single_value_diff_matches(
                    BenchmarkDiffer(style, None, OutputFormat.JSON),
                    [
                        (1,                 None,              '!A'),
                        (None,              1,                 '!B'),
                        ('1',               None,              '!A'),
                        (None,              '1',               '!B'),
                        ({},                None,              '!A'),
                        (None,              {},                '!B'),

                        ({'x': 1},          {},                {'x': '!A'}),
                        ({},                {'x': 1},          {'x': '!B'}),
                        ({'x': 1},          {'x': None},       {'x': '!A'}),
                        ({'x': None},       {'x': 1},          {'x': '!B'}),
                        ({'x': 1},          {'y': 1},          {'x': '!A', 'y': '!B'}),

                        ({'x': {}},         {},                {'x': '!A'}),
                        ({},                {'x': {}},         {'x': '!B'}),
                        ({'p': {'x': {}}},  {},                {'p': '!A'}),
                        ({},                {'p': {'x': {}}},  {'p': '!B'}),
                    ],
                )

    def test_missing_vs_null(self):
        """A key that is None on one side and absent on the other yields an empty diff."""
        for style in DifferenceStyle:
            with self.subTest(style=style):
                self._assert_single_value_diff_matches(
                    BenchmarkDiffer(style, None, OutputFormat.JSON),
                    [
                        ({'a': None}, {}, {}),
                        ({}, {'a': None}, {}),
                    ],
                    nest_result=False,
                )


class TestDiffTableFormatter(unittest.TestCase):
    """Tests of DiffTableFormatter.run() for the JSON, CONSOLE and MARKDOWN output formats."""

    def setUp(self):
        self.maxDiff = 10000

        # Small project -> preset -> attribute reports shared by the three table-layout tests below.
        self.report_before = {
            'project A': {
                'preset X': {'A1': 99, 'A2': 50, 'version': 1},
                'preset Y': {'A1': 0, 'A2': 50, 'version': 1},
            },
            'project B': {
                'preset X': {'A2': 50},
                'preset Y': {'A1': 0},
            },
            'project C': {
                'preset X': {'A1': 0, 'A2': 50, 'version': 1},
            },
            'project D': {
                'preset X': {'A1': 999},
            },
        }
        self.report_after = {
            'project A': {
                'preset X': {'A1': 100, 'A2': 50, 'version': 1},
                'preset Y': {'A1': 500, 'A2': 500, 'version': 2},
            },
            'project B': {
                'preset X': {'A1': 0},
                'preset Y': {'A2': 50},
            },
            'project C': {
                'preset Y': {'A1': 0, 'A2': 50, 'version': 1},
            },
            'project E': {
                'preset Y': {'A2': 999},
            },
        }

    def test_diff_table_formatter(self):
        """Markdown rendering of the fixture diff matches the stored humanized markdown fixture."""
        report_before = json.loads(load_fixture(SUMMARIZED_BENCHMARKS_DEVELOP_JSON_PATH))
        report_after = json.loads(load_fixture(SUMMARIZED_BENCHMARKS_BRANCH_JSON_PATH))
        differ = BenchmarkDiffer(DifferenceStyle.HUMANIZED, 4, OutputFormat.MARKDOWN)
        diff = differ.run(report_before, report_after)

        self.assertEqual(DiffTableFormatter.run(DiffTableSet(diff), OutputFormat.MARKDOWN), SUMMARIZED_DIFF_HUMANIZED_MD)

    def test_diff_table_formatter_json_absolute(self):
        """JSON output groups the ABSOLUTE diff as preset -> project -> attribute."""
        differ = BenchmarkDiffer(DifferenceStyle.ABSOLUTE, 4, OutputFormat.JSON)
        diff = differ.run(self.report_before, self.report_after)

        # NOTE(review): line breaks and the 4-space JSON indentation inside this literal are assumed;
        # confirm against the actual DiffTableFormatter JSON output.
        expected_formatted_table = dedent("""\
            {
                "preset X": {
                    "project A": {
                        "A1": 1,
                        "A2": 0
                    },
                    "project B": {
                        "A1": "!B",
                        "A2": "!A"
                    },
                    "project C": {
                        "A1": "!A",
                        "A2": "!A"
                    },
                    "project D": {
                        "A1": "!A",
                        "A2": "!A"
                    },
                    "project E": {
                        "A1": "!B",
                        "A2": "!B"
                    }
                },
                "preset Y": {
                    "project A": {
                        "A1": "!V",
                        "A2": "!V"
                    },
                    "project B": {
                        "A1": "!A",
                        "A2": "!B"
                    },
                    "project C": {
                        "A1": "!B",
                        "A2": "!B"
                    },
                    "project D": {
                        "A1": "!A",
                        "A2": "!A"
                    },
                    "project E": {
                        "A1": "!B",
                        "A2": "!B"
                    }
                }
            }"""
        )
        self.assertEqual(DiffTableFormatter.run(DiffTableSet(diff), OutputFormat.JSON), expected_formatted_table)

    def test_diff_table_formatter_console_relative(self):
        """CONSOLE output renders one bordered table per preset for the RELATIVE diff."""
        differ = BenchmarkDiffer(DifferenceStyle.RELATIVE, 4, OutputFormat.CONSOLE)
        diff = differ.run(self.report_before, self.report_after)

        # NOTE(review): cell padding is derived from the separator-row widths, assuming right-justified
        # cells and one blank line between tables; confirm against the actual formatter output.
        expected_formatted_table = dedent("""
            PRESET X
            |-----------|--------|----|
            |   project |     A1 | A2 |
            |-----------|--------|----|
            | project A | 0.0101 |  0 |
            | project B |     !B | !A |
            | project C |     !A | !A |
            | project D |     !A | !A |
            | project E |     !B | !B |
            |-----------|--------|----|

            PRESET Y
            |-----------|----|----|
            |   project | A1 | A2 |
            |-----------|----|----|
            | project A | !V | !V |
            | project B | !A | !B |
            | project C | !B | !B |
            | project D | !A | !A |
            | project E | !B | !B |
            |-----------|----|----|
        """)
        self.assertEqual(DiffTableFormatter.run(DiffTableSet(diff), OutputFormat.CONSOLE), expected_formatted_table)

    def test_diff_table_formatter_markdown_humanized(self):
        """MARKDOWN output renders one table per preset for the HUMANIZED diff, followed by a legend."""
        differ = BenchmarkDiffer(DifferenceStyle.HUMANIZED, 4, OutputFormat.MARKDOWN)
        diff = differ.run(self.report_before, self.report_after)

        # NOTE(review): cell padding is derived from the alignment-row widths, assuming right-justified
        # cells; the blank lines around the legend are also assumed. Confirm against the formatter output.
        expected_formatted_table = dedent("""
            ### `preset X`
            |   project |             A1 |   A2 |
            |:---------:|---------------:|-----:|
            | project A | **`+1.01% ❌`** | `0%` |
            | project B |           `!B` | `!A` |
            | project C |           `!A` | `!A` |
            | project D |           `!A` | `!A` |
            | project E |           `!B` | `!B` |

            ### `preset Y`
            |   project |   A1 |   A2 |
            |:---------:|-----:|-----:|
            | project A | `!V` | `!V` |
            | project B | `!A` | `!B` |
            | project C | `!B` | `!B` |
            | project D | `!A` | `!A` |
            | project E | `!B` | `!B` |


            `!V` = version mismatch
            `!B` = no value in the "before" version
            `!A` = no value in the "after" version
            `!T` = one or both values were not numeric and could not be compared
            `-0` = very small negative value rounded to zero
            `+0` = very small positive value rounded to zero

        """)
        self.assertEqual(DiffTableFormatter.run(DiffTableSet(diff), OutputFormat.MARKDOWN), expected_formatted_table)