benchmark_diff: Add table mode with support for json, console and markdown output

2023-10-03 13:03:40 +00:00 · 2022-03-18 14:51:11 +01:00 · 2022-03-18 14:51:11 +01:00 · 8c9856c52c
commit 8c9856c52c
parent ee5e878ad7
3 changed files with 605 additions and 90 deletions
--- a/scripts/externalTests/benchmark_diff.py
+++ b/scripts/externalTests/benchmark_diff.py
@ -4,19 +4,44 @@ from argparse import ArgumentParser
 from dataclasses import dataclass
 from enum import Enum
 from pathlib import Path
-from typing import Any, Optional, Union
+from textwrap import dedent
+from typing import Any, Mapping, Optional, Set, Sequence, Union
 import json
 import sys


+class DiffMode(Enum):
+    # How the diff is laid out. The values are the strings accepted on the command line.
+    # IN_PLACE: preserves the input JSON structure and replaces values with differences.
+    # TABLE: creates tables, assuming a 3-level project/preset/attribute structure.
+    IN_PLACE = 'inplace'
+    TABLE = 'table'
+
+
 class DifferenceStyle(Enum):
+    # How a single difference is expressed. The values are the strings accepted on the command line.
+    # RELATIVE: the difference is also divided by the original value.
+    # HUMANIZED: like RELATIVE but the value is a percentage and positive/negative changes are emphasized.
     ABSOLUTE = 'absolute'
     RELATIVE = 'relative'
     HUMANIZED = 'humanized'


+class OutputFormat(Enum):
+    # Format the diff is printed in. The values are the strings accepted by --output-format.
+    # JSON: raw JSON.
+    # CONSOLE: a table with human-readable values meant for console output.
+    # MARKDOWN: like CONSOLE but adjusted to render as markdown, with extras (legend, emoji).
+    JSON = 'json'
+    CONSOLE = 'console'
+    MARKDOWN = 'markdown'
+
+
 DEFAULT_RELATIVE_PRECISION = 4
-DEFAULT_DIFFERENCE_STYLE = DifferenceStyle.ABSOLUTE
+
+# Difference style used in each diff mode when none is given on the command line.
+DEFAULT_DIFFERENCE_STYLE = {
+    DiffMode.IN_PLACE: DifferenceStyle.ABSOLUTE,
+    DiffMode.TABLE: DifferenceStyle.HUMANIZED,
+}
+# Sanity checks: every key must be a DiffMode member and every value a DifferenceStyle member.
+assert all(t in DiffMode for t in DEFAULT_DIFFERENCE_STYLE)
+assert all(d in DifferenceStyle for d in DEFAULT_DIFFERENCE_STYLE.values())
+
+# Output format used in each diff mode when none is given on the command line.
+DEFAULT_OUTPUT_FORMAT = {
+    DiffMode.IN_PLACE: OutputFormat.JSON,
+    DiffMode.TABLE: OutputFormat.CONSOLE,
+}
+# Sanity checks: every key must be a DiffMode member and every value an OutputFormat member.
+assert all(m in DiffMode for m in DEFAULT_OUTPUT_FORMAT)
+assert all(o in OutputFormat for o in DEFAULT_OUTPUT_FORMAT.values())


 class ValidationError(Exception):
@ -30,14 +55,17 @@ class CommandLineError(ValidationError):
 class BenchmarkDiffer:
    difference_style: DifferenceStyle
    relative_precision: Optional[int]
+    output_format: OutputFormat

    def __init__(
        self,
        difference_style: DifferenceStyle,
        relative_precision: Optional[int],
+        output_format: OutputFormat,
    ):
        self.difference_style = difference_style
        self.relative_precision = relative_precision
+        self.output_format = output_format

    def run(self, before: Any, after: Any) -> Optional[Union[dict, str, int, float]]:
        if not isinstance(before, dict) or not isinstance(after, dict):
@ -106,8 +134,13 @@ class BenchmarkDiffer:
        return diff

    def _humanize_diff(self, diff: Union[str, int, float]) -> str:
+        def wrap(value: str, symbol: str):
+            return f"{symbol}{value}{symbol}"
+
+        markdown = (self.output_format == OutputFormat.MARKDOWN)
+
        if isinstance(diff, str) and diff.startswith('!'):
-            return diff
+            return wrap(diff, '`' if markdown else '')

        value: Union[str, int, float]
        if isinstance(diff, (int, float)):
@ -118,32 +151,209 @@ class BenchmarkDiffer:
                value = round(value, self.relative_precision - 2)
                if isinstance(value, float) and value.is_integer():
                    value = int(value)
+            suffix = ''
            prefix = ''
            if diff < 0:
                prefix = ''
+                if markdown:
+                    suffix += ' ✅'
            elif diff > 0:
                prefix = '+'
+                if markdown:
+                    suffix += ' ❌'
+            important = (diff != 0)
        else:
            value = diff
+            important = False
            prefix = ''
+            suffix = ''

-        return f"{prefix}{value}%"
+        return wrap(
+            wrap(
+                f"{prefix}{value}%{suffix}",
+                '`' if markdown else ''
+            ),
+            '**' if important and markdown else ''
+        )
+
+
+@dataclass(frozen=True)
+class DiffTable:
+    # Immutable mapping from a name to a sequence of cell values.
+    # NOTE(review): presumably keyed by column header; this class is not referenced anywhere
+    # in the visible code, so confirm whether it is still needed.
+    columns: Mapping[str, Sequence[Union[int, float, str]]]
+
+
+class DiffTableSet:
+    """
+    A set of tables, one per preset, built from a nested project -> preset -> attribute diff.
+
+    Every table has one row per project and one column per attribute. Combinations missing from
+    the diff become empty cells. A string stored in place of a project or preset dict is repeated
+    in every cell it covers.
+    """
+
+    table_headers: Sequence[str]   # Sorted preset names; one table per preset
+    row_headers: Sequence[str]     # Sorted project names; one row per project
+    column_headers: Sequence[str]  # Sorted attribute names; one column per attribute
+
+    # Cells is a nested dict rather than a 3D array so that conversion to JSON is straightforward
+    cells: Mapping[str, Mapping[str, Mapping[str, Union[int, float, str]]]] # preset -> project -> attribute
+
+    def __init__(self, diff: dict):
+        """Collects the headers from `diff` and fills in a cell for every header combination."""
+        self.table_headers = sorted(self._find_all_preset_names(diff))
+        self.column_headers = sorted(self._find_all_attribute_names(diff))
+        self.row_headers = sorted(diff)
+
+        # All dimensions must have unique values
+        assert len(self.table_headers) == len(set(self.table_headers))
+        assert len(self.column_headers) == len(set(self.column_headers))
+        assert len(self.row_headers) == len(set(self.row_headers))
+
+        self.cells = {
+            preset: {
+                project: {
+                    attribute: self._cell_content(diff, project, preset, attribute)
+                    for attribute in self.column_headers
+                }
+                for project in self.row_headers
+            }
+            for preset in self.table_headers
+        }
+
+    def calculate_row_column_width(self) -> int:
+        """Returns the length of the longest row header or 0 if there are no rows."""
+        # NOTE: Without `default` max() raises ValueError for an empty table set.
+        return max((len(row_header) for row_header in self.row_headers), default=0)
+
+    def calculate_column_widths(self, table_header: str) -> Sequence[int]:
+        """
+        Returns, for each column of the given table, the length of its widest item,
+        counting both the column header and the stringified cells.
+        """
+        assert table_header in self.table_headers
+
+        table = self.cells[table_header]
+        return [
+            max(
+                len(column_header),
+                max(
+                    (len(str(table[row_header][column_header])) for row_header in self.row_headers),
+                    default=0,
+                )
+            )
+            for column_header in self.column_headers
+        ]
+
+    @classmethod
+    def _find_all_preset_names(cls, diff: dict) -> Set[str]:
+        """Returns the second-level keys of `diff`. Projects that are not dicts are skipped."""
+        return {
+            preset
+            for project_diff in diff.values()
+            if isinstance(project_diff, dict)
+            for preset in project_diff
+        }
+
+    @classmethod
+    def _find_all_attribute_names(cls, diff: dict) -> Set[str]:
+        """Returns the third-level keys of `diff`. Projects and presets that are not dicts are skipped."""
+        return {
+            attribute
+            for project_diff in diff.values()
+            if isinstance(project_diff, dict)
+            for preset_diff in project_diff.values()
+            if isinstance(preset_diff, dict)
+            for attribute in preset_diff
+        }
+
+    @classmethod
+    def _cell_content(cls, diff: dict, project: str, preset: str, attribute: str) -> Union[int, float, str]:
+        """
+        Returns the value to put in the cell for the given project, preset and attribute.
+
+        A string found at the project or preset level is returned as is, which makes it
+        fill all the cells below it. A missing preset or attribute gives an empty string.
+        """
+        assert project in diff
+
+        project_diff = diff[project]
+        if isinstance(project_diff, str):
+            return project_diff
+        if preset not in project_diff:
+            return ''
+
+        preset_diff = project_diff[preset]
+        if isinstance(preset_diff, str):
+            return preset_diff
+        if attribute not in preset_diff:
+            return ''
+
+        return preset_diff[attribute]
+
+
+class DiffTableFormatter:
+    """Renders a `DiffTableSet` as JSON, as a plain-text console table or as a markdown table."""
+
+    # Title of the first column, which lists the row headers
+    ROW_HEADER_TITLE = 'project'
+
+    LEGEND = dedent("""
+        `!V` = version mismatch
+        `!B` = no value in the "before" version
+        `!A` = no value in the "after" version
+        `!T` = one or both values were not numeric and could not be compared
+        `-0` = very small negative value rounded to zero
+        `+0` = very small positive value rounded to zero
+    """)
+
+    @classmethod
+    def run(cls, diff_table_set: DiffTableSet, output_format: OutputFormat):
+        """
+        Returns the whole table set formatted as a single string.
+
+        JSON output is a dump of the cells. The other formats produce one table per table header;
+        console tables are framed with separator rows and markdown output ends with a legend.
+        """
+        if output_format == OutputFormat.JSON:
+            return json.dumps(diff_table_set.cells, indent=4, sort_keys=True)
+
+        assert output_format in {OutputFormat.CONSOLE, OutputFormat.MARKDOWN}
+        is_console = (output_format == OutputFormat.CONSOLE)
+
+        lines = []
+        for table_header in diff_table_set.table_headers:
+            column_widths = [
+                # NOTE: The first column contains its own title too. It must be wide enough to fit
+                # it or the header row would stick out when all the row headers are shorter.
+                max(len(cls.ROW_HEADER_TITLE), diff_table_set.calculate_row_column_width()),
+                *diff_table_set.calculate_column_widths(table_header),
+            ]
+            separator = cls._format_separator_row(column_widths, output_format)
+
+            # Every table is preceded by an empty line and its title
+            lines.append('')
+            lines.append(table_header.upper() if is_console else f'### `{table_header}`')
+
+            if is_console:
+                lines.append(separator)
+            lines.append(cls._format_data_row([cls.ROW_HEADER_TITLE, *diff_table_set.column_headers], column_widths))
+            lines.append(separator)
+
+            for row_header in diff_table_set.row_headers:
+                row = [
+                    diff_table_set.cells[table_header][row_header][column_header]
+                    for column_header in diff_table_set.column_headers
+                ]
+                lines.append(cls._format_data_row([row_header, *row], column_widths))
+
+            if is_console:
+                lines.append(separator)
+
+        output = ''.join(f'{line}\n' for line in lines)
+        if output_format == OutputFormat.MARKDOWN:
+            output += f'\n{cls.LEGEND}\n'
+        return output
+
+    @classmethod
+    def _format_separator_row(cls, widths: Sequence[int], output_format: OutputFormat):
+        """
+        Returns a horizontal rule matching the given column widths.
+
+        In markdown the rule also carries the alignment markers: colons on both sides of the
+        first column and a trailing colon on all the other ones.
+        """
+        assert output_format in {OutputFormat.CONSOLE, OutputFormat.MARKDOWN}
+
+        dashes = ['-' * width for width in widths]
+        if output_format == OutputFormat.MARKDOWN:
+            return '|:' + ':|-'.join(dashes) + ':|'
+        return '|-' + '-|-'.join(dashes) + '-|'
+
+    @classmethod
+    def _format_data_row(cls, cells: Sequence[Union[int, float, str]], widths: Sequence[int]):
+        """Returns a table row with every cell converted to a string and right-justified to its column width."""
+        assert len(cells) == len(widths)
+
+        return '| ' + ' | '.join(str(cell).rjust(width) for cell, width in zip(cells, widths)) + ' |'


 @dataclass(frozen=True)
 class CommandLineOptions:
+    # Immutable set of options produced by process_commandline().
+    # difference_style and output_format already have the per-mode defaults applied.
+    diff_mode: DiffMode
     report_before: Path
     report_after: Path
     difference_style: DifferenceStyle
     relative_precision: int
+    output_format: OutputFormat


 def process_commandline() -> CommandLineOptions:
    script_description = (
-        "Compares summarized benchmark reports and outputs JSON with the same structure but listing only differences."
+        "Compares summarized benchmark reports and outputs JSON with the same structure but listing only differences. "
+        "Can also print the output as markdown table and format the values to make differences stand out more."
    )

    parser = ArgumentParser(description=script_description)
+    parser.add_argument(
+        dest='diff_mode',
+        choices=[m.value for m in DiffMode],
+        help=(
+            "Diff mode: "
+            f"'{DiffMode.IN_PLACE.value}' preserves input JSON structure and replace values with differences; "
+            f"'{DiffMode.TABLE.value}' creates a table assuming 3-level project/preset/attribute structure."
+        )
+    )
    parser.add_argument(dest='report_before', help="Path to a JSON file containing original benchmark results.")
    parser.add_argument(dest='report_after', help="Path to a JSON file containing new benchmark results.")
    parser.add_argument(
@ -156,7 +366,8 @@ def process_commandline() -> CommandLineOptions:
            f"'{DifferenceStyle.RELATIVE.value}' also divides by the original; "
            f"'{DifferenceStyle.HUMANIZED.value}' is like relative but value is a percentage and "
            "positive/negative changes are emphasized. "
-            f"(default: '{DEFAULT_DIFFERENCE_STYLE}')."
+            f"(default: '{DEFAULT_DIFFERENCE_STYLE[DiffMode.IN_PLACE]}' in '{DiffMode.IN_PLACE.value}' mode, "
+            f"'{DEFAULT_DIFFERENCE_STYLE[DiffMode.TABLE]}' in '{DiffMode.TABLE.value}' mode)"
        )
    )
    # NOTE: Negative values are valid for precision. round() handles them in a sensible way.
@ -173,21 +384,47 @@ def process_commandline() -> CommandLineOptions:
            f"(default: {DEFAULT_RELATIVE_PRECISION})"
        )
    )
+    parser.add_argument(
+        '--output-format',
+        dest='output_format',
+        choices=[o.value for o in OutputFormat],
+        help=(
+            "The format to use for the diff: "
+            f"'{OutputFormat.JSON.value}' is raw JSON; "
+            f"'{OutputFormat.CONSOLE.value}' is a table with human-readable values that will look good in the console output. "
+            f"'{OutputFormat.MARKDOWN.value}' is similar '{OutputFormat.CONSOLE.value}' but adjusted to "
+            "render as proper markdown and with extra elements (legend, emoji to make non-zero values stand out more, etc)."
+            f"(default: '{DEFAULT_OUTPUT_FORMAT[DiffMode.IN_PLACE]}' in '{DiffMode.IN_PLACE.value}' mode, "
+            f"'{DEFAULT_OUTPUT_FORMAT[DiffMode.TABLE]}' in '{DiffMode.TABLE.value}' mode)"
+        )
+    )

    options = parser.parse_args()

    if options.difference_style is not None:
        difference_style = DifferenceStyle(options.difference_style)
    else:
-        difference_style = DEFAULT_DIFFERENCE_STYLE
+        difference_style = DEFAULT_DIFFERENCE_STYLE[DiffMode(options.diff_mode)]
+
+    if options.output_format is not None:
+        output_format = OutputFormat(options.output_format)
+    else:
+        output_format = DEFAULT_OUTPUT_FORMAT[DiffMode(options.diff_mode)]

    processed_options = CommandLineOptions(
+        diff_mode=DiffMode(options.diff_mode),
        report_before=Path(options.report_before),
        report_after=Path(options.report_after),
        difference_style=difference_style,
        relative_precision=options.relative_precision,
+        output_format=output_format,
    )

+    if processed_options.diff_mode == DiffMode.IN_PLACE and processed_options.output_format != OutputFormat.JSON:
+        raise CommandLineError(
+            f"Only the '{OutputFormat.JSON.value}' output format is supported in the '{DiffMode.IN_PLACE.value}' mode."
+        )
+
    return processed_options


@ -195,13 +432,17 @@ def main():
    try:
        options = process_commandline()

-        differ = BenchmarkDiffer(options.difference_style, options.relative_precision)
+        differ = BenchmarkDiffer(options.difference_style, options.relative_precision, options.output_format)
        diff = differ.run(
            json.loads(options.report_before.read_text('utf-8')),
            json.loads(options.report_after.read_text('utf-8')),
        )

-        print(json.dumps(diff, indent=4, sort_keys=True))
+        if options.diff_mode == DiffMode.IN_PLACE:
+            print(json.dumps(diff, indent=4, sort_keys=True))
+        else:
+            assert options.diff_mode == DiffMode.TABLE
+            print(DiffTableFormatter.run(DiffTableSet(diff), options.output_format))

        return 0
    except CommandLineError as exception:
--- a/test/scripts/fixtures/summarized-benchmark-diff-develop-branch-humanized.md
+++ b/test/scripts/fixtures/summarized-benchmark-diff-develop-branch-humanized.md
@ -0,0 +1,75 @@
+
+### `ir-no-optimize`
+|   project |  bytecode_size | deployment_gas |     method_gas |
+|:---------:|---------------:|---------------:|---------------:|
+|    bleeps |                |                |                |
+|    colony |                |                |                |
+| elementfi |                |                |           `0%` |
+|       ens |           `!A` |           `!A` |           `!A` |
+|     euler | **`+1.43% ❌`** |           `0%` | **`+2.47% ❌`** |
+|    gnosis |           `!B` |           `!B` |           `!B` |
+|  zeppelin |                |                |                |
+
+### `ir-optimize-evm+yul`
+|   project |   bytecode_size |  deployment_gas | method_gas |
+|:---------:|----------------:|----------------:|-----------:|
+|    bleeps |  **`+0.53% ❌`** |            `0%` |      `-0%` |
+|    colony |            `!A` |            `!A` |       `!A` |
+| elementfi |                 |                 |            |
+|       ens |            `!A` |            `!A` |       `!A` |
+|     euler | **`+12.64% ❌`** | **`+11.98% ❌`** |       `0%` |
+|    gnosis |            `!B` |            `!B` |       `!B` |
+|  zeppelin |                 |                 |            |
+
+### `ir-optimize-evm-only`
+|   project | bytecode_size | deployment_gas | method_gas |
+|:---------:|--------------:|---------------:|-----------:|
+|    bleeps |               |                |            |
+|    colony |               |                |            |
+| elementfi |          `!B` |           `!B` |       `!B` |
+|       ens |          `!A` |           `!A` |       `!A` |
+|     euler |          `!V` |           `!V` |       `!V` |
+|    gnosis |          `!B` |           `!B` |       `!B` |
+|  zeppelin |               |                |            |
+
+### `legacy-no-optimize`
+|   project | bytecode_size | deployment_gas | method_gas |
+|:---------:|--------------:|---------------:|-----------:|
+|    bleeps |               |                |            |
+|    colony |          `!B` |           `!B` |       `!B` |
+| elementfi |          `!A` |           `!B` |            |
+|       ens |          `!A` |           `!A` |       `!A` |
+|     euler |          `!V` |           `!V` |       `!V` |
+|    gnosis |          `!B` |           `!B` |       `!B` |
+|  zeppelin |               |                |            |
+
+### `legacy-optimize-evm+yul`
+|   project | bytecode_size | deployment_gas | method_gas |
+|:---------:|--------------:|---------------:|-----------:|
+|    bleeps |          `0%` |           `0%` |       `0%` |
+|    colony |          `0%` |                |            |
+| elementfi |          `!A` |           `!B` |            |
+|       ens |          `!A` |           `!A` |       `!A` |
+|     euler |          `!V` |           `!V` |       `!V` |
+|    gnosis |          `!B` |           `!B` |       `!B` |
+|  zeppelin |          `0%` |           `0%` |            |
+
+### `legacy-optimize-evm-only`
+|   project | bytecode_size | deployment_gas | method_gas |
+|:---------:|--------------:|---------------:|-----------:|
+|    bleeps |               |                |            |
+|    colony |               |                |            |
+| elementfi |          `!A` |           `!A` |       `!A` |
+|       ens |          `!A` |           `!A` |       `!A` |
+|     euler |          `!V` |           `!V` |       `!V` |
+|    gnosis |          `!B` |           `!B` |       `!B` |
+|  zeppelin |               |                |            |
+
+
+`!V` = version mismatch
+`!B` = no value in the "before" version
+`!A` = no value in the "after" version
+`!T` = one or both values were not numeric and could not be compared
+`-0` = very small negative value rounded to zero
+`+0` = very small positive value rounded to zero
+
--- a/test/scripts/test_externalTests_benchmark_diff.py
+++ b/test/scripts/test_externalTests_benchmark_diff.py
@ -1,5 +1,6 @@
 #!/usr/bin/env python3

+from textwrap import dedent
 import json
 import unittest

@ -7,12 +8,15 @@ from unittest_helpers import FIXTURE_DIR, load_fixture

 # NOTE: This test file only works with scripts/ added to PYTHONPATH so pylint can't find the imports
 # pragma pylint: disable=import-error
-from externalTests.benchmark_diff import BenchmarkDiffer, DifferenceStyle
+from externalTests.benchmark_diff import BenchmarkDiffer, DifferenceStyle, DiffTableSet, DiffTableFormatter, OutputFormat
 # pragma pylint: enable=import-error

 SUMMARIZED_BENCHMARKS_DEVELOP_JSON_PATH = FIXTURE_DIR / 'summarized-benchmarks-develop.json'
 SUMMARIZED_BENCHMARKS_BRANCH_JSON_PATH = FIXTURE_DIR / 'summarized-benchmarks-branch.json'

+SUMMARIZED_DIFF_HUMANIZED_MD_PATH = FIXTURE_DIR / 'summarized-benchmark-diff-develop-branch-humanized.md'
+SUMMARIZED_DIFF_HUMANIZED_MD = load_fixture(SUMMARIZED_DIFF_HUMANIZED_MD_PATH)
+

 class TestBenchmarkDiff(unittest.TestCase):
    def setUp(self):
@ -108,7 +112,7 @@ class TestBenchmarkDiff(unittest.TestCase):
            "gnosis": "!B",
            "ens": "!A",
        }
-        differ = BenchmarkDiffer(DifferenceStyle.ABSOLUTE, None)
+        differ = BenchmarkDiffer(DifferenceStyle.ABSOLUTE, None, OutputFormat.JSON)
        self.assertEqual(differ.run(report_before, report_after), expected_diff)


@ -138,105 +142,137 @@ class TestBenchmarkDiffer(unittest.TestCase):

    def test_empty(self):
        for style in DifferenceStyle:
-            differ = BenchmarkDiffer(style, None)
+            differ = BenchmarkDiffer(style, None, OutputFormat.JSON)
            self._assert_single_value_diff_matches(differ, [({}, {}, {})], nest_result=False)

    def test_null(self):
        for style in DifferenceStyle:
-            differ = BenchmarkDiffer(style, None)
+            differ = BenchmarkDiffer(style, None, OutputFormat.JSON)
            self._assert_single_value_diff_matches(differ, [(None, None, {})], nest_result=False)

    def test_number_diff_absolute_json(self):
-        self._assert_single_value_diff_matches(
-            BenchmarkDiffer(DifferenceStyle.ABSOLUTE, 4),
-            [
-                (2,   2,    0),
-                (2,   5,    3),
-                (5,   2,   -3),
-                (2.0, 2.0,  0),
-                (2,   2.0,  0),
-                (2.0, 2,    0),
-                (2,   2.5,  2.5 - 2),
-                (2.5, 2,    2 - 2.5),
+        for output_format in OutputFormat:
+            self._assert_single_value_diff_matches(
+                BenchmarkDiffer(DifferenceStyle.ABSOLUTE, 4, output_format),
+                [
+                    (2,   2,    0),
+                    (2,   5,    3),
+                    (5,   2,   -3),
+                    (2.0, 2.0,  0),
+                    (2,   2.0,  0),
+                    (2.0, 2,    0),
+                    (2,   2.5,  2.5 - 2),
+                    (2.5, 2,    2 - 2.5),

-                (0,   0,    0),
-                (0,   2,    2),
-                (0,   -2,  -2),
+                    (0,   0,    0),
+                    (0,   2,    2),
+                    (0,   -2,  -2),

-                (-3, -1,    2),
-                (-1, -3,   -2),
-                (2,   0,   -2),
-                (-2,  0,    2),
+                    (-3, -1,    2),
+                    (-1, -3,   -2),
+                    (2,   0,   -2),
+                    (-2,  0,    2),

-                (1.00006, 1,  1 - 1.00006),
-                (1, 1.00006,  1.00006 - 1),
-                (1.00004, 1, 1 - 1.00004),
-                (1, 1.00004, 1.00004 - 1),
-            ],
-        )
+                    (1.00006, 1,  1 - 1.00006),
+                    (1, 1.00006,  1.00006 - 1),
+                    (1.00004, 1, 1 - 1.00004),
+                    (1, 1.00004, 1.00004 - 1),
+                ],
+            )

    def test_number_diff_json(self):
+        for output_format in OutputFormat:
+            self._assert_single_value_diff_matches(
+                BenchmarkDiffer(DifferenceStyle.RELATIVE, 4, output_format),
+                [
+                    (2,   2,   0),
+                    (2,   5,   (5 - 2) / 2),
+                    (5,   2,   (2 - 5) / 5),
+                    (2.0, 2.0, 0),
+                    (2,   2.0, 0),
+                    (2.0, 2,   0),
+                    (2,   2.5, (2.5 - 2) / 2),
+                    (2.5, 2,   (2 - 2.5) / 2.5),
+
+                    (0,   0,   0),
+                    (0,   2,   '+INF'),
+                    (0,   -2,  '-INF'),
+
+                    (-3, -1,   0.6667),
+                    (-1, -3,  -2),
+                    (2,   0,  -1),
+                    (-2,  0,   1),
+
+                    (1.00006, 1,   -0.0001),
+                    (1, 1.00006,    0.0001),
+                    (1.000004, 1, '-0'),
+                    (1, 1.000004, '+0'),
+                ],
+            )
+
+    def test_number_diff_humanized_json_and_console(self):
+        for output_format in [OutputFormat.JSON, OutputFormat.CONSOLE]:
+            self._assert_single_value_diff_matches(
+                BenchmarkDiffer(DifferenceStyle.HUMANIZED, 4, output_format),
+                [
+                    (2,   2,      '0%'),
+                    (2,   5,   '+150%'),
+                    (5,   2,    '-60%'),
+                    (2.0, 2.0,    '0%'),
+                    (2,   2.0,    '0%'),
+                    (2.0, 2,      '0%'),
+                    (2,   2.5,  '+25%'),
+                    (2.5, 2,    '-20%'),
+
+                    (0,   0,      '0%'),
+                    (0,   2,   '+INF%'),
+                    (0,   -2,  '-INF%'),
+
+                    (-3, -1, '+66.67%'),
+                    (-1, -3,   '-200%'),
+                    (2,   0,   '-100%'),
+                    (-2,  0,   '+100%'),
+
+                    (1.00006, 1,  '-0.01%'),
+                    (1, 1.00006,  '+0.01%'),
+                    (1.000004, 1,    '-0%'),
+                    (1, 1.000004,    '+0%'),
+                ],
+            )
+
+    def test_number_diff_humanized_markdown(self):
        self._assert_single_value_diff_matches(
-            BenchmarkDiffer(DifferenceStyle.RELATIVE, 4),
+            BenchmarkDiffer(DifferenceStyle.HUMANIZED, 4, OutputFormat.MARKDOWN),
            [
-                (2,   2,   0),
-                (2,   5,   (5 - 2) / 2),
-                (5,   2,   (2 - 5) / 5),
-                (2.0, 2.0, 0),
-                (2,   2.0, 0),
-                (2.0, 2,   0),
-                (2,   2.5, (2.5 - 2) / 2),
-                (2.5, 2,   (2 - 2.5) / 2.5),
+                (2,   2,             '`0%`'),
+                (2,   5,   '**`+150% ❌`**'),
+                (5,   2,    '**`-60% ✅`**'),
+                (2.0, 2.0,           '`0%`'),
+                (2,   2.0,           '`0%`'),
+                (2.0, 2,             '`0%`'),
+                (2,   2.5,  '**`+25% ❌`**'),
+                (2.5, 2,    '**`-20% ✅`**'),

-                (0,   0,   0),
-                (0,   2,   '+INF'),
-                (0,   -2,  '-INF'),
+                (0,   0,             '`0%`'),
+                (0,   2,          '`+INF%`'),
+                (0,   -2,         '`-INF%`'),

-                (-3, -1,   0.6667),
-                (-1, -3,  -2),
-                (2,   0,  -1),
-                (-2,  0,   1),
+                (-3, -1, '**`+66.67% ❌`**'),
+                (-1, -3,   '**`-200% ✅`**'),
+                (2,   0,   '**`-100% ✅`**'),
+                (-2,  0,   '**`+100% ❌`**'),

-                (1.00006, 1,   -0.0001),
-                (1, 1.00006,    0.0001),
-                (1.000004, 1, '-0'),
-                (1, 1.000004, '+0'),
-            ],
-        )
-
-    def test_number_diff_humanized_json(self):
-        self._assert_single_value_diff_matches(
-            BenchmarkDiffer(DifferenceStyle.HUMANIZED, 4),
-            [
-                (2,   2,      '0%'),
-                (2,   5,   '+150%'),
-                (5,   2,    '-60%'),
-                (2.0, 2.0,    '0%'),
-                (2,   2.0,    '0%'),
-                (2.0, 2,      '0%'),
-                (2,   2.5,  '+25%'),
-                (2.5, 2,    '-20%'),
-
-                (0,   0,      '0%'),
-                (0,   2,   '+INF%'),
-                (0,   -2,  '-INF%'),
-
-                (-3, -1, '+66.67%'),
-                (-1, -3,   '-200%'),
-                (2,   0,   '-100%'),
-                (-2,  0,   '+100%'),
-
-                (1.00006, 1,  '-0.01%'),
-                (1, 1.00006,  '+0.01%'),
-                (1.000004, 1,    '-0%'),
-                (1, 1.000004,    '+0%'),
+                (1.00006, 1,  '**`-0.01% ✅`**'),
+                (1, 1.00006,  '**`+0.01% ❌`**'),
+                (1.000004, 1,           '`-0%`'),
+                (1, 1.000004,           '`+0%`'),
            ],
        )

    def test_type_mismatch(self):
        for style in DifferenceStyle:
            self._assert_single_value_diff_matches(
-                BenchmarkDiffer(style, 4),
+                BenchmarkDiffer(style, 4, OutputFormat.JSON),
                [
                    (1, {}, '!T'),
                    ({}, 1, '!T'),
@ -255,7 +291,7 @@ class TestBenchmarkDiffer(unittest.TestCase):
    def test_version_mismatch(self):
        for style in DifferenceStyle:
            self._assert_single_value_diff_matches(
-                BenchmarkDiffer(style, 4),
+                BenchmarkDiffer(style, 4, OutputFormat.JSON),
                [
                    ({'a': 123, 'version': 1}, {'a': 123, 'version': 2}, '!V'),
                    ({'a': 123, 'version': 2}, {'a': 123, 'version': 1}, '!V'),
@ -275,7 +311,7 @@ class TestBenchmarkDiffer(unittest.TestCase):
    def test_missing(self):
        for style in DifferenceStyle:
            self._assert_single_value_diff_matches(
-                BenchmarkDiffer(style, None),
+                BenchmarkDiffer(style, None, OutputFormat.JSON),
                [
                    (1, None, '!A'),
                    (None, 1, '!B'),
@ -300,10 +336,173 @@ class TestBenchmarkDiffer(unittest.TestCase):
    def test_missing_vs_null(self):
        for style in DifferenceStyle:
            self._assert_single_value_diff_matches(
-                BenchmarkDiffer(style, None),
+                BenchmarkDiffer(style, None, OutputFormat.JSON),
                [
                    ({'a': None}, {}, {}),
                    ({}, {'a': None}, {}),
                ],
                nest_result=False,
            )
+
+
+class TestDiffTableFormatter(unittest.TestCase):
+    def setUp(self):
+        self.maxDiff = 10000
+
+        self.report_before = {
+            'project A': {
+                'preset X': {'A1':  99, 'A2': 50, 'version': 1},
+                'preset Y': {'A1':   0, 'A2': 50, 'version': 1},
+            },
+            'project B': {
+                'preset X': {           'A2': 50},
+                'preset Y': {'A1':   0},
+            },
+            'project C': {
+                'preset X': {'A1':   0, 'A2': 50, 'version': 1},
+            },
+            'project D': {
+                'preset X': {'A1': 999},
+            },
+        }
+        self.report_after = {
+            'project A': {
+                'preset X': {'A1': 100, 'A2':  50, 'version': 1},
+                'preset Y': {'A1': 500, 'A2': 500, 'version': 2},
+            },
+            'project B': {
+                'preset X': {'A1':   0},
+                'preset Y': {           'A2': 50},
+            },
+            'project C': {
+                'preset Y': {'A1':   0, 'A2': 50, 'version': 1},
+            },
+            'project E': {
+                'preset Y': {           'A2': 999},
+            },
+        }
+
+    def test_diff_table_formatter(self):
+        report_before = json.loads(load_fixture(SUMMARIZED_BENCHMARKS_DEVELOP_JSON_PATH))
+        report_after = json.loads(load_fixture(SUMMARIZED_BENCHMARKS_BRANCH_JSON_PATH))
+        differ = BenchmarkDiffer(DifferenceStyle.HUMANIZED, 4, OutputFormat.MARKDOWN)
+        diff = differ.run(report_before, report_after)
+
+        self.assertEqual(DiffTableFormatter.run(DiffTableSet(diff), OutputFormat.MARKDOWN), SUMMARIZED_DIFF_HUMANIZED_MD)
+
+    def test_diff_table_formatter_json_absolute(self):
+        differ = BenchmarkDiffer(DifferenceStyle.ABSOLUTE, 4, OutputFormat.JSON)
+        diff = differ.run(self.report_before, self.report_after)
+
+        expected_formatted_table = dedent("""\
+            {
+                "preset X": {
+                    "project A": {
+                        "A1": 1,
+                        "A2": 0
+                    },
+                    "project B": {
+                        "A1": "!B",
+                        "A2": "!A"
+                    },
+                    "project C": {
+                        "A1": "!A",
+                        "A2": "!A"
+                    },
+                    "project D": {
+                        "A1": "!A",
+                        "A2": "!A"
+                    },
+                    "project E": {
+                        "A1": "!B",
+                        "A2": "!B"
+                    }
+                },
+                "preset Y": {
+                    "project A": {
+                        "A1": "!V",
+                        "A2": "!V"
+                    },
+                    "project B": {
+                        "A1": "!A",
+                        "A2": "!B"
+                    },
+                    "project C": {
+                        "A1": "!B",
+                        "A2": "!B"
+                    },
+                    "project D": {
+                        "A1": "!A",
+                        "A2": "!A"
+                    },
+                    "project E": {
+                        "A1": "!B",
+                        "A2": "!B"
+                    }
+                }
+            }"""
+        )
+        self.assertEqual(DiffTableFormatter.run(DiffTableSet(diff), OutputFormat.JSON), expected_formatted_table)
+
+    def test_diff_table_formatter_console_relative(self):
+        differ = BenchmarkDiffer(DifferenceStyle.RELATIVE, 4, OutputFormat.CONSOLE)
+        diff = differ.run(self.report_before, self.report_after)
+
+        expected_formatted_table = dedent("""
+            PRESET X
+            |-----------|--------|----|
+            |   project |     A1 | A2 |
+            |-----------|--------|----|
+            | project A | 0.0101 |  0 |
+            | project B |     !B | !A |
+            | project C |     !A | !A |
+            | project D |     !A | !A |
+            | project E |     !B | !B |
+            |-----------|--------|----|
+
+            PRESET Y
+            |-----------|----|----|
+            |   project | A1 | A2 |
+            |-----------|----|----|
+            | project A | !V | !V |
+            | project B | !A | !B |
+            | project C | !B | !B |
+            | project D | !A | !A |
+            | project E | !B | !B |
+            |-----------|----|----|
+        """)
+        self.assertEqual(DiffTableFormatter.run(DiffTableSet(diff), OutputFormat.CONSOLE), expected_formatted_table)
+
+    def test_diff_table_formatter_markdown_humanized(self):
+        differ = BenchmarkDiffer(DifferenceStyle.HUMANIZED, 4, OutputFormat.MARKDOWN)
+        diff = differ.run(self.report_before, self.report_after)
+
+        expected_formatted_table = dedent("""
+            ### `preset X`
+            |   project |             A1 |   A2 |
+            |:---------:|---------------:|-----:|
+            | project A | **`+1.01% ❌`** | `0%` |
+            | project B |           `!B` | `!A` |
+            | project C |           `!A` | `!A` |
+            | project D |           `!A` | `!A` |
+            | project E |           `!B` | `!B` |
+
+            ### `preset Y`
+            |   project |   A1 |   A2 |
+            |:---------:|-----:|-----:|
+            | project A | `!V` | `!V` |
+            | project B | `!A` | `!B` |
+            | project C | `!B` | `!B` |
+            | project D | `!A` | `!A` |
+            | project E | `!B` | `!B` |
+
+
+            `!V` = version mismatch
+            `!B` = no value in the "before" version
+            `!A` = no value in the "after" version
+            `!T` = one or both values were not numeric and could not be compared
+            `-0` = very small negative value rounded to zero
+            `+0` = very small positive value rounded to zero
+
+        """)
+        self.assertEqual(DiffTableFormatter.run(DiffTableSet(diff), OutputFormat.MARKDOWN), expected_formatted_table)