download_benchmarks: Script for downloading benchmark results from CircleCI

Kamil Śliwak 2022-03-18 22:46:36 +01:00
parent f0e43921f7
commit 9618cb947d
5 changed files with 630 additions and 3 deletions

.circleci/config.yml

@@ -690,9 +690,16 @@ jobs:
       - run:
           name: Install pip
           command: apt -q update && apt install -y python3-pip
       - run:
-          name: Install pylint
-          command: python3 -m pip install pylint z3-solver pygments-lexer-solidity parsec tabulate deepdiff colorama
-          # also z3-solver, parsec and tabulate to make sure pylint knows about this module, pygments-lexer-solidity for docs
+          name: Install pylint and dependencies of the scripts that will be linted
+          command: python3 -m pip install
+            pylint
+            z3-solver
+            pygments-lexer-solidity
+            parsec
+            tabulate
+            deepdiff
+            colorama
+            requests
       - run:
           name: Linting Python Scripts
           command: ./scripts/pylint_all.py
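The new multi-line `command:` value is a YAML plain scalar: the indented continuation lines fold back into a single line separated by spaces, so the step still runs a single pip invocation. A quick sanity check of that folding behaviour, assuming PyYAML is available (it is not part of this commit):

# Demonstrates YAML plain-scalar folding on a shortened version of the config above.
import yaml

snippet = '''
command: python3 -m pip install
  pylint
  z3-solver
'''
# Continuation lines fold into one space-separated command line.
assert yaml.safe_load(snippet)['command'] == 'python3 -m pip install pylint z3-solver'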

scripts/common/git_helpers.py

@@ -0,0 +1,19 @@
import subprocess


def run_git_command(command):
    process = subprocess.run(
        command,
        encoding='utf8',
        capture_output=True,
        check=True,
    )
    return process.stdout.strip()


def git_current_branch():
    return run_git_command(['git', 'symbolic-ref', 'HEAD', '--short'])


def git_commit_hash(ref: str = 'HEAD'):
    return run_git_command(['git', 'rev-parse', '--verify', ref])
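A minimal sketch of how these helpers are used by the script below (assumes scripts/ is on PYTHONPATH and the working directory is inside a git checkout; since run_git_command passes check=True, a failing git command raises subprocess.CalledProcessError):

from common.git_helpers import git_current_branch, git_commit_hash

print(git_current_branch())        # e.g. 'develop'
print(git_commit_hash())           # full hash of HEAD
print(git_commit_hash('develop'))  # full hash of a specific ref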

scripts/common/rest_api_helpers.py

@@ -0,0 +1,171 @@
from pathlib import Path
from typing import List, Mapping, Optional
import functools
import json
import operator
import shutil

import requests


class APIHelperError(Exception):
    pass


class DataUnavailable(APIHelperError):
    pass


class InvalidResponse(APIHelperError):
    pass


class FileAlreadyExists(APIHelperError):
    pass


def query_api(url: str, params: Mapping[str, str], debug_requests=False) -> dict:
    if debug_requests:
        print(f'REQUEST URL: {url}')
        if len(params) > 0:
            print(f'QUERY: {params}')

    response = requests.get(url, params=params)
    response.raise_for_status()

    if debug_requests:
        json_response = response.json()
        print('========== RESPONSE ==========')
        if json_response is not None:
            print(json.dumps(json_response, indent=4))
        else:
            print(response.content)
        print('==============================')

    return response.json()


def download_file(url: str, target_path: Path, overwrite=False):
    if not overwrite and target_path.exists():
        raise FileAlreadyExists(f"Refusing to overwrite existing file: '{target_path}'.")

    with requests.get(url, stream=True) as response:
        # Fail early on an HTTP error instead of writing the error page to the target file.
        response.raise_for_status()
        with open(target_path, 'wb') as target_file:
            shutil.copyfileobj(response.raw, target_file)


class Github:
    BASE_URL = 'https://api.github.com'

    project_slug: str
    debug_requests: bool

    def __init__(self, project_slug: str, debug_requests: bool):
        self.project_slug = project_slug
        self.debug_requests = debug_requests

    def pull_request(self, pr_id: int) -> dict:
        return query_api(
            f'{self.BASE_URL}/repos/{self.project_slug}/pulls/{pr_id}',
            {},
            self.debug_requests
        )


class CircleCI:
    # None might be a more logical default for max_pages but in most cases we'll actually
    # want some limit to prevent flooding the API with requests in case of a bug.
    DEFAULT_MAX_PAGES = 10
    BASE_URL = 'https://circleci.com/api/v2'

    project_slug: str
    debug_requests: bool

    def __init__(self, project_slug: str, debug_requests: bool):
        self.project_slug = project_slug
        self.debug_requests = debug_requests

    def paginated_query_api_iterator(self, url: str, params: Mapping[str, str], max_pages: Optional[int] = DEFAULT_MAX_PAGES):
        assert 'page-token' not in params

        page_count = 0
        next_page_token = None
        while max_pages is None or page_count < max_pages:
            if next_page_token is not None:
                params = {**params, 'page-token': next_page_token}

            json_response = query_api(url, params, self.debug_requests)
            yield json_response['items']
            next_page_token = json_response['next_page_token']
            page_count += 1
            if next_page_token is None:
                break

    def paginated_query_api(self, url: str, params: Mapping[str, str], max_pages: Optional[int] = DEFAULT_MAX_PAGES):
        return functools.reduce(operator.add, self.paginated_query_api_iterator(url, params, max_pages), [])

    def pipelines(
        self,
        branch: Optional[str] = None,
        commit_hash: Optional[str] = None,
        excluded_trigger_types: Optional[List[str]] = None,
    ) -> List[dict]:
        if excluded_trigger_types is None:
            excluded_trigger_types = []

        for items in self.paginated_query_api_iterator(
            f'{self.BASE_URL}/project/gh/{self.project_slug}/pipeline',
            {'branch': branch} if branch is not None else {},
            max_pages=10,
        ):
            matching_items = [
                item
                for item in items
                if (
                    (commit_hash is None or item['vcs']['revision'] == commit_hash) and
                    item['trigger']['type'] not in excluded_trigger_types
                )
            ]
            if len(matching_items) > 0:
                return matching_items

        return []

    def workflows(self, pipeline_id: str) -> List[dict]:
        return self.paginated_query_api(f'{self.BASE_URL}/pipeline/{pipeline_id}/workflow', {})

    def jobs(self, workflow_id: str) -> Mapping[str, dict]:
        items = self.paginated_query_api(f'{self.BASE_URL}/workflow/{workflow_id}/job', {})
        jobs_by_name = {job['name']: job for job in items}

        assert len(jobs_by_name) <= len(items)
        if len(jobs_by_name) < len(items):
            raise InvalidResponse("Job names in the workflow are not unique.")

        return jobs_by_name

    def job(self, workflow_id: str, name: str, require_success: bool = False) -> dict:
        jobs = self.jobs(workflow_id)
        if name not in jobs:
            raise DataUnavailable(f"Job {name} is not present in the workflow.")

        if require_success and jobs[name]['status'] != 'success':
            raise DataUnavailable(
                f"Job {name} has failed or is still running. "
                f"Current status: {jobs[name]['status']}."
            )

        return jobs[name]

    def artifacts(self, job_number: int) -> Mapping[str, dict]:
        items = self.paginated_query_api(f'{self.BASE_URL}/project/gh/{self.project_slug}/{job_number}/artifacts', {})
        artifacts_by_name = {artifact['path']: artifact for artifact in items}

        assert len(artifacts_by_name) <= len(items)
        if len(artifacts_by_name) < len(items):
            raise InvalidResponse("Names of artifacts attached to the job are not unique.")

        return artifacts_by_name

    @staticmethod
    def latest_item(items: List[dict]) -> Optional[dict]:
        sorted_items = sorted(items, key=lambda item: item['created_at'], reverse=True)
        return sorted_items[0] if len(sorted_items) > 0 else None
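These classes compose into the pipeline → workflow → job → artifact lookup chain that the download script below performs. A hedged sketch of that chain (requires network access; the project slug and job name are the ones used later in this commit):

from common.rest_api_helpers import CircleCI

circleci = CircleCI('ethereum/solidity', debug_requests=False)
pipeline = circleci.latest_item(circleci.pipelines(branch='develop', excluded_trigger_types=['schedule']))
assert pipeline is not None
workflow = circleci.latest_item(circleci.workflows(pipeline['id']))
job = circleci.job(workflow['id'], 'c_ext_benchmarks', require_success=True)
artifacts = circleci.artifacts(int(job['job_number']))
print(sorted(artifacts))  # paths like 'reports/externalTests/all-benchmarks.json'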

scripts/externalTests/download_benchmarks.py

@@ -0,0 +1,172 @@
#!/usr/bin/env python3

from argparse import ArgumentParser, Namespace
from pathlib import Path
from typing import Mapping, Optional
import sys

import requests

# Our scripts/ is not a proper Python package so we need to modify PYTHONPATH to import from it
# pragma pylint: disable=import-error,wrong-import-position
SCRIPTS_DIR = Path(__file__).parent.parent
sys.path.insert(0, str(SCRIPTS_DIR))
from common.git_helpers import git_current_branch, git_commit_hash
from common.rest_api_helpers import APIHelperError, CircleCI, Github, download_file
# pragma pylint: enable=import-error,wrong-import-position


def process_commandline() -> Namespace:
    script_description = (
        "Downloads benchmark results attached as artifacts to the c_ext_benchmarks job on CircleCI. "
        "If no options are specified, downloads results for the currently checked out git branch."
    )

    parser = ArgumentParser(description=script_description)
    target_definition = parser.add_mutually_exclusive_group()
    target_definition.add_argument(
        '--branch',
        dest='branch',
        help="Git branch that the job ran on.",
    )
    target_definition.add_argument(
        '--pr',
        dest='pull_request_id',
        type=int,
        help="Github PR ID that the job ran on.",
    )
    target_definition.add_argument(
        '--base-of-pr',
        dest='base_of_pr',
        type=int,
        help="ID of a Github PR that's based on top of the branch we're interested in."
    )
    parser.add_argument(
        '--any-commit',
        dest='ignore_commit_hash',
        default=False,
        action='store_true',
        help="Include pipelines that ran on a different commit as long as branch/PR matches."
    )
    parser.add_argument(
        '--overwrite',
        dest='overwrite',
        default=False,
        action='store_true',
        help="If artifacts already exist on disk, overwrite them.",
    )
    parser.add_argument(
        '--debug-requests',
        dest='debug_requests',
        default=False,
        action='store_true',
        help="Print detailed info about performed API requests and received responses.",
    )

    return parser.parse_args()


def download_benchmark_artifact(
    artifacts: Mapping[str, dict],
    benchmark_name: str,
    branch: str,
    commit_hash: str,
    overwrite: bool,
    silent: bool = False
):
    if not silent:
        print(f"Downloading artifact: {benchmark_name}-{branch}-{commit_hash[:8]}.json.")

    artifact_path = f'reports/externalTests/{benchmark_name}.json'
    if artifact_path not in artifacts:
        raise RuntimeError(f"Missing artifact: {artifact_path}.")

    download_file(
        artifacts[artifact_path]['url'],
        Path(f'{benchmark_name}-{branch}-{commit_hash[:8]}.json'),
        overwrite,
    )


def download_benchmarks(
    branch: Optional[str],
    pull_request_id: Optional[int],
    base_of_pr: Optional[int],
    ignore_commit_hash: bool = False,
    overwrite: bool = False,
    debug_requests: bool = False,
    silent: bool = False,
):
    github = Github('ethereum/solidity', debug_requests)
    circleci = CircleCI('ethereum/solidity', debug_requests)

    expected_commit_hash = None
    if branch is None and pull_request_id is None and base_of_pr is None:
        branch = git_current_branch()
        expected_commit_hash = git_commit_hash()
    elif branch is not None:
        expected_commit_hash = git_commit_hash(branch)
    elif pull_request_id is not None:
        pr_info = github.pull_request(pull_request_id)
        branch = pr_info['head']['ref']
        expected_commit_hash = pr_info['head']['sha']
    elif base_of_pr is not None:
        pr_info = github.pull_request(base_of_pr)
        branch = pr_info['base']['ref']
        expected_commit_hash = pr_info['base']['sha']

    if not silent:
        print(
            f"Looking for pipelines that ran on branch {branch}" +
            (f", commit {expected_commit_hash}." if not ignore_commit_hash else " (any commit).")
        )

    pipeline = circleci.latest_item(circleci.pipelines(
        branch,
        expected_commit_hash if not ignore_commit_hash else None,
        # Skip nightly workflows. They don't have the c_ext_benchmarks job and even if they did,
        # they would likely be running a different set of external tests.
        excluded_trigger_types=['schedule'],
    ))
    if pipeline is None:
        raise RuntimeError("No matching pipelines found.")

    actual_commit_hash = pipeline['vcs']['revision']
    workflow_id = circleci.latest_item(circleci.workflows(pipeline['id']))['id']
    benchmark_collector_job = circleci.job(workflow_id, 'c_ext_benchmarks', require_success=True)
    artifacts = circleci.artifacts(int(benchmark_collector_job['job_number']))

    download_benchmark_artifact(artifacts, 'summarized-benchmarks', branch, actual_commit_hash, overwrite, silent)
    download_benchmark_artifact(artifacts, 'all-benchmarks', branch, actual_commit_hash, overwrite, silent)


def main():
    try:
        options = process_commandline()
        download_benchmarks(
            options.branch,
            options.pull_request_id,
            options.base_of_pr,
            options.ignore_commit_hash,
            options.overwrite,
            options.debug_requests,
        )

        return 0
    except (APIHelperError, requests.exceptions.HTTPError, RuntimeError) as exception:
        print(f"[ERROR] {exception}", file=sys.stderr)
        return 1


if __name__ == '__main__':
    sys.exit(main())
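For reference, a sketch of how the script can be driven, either via the CLI flags defined above or programmatically (the exact path of the script within scripts/ is an assumption, not shown in this view):

# ./externalTests/download_benchmarks.py                  # current branch and commit (hypothetical path)
# ./externalTests/download_benchmarks.py --branch develop
# ./externalTests/download_benchmarks.py --pr 12818 --overwrite
from externalTests.download_benchmarks import download_benchmarks

# Equivalent to `--branch develop --overwrite` on the command line.
download_benchmarks(branch='develop', pull_request_id=None, base_of_pr=None, overwrite=True)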

test_download_benchmarks.py

@@ -0,0 +1,258 @@
#!/usr/bin/env python3

from pathlib import Path
from unittest import TestCase
from unittest.mock import call, Mock, patch

# NOTE: This test file only works with scripts/ added to PYTHONPATH so pylint can't find the imports
# pragma pylint: disable=import-error
from externalTests.download_benchmarks import download_benchmarks
# pragma pylint: enable=import-error


def _git_run_command_mock(command):
    if command == ['git', 'symbolic-ref', 'HEAD', '--short']:
        return 'benchmark-downloader'

    if len(command) == 4 and command[:3] == ['git', 'rev-parse', '--verify']:
        ref = command[3]
        if ref == 'HEAD':
            ref = 'benchmark-downloader'

        if ref == 'benchmark-downloader':
            return 'fa1ddc6f412100d531f6d3a77008c73b474692d6'
        if ref == 'develop':
            return '43f29c00da02e19ff10d43f7eb6955d627c57728'

    raise RuntimeError(
        "The test tried to run an unexpected git command.\n"
        f"command: {command}\n"
        "If you have updated the code, please remember to add matching command fixtures above."
    )


def _requests_get_mock(url, params):
    response_mock = Mock()

    if url == 'https://api.github.com/repos/ethereum/solidity/pulls/12818':
        response_mock.json.return_value = {
            "head": {
                "ref": "benchmark-downloader",
                "sha": "fa1ddc6f412100d531f6d3a77008c73b474692d6",
            },
            "base": {
                "ref": "develop",
                "sha": "43f29c00da02e19ff10d43f7eb6955d627c57728",
            },
        }
        return response_mock

    if (
        url == 'https://circleci.com/api/v2/project/gh/ethereum/solidity/pipeline' and
        params.get('branch') == 'develop'
    ):
        response_mock.json.return_value = {
            "next_page_token": None,
            "items": [
                {
                    "id": "3b15a41f-6933-4a35-9823-08ebb1ff9336",
                    "created_at": "2022-03-23T00:10:31.659Z",
                    "trigger": {"type": "schedule"},
                    "vcs": {
                        "revision": "43f29c00da02e19ff10d43f7eb6955d627c57728",
                        "branch": "develop"
                    },
                },
                {
                    "id": "f9036a2d-be2b-4315-bd57-4d35b87502d2",
                    "created_at": "2022-03-22T00:10:30.304Z",
                    "trigger": {"type": "webhook"},
                    "vcs": {
                        "revision": "43f29c00da02e19ff10d43f7eb6955d627c57728",
                        "branch": "develop"
                    },
                },
                {
                    "id": "1d389e7c-b7dc-4d4d-9e58-c21ae48901a5",
                    "created_at": "2022-03-21T00:10:30.579Z",
                    "trigger": {"type": "schedule"},
                    "vcs": {
                        "revision": "430ecb6e16c346005315dbdd3edf3c3e64e9b1d8",
                        "branch": "develop"
                    },
                },
                {
                    "id": "7185a3f6-6338-4c2c-952d-4c30e7561e61",
                    "created_at": "2022-03-21T12:54:41.817Z",
                    "trigger": {"type": "webhook"},
                    "vcs": {
                        "revision": "43f29c00da02e19ff10d43f7eb6955d627c57728",
                        "branch": "develop"
                    }
                },
            ]
        }
        return response_mock

    if (
        url == 'https://circleci.com/api/v2/project/gh/ethereum/solidity/pipeline' and
        params.get('branch') == 'benchmark-downloader'
    ):
        response_mock.json.return_value = {
            "next_page_token": None,
            "items": [
                {
                    "id": "9af60346-a6b9-41b9-8a16-16ccf8996373",
                    "created_at": "2022-03-23T10:11:34.683Z",
                    "trigger": {"type": "webhook"},
                    "vcs": {
                        "revision": "fa1ddc6f412100d531f6d3a77008c73b474692d6",
                        "branch": "benchmark-downloader"
                    }
                }
            ]
        }
        return response_mock

    if url in [
        # To reduce the number of fixtures, let's use this workflow for multiple pipelines.
        # This would not be the case in practice.
        'https://circleci.com/api/v2/pipeline/f9036a2d-be2b-4315-bd57-4d35b87502d2/workflow',
        'https://circleci.com/api/v2/pipeline/9af60346-a6b9-41b9-8a16-16ccf8996373/workflow'
    ]:
        response_mock.json.return_value = {
            "next_page_token": None,
            "items": [
                {
                    "id": "7a54e9cc-513d-4134-afdb-db62ab8146e5",
                    "created_at": "2022-03-21T12:54:42Z",
                }
            ]
        }
        return response_mock

    if url == 'https://circleci.com/api/v2/workflow/7a54e9cc-513d-4134-afdb-db62ab8146e5/job':
        response_mock.json.return_value = {
            "next_page_token": None,
            "items": [
                {
                    "job_number": 1017975,
                    "name": "chk_coding_style",
                    "status": "success",
                },
                {
                    "job_number": 1017969,
                    "name": "b_ubu",
                    "status": "success",
                },
                {
                    "job_number": 1018023,
                    "name": "c_ext_benchmarks",
                    "status": "success",
                },
            ]
        }
        return response_mock

    if url == 'https://circleci.com/api/v2/project/gh/ethereum/solidity/1018023/artifacts':
        response_mock.json.return_value = {
            "next_page_token": None,
            "items": [
                {
                    "path": "reports/externalTests/all-benchmarks.json",
                    "url": "https://circle-artifacts.com/0/reports/externalTests/all-benchmarks.json"
                },
                {
                    "path": "reports/externalTests/summarized-benchmarks.json",
                    "url": "https://circle-artifacts.com/0/reports/externalTests/summarized-benchmarks.json"
                }
            ]
        }
        return response_mock

    raise RuntimeError(
        "The test tried to perform an unexpected GET request.\n"
        f"URL: {url}\n" +
        (f"query: {params}\n" if len(params) > 0 else "") +
        "If you have updated the code, please remember to add matching response fixtures above."
    )


class TestBenchmarkDownloader(TestCase):
    def setUp(self):
        self.maxDiff = 10000

    @staticmethod
    @patch('externalTests.download_benchmarks.download_file')
    @patch('requests.get', _requests_get_mock)
    @patch('common.git_helpers.run_git_command', _git_run_command_mock)
    def test_download_benchmarks(download_file_mock):
        download_benchmarks(None, None, None, silent=True)
        download_file_mock.assert_has_calls([
            call(
                'https://circle-artifacts.com/0/reports/externalTests/summarized-benchmarks.json',
                Path('summarized-benchmarks-benchmark-downloader-fa1ddc6f.json'),
                False
            ),
            call(
                'https://circle-artifacts.com/0/reports/externalTests/all-benchmarks.json',
                Path('all-benchmarks-benchmark-downloader-fa1ddc6f.json'),
                False
            ),
        ])

    @staticmethod
    @patch('externalTests.download_benchmarks.download_file')
    @patch('requests.get', _requests_get_mock)
    @patch('common.git_helpers.run_git_command', _git_run_command_mock)
    def test_download_benchmarks_branch(download_file_mock):
        download_benchmarks('develop', None, None, silent=True)
        download_file_mock.assert_has_calls([
            call(
                'https://circle-artifacts.com/0/reports/externalTests/summarized-benchmarks.json',
                Path('summarized-benchmarks-develop-43f29c00.json'),
                False
            ),
            call(
                'https://circle-artifacts.com/0/reports/externalTests/all-benchmarks.json',
                Path('all-benchmarks-develop-43f29c00.json'),
                False
            ),
        ])

    @staticmethod
    @patch('externalTests.download_benchmarks.download_file')
    @patch('requests.get', _requests_get_mock)
    @patch('common.git_helpers.run_git_command', _git_run_command_mock)
    def test_download_benchmarks_pr(download_file_mock):
        download_benchmarks(None, 12818, None, silent=True)
        download_file_mock.assert_has_calls([
            call(
                'https://circle-artifacts.com/0/reports/externalTests/summarized-benchmarks.json',
                Path('summarized-benchmarks-benchmark-downloader-fa1ddc6f.json'),
                False
            ),
            call(
                'https://circle-artifacts.com/0/reports/externalTests/all-benchmarks.json',
                Path('all-benchmarks-benchmark-downloader-fa1ddc6f.json'),
                False
            ),
        ])

    @staticmethod
    @patch('externalTests.download_benchmarks.download_file')
    @patch('requests.get', _requests_get_mock)
    @patch('common.git_helpers.run_git_command', _git_run_command_mock)
    def test_download_benchmarks_base_of_pr(download_file_mock):
        download_benchmarks(None, None, 12818, silent=True)
        download_file_mock.assert_has_calls([
            call(
                'https://circle-artifacts.com/0/reports/externalTests/summarized-benchmarks.json',
                Path('summarized-benchmarks-develop-43f29c00.json'),
                False
            ),
            call(
                'https://circle-artifacts.com/0/reports/externalTests/all-benchmarks.json',
                Path('all-benchmarks-develop-43f29c00.json'),
                False
            ),
        ])
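These tests stub out both network and git access, so they run entirely offline. A self-contained sketch of the same patching pattern, for readers unfamiliar with it (the names here are illustrative, not from the repository):

from unittest.mock import patch

def fetch_status(url):
    import requests  # hypothetical code under test
    return requests.get(url, params={}).json()['status']

# patch('requests.get') swaps the real function for a Mock for the duration
# of the block, exactly as the decorators above do per test method.
with patch('requests.get') as get_mock:
    get_mock.return_value.json.return_value = {'status': 'success'}
    assert fetch_status('https://example.com') == 'success'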