download_benchmarks: Script for downloading benchmark results from CircleCI

Kamil Śliwak 2022-03-18 22:46:36 +01:00
parent f0e43921f7
commit 9618cb947d
5 changed files with 630 additions and 3 deletions

.circleci/config.yml

@@ -690,9 +690,16 @@ jobs:
       - run:
           name: Install pip
           command: apt -q update && apt install -y python3-pip
       - run:
-          name: Install pylint
-          command: python3 -m pip install pylint z3-solver pygments-lexer-solidity parsec tabulate deepdiff colorama
-          # also z3-solver, parsec and tabulate to make sure pylint knows about this module, pygments-lexer-solidity for docs
+          name: Install pylint and dependencies of the scripts that will be linted
+          command: python3 -m pip install
+            pylint
+            z3-solver
+            pygments-lexer-solidity
+            parsec
+            tabulate
+            deepdiff
+            colorama
+            requests
       - run:
           name: Linting Python Scripts
           command: ./scripts/pylint_all.py
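The new multi-line `command:` value is a YAML plain scalar: the indented continuation lines fold back into a single line separated by spaces, so the step still runs a single pip invocation. A quick sanity check of that folding behaviour, assuming PyYAML is available (it is not part of this commit):

# Demonstrates YAML plain-scalar folding on a shortened version of the config above.
import yaml

snippet = '''
command: python3 -m pip install
  pylint
  z3-solver
'''
# Continuation lines fold into one space-separated command line.
assert yaml.safe_load(snippet)['command'] == 'python3 -m pip install pylint z3-solver'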

scripts/common/git_helpers.py

@@ -0,0 +1,19 @@
import subprocess


def run_git_command(command):
    process = subprocess.run(
        command,
        encoding='utf8',
        capture_output=True,
        check=True,
    )
    return process.stdout.strip()


def git_current_branch():
    return run_git_command(['git', 'symbolic-ref', 'HEAD', '--short'])


def git_commit_hash(ref: str = 'HEAD'):
    return run_git_command(['git', 'rev-parse', '--verify', ref])
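A minimal sketch of how these helpers are used by the script below (assumes scripts/ is on PYTHONPATH and the working directory is inside a git checkout; since run_git_command passes check=True, a failing git command raises subprocess.CalledProcessError):

from common.git_helpers import git_current_branch, git_commit_hash

print(git_current_branch())        # e.g. 'develop'
print(git_commit_hash())           # full hash of HEAD
print(git_commit_hash('develop'))  # full hash of a specific ref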

scripts/common/rest_api_helpers.py

@@ -0,0 +1,171 @@
from pathlib import Path
from typing import List, Mapping, Optional
import functools
import json
import operator
import shutil

import requests


class APIHelperError(Exception):
    pass


class DataUnavailable(APIHelperError):
    pass


class InvalidResponse(APIHelperError):
    pass


class FileAlreadyExists(APIHelperError):
    pass


def query_api(url: str, params: Mapping[str, str], debug_requests=False) -> dict:
    if debug_requests:
        print(f'REQUEST URL: {url}')
        if len(params) > 0:
            print(f'QUERY: {params}')

    response = requests.get(url, params=params)
    response.raise_for_status()

    if debug_requests:
        json_response = response.json()
        print('========== RESPONSE ==========')
        if json_response is not None:
            print(json.dumps(json_response, indent=4))
        else:
            print(response.content)
        print('==============================')

    return response.json()


def download_file(url: str, target_path: Path, overwrite=False):
    if not overwrite and target_path.exists():
        raise FileAlreadyExists(f"Refusing to overwrite existing file: '{target_path}'.")

    with requests.get(url, stream=True) as response:
        # Fail early on an HTTP error instead of writing the error page to the target file.
        response.raise_for_status()
        with open(target_path, 'wb') as target_file:
            shutil.copyfileobj(response.raw, target_file)


class Github:
    BASE_URL = 'https://api.github.com'

    project_slug: str
    debug_requests: bool

    def __init__(self, project_slug: str, debug_requests: bool):
        self.project_slug = project_slug
        self.debug_requests = debug_requests

    def pull_request(self, pr_id: int) -> dict:
        return query_api(
            f'{self.BASE_URL}/repos/{self.project_slug}/pulls/{pr_id}',
            {},
            self.debug_requests
        )


class CircleCI:
    # None might be a more logical default for max_pages but in most cases we'll actually
    # want some limit to prevent flooding the API with requests in case of a bug.
    DEFAULT_MAX_PAGES = 10
    BASE_URL = 'https://circleci.com/api/v2'

    project_slug: str
    debug_requests: bool

    def __init__(self, project_slug: str, debug_requests: bool):
        self.project_slug = project_slug
        self.debug_requests = debug_requests

    def paginated_query_api_iterator(self, url: str, params: Mapping[str, str], max_pages: Optional[int] = DEFAULT_MAX_PAGES):
        assert 'page-token' not in params

        page_count = 0
        next_page_token = None
        while max_pages is None or page_count < max_pages:
            if next_page_token is not None:
                params = {**params, 'page-token': next_page_token}

            json_response = query_api(url, params, self.debug_requests)
            yield json_response['items']
            next_page_token = json_response['next_page_token']
            page_count += 1
            if next_page_token is None:
                break

    def paginated_query_api(self, url: str, params: Mapping[str, str], max_pages: Optional[int] = DEFAULT_MAX_PAGES):
        return functools.reduce(operator.add, self.paginated_query_api_iterator(url, params, max_pages), [])

    def pipelines(
        self,
        branch: Optional[str] = None,
        commit_hash: Optional[str] = None,
        excluded_trigger_types: Optional[List[str]] = None,
    ) -> List[dict]:
        if excluded_trigger_types is None:
            excluded_trigger_types = []

        for items in self.paginated_query_api_iterator(
            f'{self.BASE_URL}/project/gh/{self.project_slug}/pipeline',
            {'branch': branch} if branch is not None else {},
            max_pages=10,
        ):
            matching_items = [
                item
                for item in items
                if (
                    (commit_hash is None or item['vcs']['revision'] == commit_hash) and
                    item['trigger']['type'] not in excluded_trigger_types
                )
            ]
            if len(matching_items) > 0:
                return matching_items

        return []

    def workflows(self, pipeline_id: str) -> List[dict]:
        return self.paginated_query_api(f'{self.BASE_URL}/pipeline/{pipeline_id}/workflow', {})

    def jobs(self, workflow_id: str) -> Mapping[str, dict]:
        items = self.paginated_query_api(f'{self.BASE_URL}/workflow/{workflow_id}/job', {})
        jobs_by_name = {job['name']: job for job in items}

        assert len(jobs_by_name) <= len(items)
        if len(jobs_by_name) < len(items):
            raise InvalidResponse("Job names in the workflow are not unique.")

        return jobs_by_name

    def job(self, workflow_id: str, name: str, require_success: bool = False) -> dict:
        jobs = self.jobs(workflow_id)
        if name not in jobs:
            raise DataUnavailable(f"Job {name} is not present in the workflow.")

        if require_success and jobs[name]['status'] != 'success':
            raise DataUnavailable(
                f"Job {name} has failed or is still running. "
                f"Current status: {jobs[name]['status']}."
            )

        return jobs[name]

    def artifacts(self, job_number: int) -> Mapping[str, dict]:
        items = self.paginated_query_api(f'{self.BASE_URL}/project/gh/{self.project_slug}/{job_number}/artifacts', {})
        artifacts_by_name = {artifact['path']: artifact for artifact in items}

        assert len(artifacts_by_name) <= len(items)
        if len(artifacts_by_name) < len(items):
            raise InvalidResponse("Names of artifacts attached to the job are not unique.")

        return artifacts_by_name

    @staticmethod
    def latest_item(items: List[dict]) -> Optional[dict]:
        sorted_items = sorted(items, key=lambda item: item['created_at'], reverse=True)
        return sorted_items[0] if len(sorted_items) > 0 else None
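These classes compose into the pipeline → workflow → job → artifact lookup chain that the download script below performs. A hedged sketch of that chain (requires network access; the project slug and job name are the ones used later in this commit):

from common.rest_api_helpers import CircleCI

circleci = CircleCI('ethereum/solidity', debug_requests=False)
pipeline = circleci.latest_item(circleci.pipelines(branch='develop', excluded_trigger_types=['schedule']))
assert pipeline is not None
workflow = circleci.latest_item(circleci.workflows(pipeline['id']))
job = circleci.job(workflow['id'], 'c_ext_benchmarks', require_success=True)
artifacts = circleci.artifacts(int(job['job_number']))
print(sorted(artifacts))  # paths like 'reports/externalTests/all-benchmarks.json'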

scripts/externalTests/download_benchmarks.py

@@ -0,0 +1,172 @@
#!/usr/bin/env python3

from argparse import ArgumentParser, Namespace
from pathlib import Path
from typing import Mapping, Optional
import sys

import requests

# Our scripts/ is not a proper Python package so we need to modify PYTHONPATH to import from it
# pragma pylint: disable=import-error,wrong-import-position
SCRIPTS_DIR = Path(__file__).parent.parent
sys.path.insert(0, str(SCRIPTS_DIR))
from common.git_helpers import git_current_branch, git_commit_hash
from common.rest_api_helpers import APIHelperError, CircleCI, Github, download_file
# pragma pylint: enable=import-error,wrong-import-position


def process_commandline() -> Namespace:
    script_description = (
        "Downloads benchmark results attached as artifacts to the c_ext_benchmarks job on CircleCI. "
        "If no options are specified, downloads results for the currently checked out git branch."
    )

    parser = ArgumentParser(description=script_description)
    target_definition = parser.add_mutually_exclusive_group()
    target_definition.add_argument(
        '--branch',
        dest='branch',
        help="Git branch that the job ran on.",
    )
    target_definition.add_argument(
        '--pr',
        dest='pull_request_id',
        type=int,
        help="Github PR ID that the job ran on.",
    )
    target_definition.add_argument(
        '--base-of-pr',
        dest='base_of_pr',
        type=int,
        help="ID of a Github PR that's based on top of the branch we're interested in."
    )
    parser.add_argument(
        '--any-commit',
        dest='ignore_commit_hash',
        default=False,
        action='store_true',
        help="Include pipelines that ran on a different commit as long as branch/PR matches."
    )
    parser.add_argument(
        '--overwrite',
        dest='overwrite',
        default=False,
        action='store_true',
        help="If artifacts already exist on disk, overwrite them.",
    )
    parser.add_argument(
        '--debug-requests',
        dest='debug_requests',
        default=False,
        action='store_true',
        help="Print detailed info about performed API requests and received responses.",
    )

    return parser.parse_args()


def download_benchmark_artifact(
    artifacts: Mapping[str, dict],
    benchmark_name: str,
    branch: str,
    commit_hash: str,
    overwrite: bool,
    silent: bool = False
):
    if not silent:
        print(f"Downloading artifact: {benchmark_name}-{branch}-{commit_hash[:8]}.json.")

    artifact_path = f'reports/externalTests/{benchmark_name}.json'
    if artifact_path not in artifacts:
        raise RuntimeError(f"Missing artifact: {artifact_path}.")

    download_file(
        artifacts[artifact_path]['url'],
        Path(f'{benchmark_name}-{branch}-{commit_hash[:8]}.json'),
        overwrite,
    )


def download_benchmarks(
    branch: Optional[str],
    pull_request_id: Optional[int],
    base_of_pr: Optional[int],
    ignore_commit_hash: bool = False,
    overwrite: bool = False,
    debug_requests: bool = False,
    silent: bool = False,
):
    github = Github('ethereum/solidity', debug_requests)
    circleci = CircleCI('ethereum/solidity', debug_requests)

    expected_commit_hash = None
    if branch is None and pull_request_id is None and base_of_pr is None:
        branch = git_current_branch()
        expected_commit_hash = git_commit_hash()
    elif branch is not None:
        expected_commit_hash = git_commit_hash(branch)
    elif pull_request_id is not None:
        pr_info = github.pull_request(pull_request_id)
        branch = pr_info['head']['ref']
        expected_commit_hash = pr_info['head']['sha']
    elif base_of_pr is not None:
        pr_info = github.pull_request(base_of_pr)
        branch = pr_info['base']['ref']
        expected_commit_hash = pr_info['base']['sha']

    if not silent:
        print(
            f"Looking for pipelines that ran on branch {branch}" +
            (f", commit {expected_commit_hash}." if not ignore_commit_hash else " (any commit).")
        )

    pipeline = circleci.latest_item(circleci.pipelines(
        branch,
        expected_commit_hash if not ignore_commit_hash else None,
        # Skip nightly workflows. They don't have the c_ext_benchmarks job and even if they did,
        # they would likely be running a different set of external tests.
        excluded_trigger_types=['schedule'],
    ))
    if pipeline is None:
        raise RuntimeError("No matching pipelines found.")

    actual_commit_hash = pipeline['vcs']['revision']
    workflow_id = circleci.latest_item(circleci.workflows(pipeline['id']))['id']
    benchmark_collector_job = circleci.job(workflow_id, 'c_ext_benchmarks', require_success=True)
    artifacts = circleci.artifacts(int(benchmark_collector_job['job_number']))

    download_benchmark_artifact(artifacts, 'summarized-benchmarks', branch, actual_commit_hash, overwrite, silent)
    download_benchmark_artifact(artifacts, 'all-benchmarks', branch, actual_commit_hash, overwrite, silent)


def main():
    try:
        options = process_commandline()
        download_benchmarks(
            options.branch,
            options.pull_request_id,
            options.base_of_pr,
            options.ignore_commit_hash,
            options.overwrite,
            options.debug_requests,
        )

        return 0
    except (APIHelperError, requests.exceptions.HTTPError, RuntimeError) as exception:
        print(f"[ERROR] {exception}", file=sys.stderr)
        return 1


if __name__ == '__main__':
    sys.exit(main())
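For reference, a sketch of how the script can be driven, either via the CLI flags defined above or programmatically (the exact path of the script within scripts/ is an assumption, not shown in this view):

# ./externalTests/download_benchmarks.py                  # current branch and commit (hypothetical path)
# ./externalTests/download_benchmarks.py --branch develop
# ./externalTests/download_benchmarks.py --pr 12818 --overwrite
from externalTests.download_benchmarks import download_benchmarks

# Equivalent to `--branch develop --overwrite` on the command line.
download_benchmarks(branch='develop', pull_request_id=None, base_of_pr=None, overwrite=True)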

test_download_benchmarks.py

@@ -0,0 +1,258 @@
#!/usr/bin/env python3

from pathlib import Path
from unittest import TestCase
from unittest.mock import call, Mock, patch

# NOTE: This test file only works with scripts/ added to PYTHONPATH so pylint can't find the imports
# pragma pylint: disable=import-error
from externalTests.download_benchmarks import download_benchmarks
# pragma pylint: enable=import-error


def _git_run_command_mock(command):
    if command == ['git', 'symbolic-ref', 'HEAD', '--short']:
        return 'benchmark-downloader'

    if len(command) == 4 and command[:3] == ['git', 'rev-parse', '--verify']:
        ref = command[3]
        if ref == 'HEAD':
            ref = 'benchmark-downloader'

        if ref == 'benchmark-downloader':
            return 'fa1ddc6f412100d531f6d3a77008c73b474692d6'
        if ref == 'develop':
            return '43f29c00da02e19ff10d43f7eb6955d627c57728'

    raise RuntimeError(
        "The test tried to run an unexpected git command.\n"
        f"command: {command}\n"
        "If you have updated the code, please remember to add matching command fixtures above."
    )


def _requests_get_mock(url, params):
    response_mock = Mock()

    if url == 'https://api.github.com/repos/ethereum/solidity/pulls/12818':
        response_mock.json.return_value = {
            "head": {
                "ref": "benchmark-downloader",
                "sha": "fa1ddc6f412100d531f6d3a77008c73b474692d6",
            },
            "base": {
                "ref": "develop",
                "sha": "43f29c00da02e19ff10d43f7eb6955d627c57728",
            },
        }
        return response_mock

    if (
        url == 'https://circleci.com/api/v2/project/gh/ethereum/solidity/pipeline' and
        params.get('branch') == 'develop'
    ):
        response_mock.json.return_value = {
            "next_page_token": None,
            "items": [
                {
                    "id": "3b15a41f-6933-4a35-9823-08ebb1ff9336",
                    "created_at": "2022-03-23T00:10:31.659Z",
                    "trigger": {"type": "schedule"},
                    "vcs": {
                        "revision": "43f29c00da02e19ff10d43f7eb6955d627c57728",
                        "branch": "develop"
                    },
                },
                {
                    "id": "f9036a2d-be2b-4315-bd57-4d35b87502d2",
                    "created_at": "2022-03-22T00:10:30.304Z",
                    "trigger": {"type": "webhook"},
                    "vcs": {
                        "revision": "43f29c00da02e19ff10d43f7eb6955d627c57728",
                        "branch": "develop"
                    },
                },
                {
                    "id": "1d389e7c-b7dc-4d4d-9e58-c21ae48901a5",
                    "created_at": "2022-03-21T00:10:30.579Z",
                    "trigger": {"type": "schedule"},
                    "vcs": {
                        "revision": "430ecb6e16c346005315dbdd3edf3c3e64e9b1d8",
                        "branch": "develop"
                    },
                },
                {
                    "id": "7185a3f6-6338-4c2c-952d-4c30e7561e61",
                    "created_at": "2022-03-21T12:54:41.817Z",
                    "trigger": {"type": "webhook"},
                    "vcs": {
                        "revision": "43f29c00da02e19ff10d43f7eb6955d627c57728",
                        "branch": "develop"
                    }
                },
            ]
        }
        return response_mock

    if (
        url == 'https://circleci.com/api/v2/project/gh/ethereum/solidity/pipeline' and
        params.get('branch') == 'benchmark-downloader'
    ):
        response_mock.json.return_value = {
            "next_page_token": None,
            "items": [
                {
                    "id": "9af60346-a6b9-41b9-8a16-16ccf8996373",
                    "created_at": "2022-03-23T10:11:34.683Z",
                    "trigger": {"type": "webhook"},
                    "vcs": {
                        "revision": "fa1ddc6f412100d531f6d3a77008c73b474692d6",
                        "branch": "benchmark-downloader"
                    }
                }
            ]
        }
        return response_mock

    if url in [
        # To reduce the number of fixtures, let's use this workflow for multiple pipelines.
        # This would not be the case in practice.
        'https://circleci.com/api/v2/pipeline/f9036a2d-be2b-4315-bd57-4d35b87502d2/workflow',
        'https://circleci.com/api/v2/pipeline/9af60346-a6b9-41b9-8a16-16ccf8996373/workflow'
    ]:
        response_mock.json.return_value = {
            "next_page_token": None,
            "items": [
                {
                    "id": "7a54e9cc-513d-4134-afdb-db62ab8146e5",
                    "created_at": "2022-03-21T12:54:42Z",
                }
            ]
        }
        return response_mock

    if url == 'https://circleci.com/api/v2/workflow/7a54e9cc-513d-4134-afdb-db62ab8146e5/job':
        response_mock.json.return_value = {
            "next_page_token": None,
            "items": [
                {
                    "job_number": 1017975,
                    "name": "chk_coding_style",
                    "status": "success",
                },
                {
                    "job_number": 1017969,
                    "name": "b_ubu",
                    "status": "success",
                },
                {
                    "job_number": 1018023,
                    "name": "c_ext_benchmarks",
                    "status": "success",
                },
            ]
        }
        return response_mock

    if url == 'https://circleci.com/api/v2/project/gh/ethereum/solidity/1018023/artifacts':
        response_mock.json.return_value = {
            "next_page_token": None,
            "items": [
                {
                    "path": "reports/externalTests/all-benchmarks.json",
                    "url": "https://circle-artifacts.com/0/reports/externalTests/all-benchmarks.json"
                },
                {
                    "path": "reports/externalTests/summarized-benchmarks.json",
                    "url": "https://circle-artifacts.com/0/reports/externalTests/summarized-benchmarks.json"
                }
            ]
        }
        return response_mock

    raise RuntimeError(
        "The test tried to perform an unexpected GET request.\n"
        f"URL: {url}\n" +
        (f"query: {params}\n" if len(params) > 0 else "") +
        "If you have updated the code, please remember to add matching response fixtures above."
    )


class TestBenchmarkDownloader(TestCase):
    def setUp(self):
        self.maxDiff = 10000

    @staticmethod
    @patch('externalTests.download_benchmarks.download_file')
    @patch('requests.get', _requests_get_mock)
    @patch('common.git_helpers.run_git_command', _git_run_command_mock)
    def test_download_benchmarks(download_file_mock):
        download_benchmarks(None, None, None, silent=True)
        download_file_mock.assert_has_calls([
            call(
                'https://circle-artifacts.com/0/reports/externalTests/summarized-benchmarks.json',
                Path('summarized-benchmarks-benchmark-downloader-fa1ddc6f.json'),
                False
            ),
            call(
                'https://circle-artifacts.com/0/reports/externalTests/all-benchmarks.json',
                Path('all-benchmarks-benchmark-downloader-fa1ddc6f.json'),
                False
            ),
        ])

    @staticmethod
    @patch('externalTests.download_benchmarks.download_file')
    @patch('requests.get', _requests_get_mock)
    @patch('common.git_helpers.run_git_command', _git_run_command_mock)
    def test_download_benchmarks_branch(download_file_mock):
        download_benchmarks('develop', None, None, silent=True)
        download_file_mock.assert_has_calls([
            call(
                'https://circle-artifacts.com/0/reports/externalTests/summarized-benchmarks.json',
                Path('summarized-benchmarks-develop-43f29c00.json'),
                False
            ),
            call(
                'https://circle-artifacts.com/0/reports/externalTests/all-benchmarks.json',
                Path('all-benchmarks-develop-43f29c00.json'),
                False
            ),
        ])

    @staticmethod
    @patch('externalTests.download_benchmarks.download_file')
    @patch('requests.get', _requests_get_mock)
    @patch('common.git_helpers.run_git_command', _git_run_command_mock)
    def test_download_benchmarks_pr(download_file_mock):
        download_benchmarks(None, 12818, None, silent=True)
        download_file_mock.assert_has_calls([
            call(
                'https://circle-artifacts.com/0/reports/externalTests/summarized-benchmarks.json',
                Path('summarized-benchmarks-benchmark-downloader-fa1ddc6f.json'),
                False
            ),
            call(
                'https://circle-artifacts.com/0/reports/externalTests/all-benchmarks.json',
                Path('all-benchmarks-benchmark-downloader-fa1ddc6f.json'),
                False
            ),
        ])

    @staticmethod
    @patch('externalTests.download_benchmarks.download_file')
    @patch('requests.get', _requests_get_mock)
    @patch('common.git_helpers.run_git_command', _git_run_command_mock)
    def test_download_benchmarks_base_of_pr(download_file_mock):
        download_benchmarks(None, None, 12818, silent=True)
        download_file_mock.assert_has_calls([
            call(
                'https://circle-artifacts.com/0/reports/externalTests/summarized-benchmarks.json',
                Path('summarized-benchmarks-develop-43f29c00.json'),
                False
            ),
            call(
                'https://circle-artifacts.com/0/reports/externalTests/all-benchmarks.json',
                Path('all-benchmarks-develop-43f29c00.json'),
                False
            ),
        ])
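These tests stub out both network and git access, so they run entirely offline. A self-contained sketch of the same patching pattern, for readers unfamiliar with it (the names here are illustrative, not from the repository):

from unittest.mock import patch

def fetch_status(url):
    import requests  # hypothetical code under test
    return requests.get(url, params={}).json()['status']

# patch('requests.get') swaps the real function for a Mock for the duration
# of the block, exactly as the decorators above do per test method.
with patch('requests.get') as get_mock:
    get_mock.return_value.json.return_value = {'status': 'success'}
    assert fetch_status('https://example.com') == 'success'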