More logging for webapp deployment #923

Merged
telackey merged 12 commits from telackey/missing_app into main 2024-08-15 02:57:48 +00:00
Showing only changes of commit be69f4d670

@@ -229,7 +229,6 @@ def command(ctx, kube_config, laconic_config, image_registry, deployment_parent_
request_id, discover, state_file, only_update_state,
dns_suffix, fqdn_policy, record_namespace_dns, record_namespace_deployments, dry_run,
include_tags, exclude_tags, force_rebuild, recreate_on_deploy, log_dir):
root_logger = TimedLogger(file=sys.stderr)
if request_id and discover:
print("Cannot specify both --request-id and --discover", file=sys.stderr)
sys.exit(2)
@@ -251,152 +250,158 @@ def command(ctx, kube_config, laconic_config, image_registry, deployment_parent_
print("--fqdn-policy must be one of 'prohibit', 'allow', or 'preexisting'", file=sys.stderr)
sys.exit(2)
# Split CSV and clean up values.
include_tags = [tag.strip() for tag in include_tags.split(",") if tag]
exclude_tags = [tag.strip() for tag in exclude_tags.split(",") if tag]
main_logger = TimedLogger(file=sys.stderr)
laconic = LaconicRegistryClient(laconic_config, log_file=sys.stderr)
try:
# Split CSV and clean up values.
include_tags = [tag.strip() for tag in include_tags.split(",") if tag]
exclude_tags = [tag.strip() for tag in exclude_tags.split(",") if tag]
# Find deployment requests.
# single request
if request_id:
root_logger.log(f"Retrieving request {request_id}.")
requests = [laconic.get_record(request_id, require=True)]
# all requests
elif discover:
root_logger.log(f"Discovering deployment requests.")
requests = laconic.app_deployment_requests()
laconic = LaconicRegistryClient(laconic_config, log_file=sys.stderr)
if only_update_state:
if not dry_run:
dump_known_requests(state_file, requests)
return
# Find deployment requests.
# single request
if request_id:
main_logger.log(f"Retrieving request {request_id}...")
requests = [laconic.get_record(request_id, require=True)]
# all requests
elif discover:
main_logger.log("Discovering deployment requests...")
requests = laconic.app_deployment_requests()
root_logger.log(f"Loading known requests.")
previous_requests = load_known_requests(state_file)
if only_update_state:
if not dry_run:
dump_known_requests(state_file, requests)
return
# Collapse related requests.
requests.sort(key=lambda r: r.createTime)
requests.reverse()
requests_by_name = {}
skipped_by_name = {}
for r in requests:
root_logger.log(f"BEGIN: Examining request {r.id}.")
result = "DEPLOY"
try:
if r.id in previous_requests and previous_requests[r.id].get("status", "") != "RETRY":
print(f"Skipping request {r.id}, we've already seen it.")
continue
main_logger.log(f"Loading known requests from {state_file}...")
previous_requests = load_known_requests(state_file)
app = laconic.get_record(r.attributes.application)
if not app:
result = "ERROR"
root_logger.log(f"Skipping request {r.id}, cannot locate app.")
continue
requested_name = r.attributes.dns
if not requested_name:
requested_name = generate_hostname_for_app(app)
root_logger.log("Generating name %s for request %s." % (requested_name, r.id))
if requested_name in skipped_by_name or requested_name in requests_by_name:
root_logger.log("Ignoring request %s, it has been superseded." % r.id)
result = "SKIP"
continue
if skip_by_tag(r, include_tags, exclude_tags):
root_logger.log("Skipping request %s, filtered by tag (include %s, exclude %s, present %s)" % (r.id,
include_tags,
exclude_tags,
r.attributes.tags))
skipped_by_name[requested_name] = r
result = "SKIP"
continue
root_logger.log_file("Found pending request %s to run application %s on %s." % (r.id, r.attributes.application, requested_name))
requests_by_name[requested_name] = r
except Exception as e:
result = "ERROR"
root_logger.log(f"ERROR examining request {r.id}: " + str(e))
finally:
root_logger.log(f"DONE Examining request {r.id} with result {result}.")
if result in ["ERROR"]:
dump_known_requests(state_file, [r], status)
# Find deployments.
root_logger.log(f"Discovering existing app deployments...")
deployments = laconic.app_deployments()
deployments_by_request = {}
for d in deployments:
if d.attributes.request:
deployments_by_request[d.attributes.request] = d
# Find removal requests.
root_logger.log(f"Discovering deployment removal and cancellation requests...")
cancellation_requests = {}
removal_requests = laconic.app_deployment_removal_requests()
for r in removal_requests:
if r.attributes.request:
cancellation_requests[r.attributes.request] = r
requests_to_execute = []
for r in requests_by_name.values():
if r.id in cancellation_requests and match_owner(cancellation_requests[r.id], r):
root_logger.log(f"Found deployment cancellation request for {r.id} at {cancellation_requests[r.id].id}")
elif r.id in deployments_by_request:
root_logger.log(f"Found satisfied request for {r.id} at {deployments_by_request[r.id].id}")
else:
if r.id not in previous_requests:
root_logger.log(f"Request {r.id} needs to processed.")
requests_to_execute.append(r)
else:
root_logger.log(f"Skipping unsatisfied request {r.id} because we have seen it before.")
root_logger.log("Found %d unsatisfied request(s) to process." % len(requests_to_execute))
if not dry_run:
for r in requests_to_execute:
root_logger.log("DEPLOYING {r.id}: BEGIN")
dump_known_requests(state_file, [r], "DEPLOYING")
status = "ERROR"
run_log_file = None
run_reg_client = laconic
# Collapse related requests.
requests.sort(key=lambda r: r.createTime)
requests.reverse()
requests_by_name = {}
skipped_by_name = {}
for r in requests:
main_logger.log(f"BEGIN: Examining request {r.id}")
result = "PENDING"
try:
run_id = f"{r.id}-{str(time.time()).split('.')[0]}-{str(uuid.uuid4()).split('-')[0]}"
if log_dir:
run_log_dir = os.path.join(log_dir, r.id)
if not os.path.exists(run_log_dir):
os.mkdir(run_log_dir)
run_log_file_path = os.path.join(run_log_dir, f"{run_id}.log")
root_logger.log(f"Directing deployment logs to: {run_log_file_path}")
run_log_file = open(run_log_file_path, "wt")
run_reg_client = LaconicRegistryClient(laconic_config, log_file=run_log_file)
if r.id in previous_requests and previous_requests[r.id].get("status", "") != "RETRY":
main_logger.log(f"Skipping request {r.id}, we've already seen it.")
continue
logger = TimedLogger(run_id, run_log_file)
logger.log("Processing ...")
process_app_deployment_request(
ctx,
run_reg_client,
r,
record_namespace_deployments,
record_namespace_dns,
dns_suffix,
os.path.abspath(deployment_parent_dir),
kube_config,
image_registry,
force_rebuild,
fqdn_policy,
recreate_on_deploy,
logger
)
status = "DEPLOYED"
app = laconic.get_record(r.attributes.application)
if not app:
result = "ERROR"
main_logger.log(f"Skipping request {r.id}, cannot locate app.")
continue
requested_name = r.attributes.dns
if not requested_name:
requested_name = generate_hostname_for_app(app)
main_logger.log("Generating name %s for request %s." % (requested_name, r.id))
if requested_name in skipped_by_name or requested_name in requests_by_name:
main_logger.log("Ignoring request %s, it has been superseded." % r.id)
result = "SKIP"
continue
if skip_by_tag(r, include_tags, exclude_tags):
main_logger.log("Skipping request %s, filtered by tag (include %s, exclude %s, present %s)" % (r.id,
include_tags,
exclude_tags,
r.attributes.tags))
skipped_by_name[requested_name] = r
result = "SKIP"
continue
main_logger.log_file("Found pending request %s to run application %s on %s." % (r.id, r.attributes.application, requested_name))
requests_by_name[requested_name] = r
except Exception as e:
root_logger.log("ERROR {r.id}:" + str(e))
logger.log("ERROR: " + str(e))
result = "ERROR"
main_logger.log(f"ERROR examining request {r.id}: " + str(e))
finally:
root_logger.log("DEPLOYING {r.id}: END - {status}")
if logger:
logger.log(f"DONE with status {status}", show_step_time=False, show_total_time=True)
dump_known_requests(state_file, [r], status)
if run_log_file:
run_log_file.close()
main_logger.log(f"DONE Examining request {r.id} with result {result}.")
if result in ["ERROR"]:
dump_known_requests(state_file, [r], status=result)
# Find deployments.
main_logger.log("Discovering existing app deployments...")
deployments = laconic.app_deployments()
deployments_by_request = {}
for d in deployments:
if d.attributes.request:
deployments_by_request[d.attributes.request] = d
# Find removal requests.
main_logger.log("Discovering deployment removal and cancellation requests...")
cancellation_requests = {}
removal_requests = laconic.app_deployment_removal_requests()
for r in removal_requests:
if r.attributes.request:
cancellation_requests[r.attributes.request] = r
requests_to_execute = []
for r in requests_by_name.values():
if r.id in cancellation_requests and match_owner(cancellation_requests[r.id], r):
main_logger.log(f"Found deployment cancellation request for {r.id} at {cancellation_requests[r.id].id}")
elif r.id in deployments_by_request:
main_logger.log(f"Found satisfied request for {r.id} at {deployments_by_request[r.id].id}")
else:
if r.id not in previous_requests:
main_logger.log(f"Request {r.id} needs to processed.")
requests_to_execute.append(r)
else:
main_logger.log(f"Skipping unsatisfied request {r.id} because we have seen it before.")
main_logger.log("Found %d unsatisfied request(s) to process." % len(requests_to_execute))
if not dry_run:
for r in requests_to_execute:
main_logger.log(f"DEPLOYING {r.id}: BEGIN")
dump_known_requests(state_file, [r], "DEPLOYING")
status = "ERROR"
run_log_file = None
run_reg_client = laconic
try:
run_id = f"{r.id}-{str(time.time()).split('.')[0]}-{str(uuid.uuid4()).split('-')[0]}"
if log_dir:
run_log_dir = os.path.join(log_dir, r.id)
if not os.path.exists(run_log_dir):
os.mkdir(run_log_dir)
run_log_file_path = os.path.join(run_log_dir, f"{run_id}.log")
main_logger.log(f"Directing deployment logs to: {run_log_file_path}")
run_log_file = open(run_log_file_path, "wt")
run_reg_client = LaconicRegistryClient(laconic_config, log_file=run_log_file)
build_logger = TimedLogger(run_id, run_log_file)
build_logger.log("Processing ...")
process_app_deployment_request(
ctx,
run_reg_client,
r,
record_namespace_deployments,
record_namespace_dns,
dns_suffix,
os.path.abspath(deployment_parent_dir),
kube_config,
image_registry,
force_rebuild,
fqdn_policy,
recreate_on_deploy,
build_logger
)
status = "DEPLOYED"
except Exception as e:
main_logger.log(f"ERROR {r.id}:" + str(e))
build_logger.log("ERROR: " + str(e))
finally:
main_logger.log(f"DEPLOYING {r.id}: END - {status}")
if build_logger:
build_logger.log(f"DONE with status {status}", show_step_time=False, show_total_time=True)
dump_known_requests(state_file, [r], status)
if run_log_file:
run_log_file.close()
except Exception as e:
main_logger.log("UNCAUGHT ERROR:" + str(e))
raise e
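
For context, here is a minimal, self-contained sketch of the per-request logging flow this commit sets up: a long-lived main_logger writing to stderr, plus a per-run log file under log_dir/<request-id>/<run-id>.log that a run-scoped logger (and the registry client) write to. The TimedLogger class below is a hypothetical stand-in that mimics only the constructor and .log calls visible in the diff, and open_run_logger is an illustrative helper, not part of the change.

import os
import sys
import time
import uuid


class TimedLogger:
    """Hypothetical stand-in for the orchestrator's TimedLogger (constructor and .log only)."""

    def __init__(self, prefix="", file=sys.stderr):
        self.prefix = prefix
        self.file = file
        self.start = time.time()

    def log(self, msg, show_step_time=True, show_total_time=False):
        # The real class reports step/total timings; here we just prefix elapsed seconds.
        elapsed = time.time() - self.start
        print(f"{self.prefix} [{elapsed:.1f}s] {msg}".strip(), file=self.file)


def open_run_logger(request_id, log_dir):
    # Illustrative helper: one log file per run, grouped by request id,
    # mirroring the run_id / run_log_file_path construction in the diff.
    run_id = f"{request_id}-{str(time.time()).split('.')[0]}-{str(uuid.uuid4()).split('-')[0]}"
    run_log_dir = os.path.join(log_dir, request_id)
    os.makedirs(run_log_dir, exist_ok=True)
    run_log_file = open(os.path.join(run_log_dir, f"{run_id}.log"), "wt")
    return TimedLogger(run_id, run_log_file), run_log_file


if __name__ == "__main__":
    main_logger = TimedLogger(file=sys.stderr)          # long-lived logger, stderr
    main_logger.log("DEPLOYING example-request: BEGIN")
    build_logger, run_log_file = open_run_logger("example-request", "/tmp/webapp-deploy-logs")
    build_logger.log("Processing ...")                  # per-run detail goes to the run log file
    build_logger.log("DONE with status DEPLOYED", show_step_time=False, show_total_time=True)
    run_log_file.close()
    main_logger.log("DEPLOYING example-request: END - DEPLOYED")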