#!/bin/bash
#
# Deduplicate the public.blocks CSV output file using an external sort.
#
# Usage:
#   <this-script> <env-file>
#
# The env file contains one KEY=VALUE assignment per line; blank lines and
# lines starting with '#' are ignored. It must define:
#   DEDUP_LOG         file that receives all stdout/stderr of the run
#   DEDUP_INPUT_DIR   directory containing public.blocks.csv
#   DEDUP_OUTPUT_DIR  directory that receives deduped-public.blocks.csv
#   DEDUP_SORT_DIR    scratch directory handed to sort via -T

set -euo pipefail

# Print an error message to stderr and abort with a non-zero status.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print a short usage line to stderr.
usage() {
  printf 'Usage: %s <env-file>\n' "${0##*/}" >&2
}

#######################################
# Export every KEY=VALUE assignment found in an env file.
# The file is parsed, never executed: values are taken literally (no
# expansion, no word splitting), so values containing spaces or glob
# characters are exported intact. One pair of matching surrounding quotes
# is removed from a value.
# Arguments: $1 - path to the env file
# Outputs:   warnings for unparsable lines on stderr
#######################################
load_env_file() {
  local -r env_file=$1
  local -r assignment_re='^[[:space:]]*([A-Za-z_][A-Za-z0-9_]*)=(.*)$'
  local line trimmed key value

  # The "|| [[ -n ... ]]" part keeps a final line that lacks a newline.
  while IFS= read -r line || [[ -n "${line}" ]]; do
    line=${line%$'\r'} # tolerate CRLF line endings
    trimmed=${line#"${line%%[![:space:]]*}"}
    if [[ -z "${trimmed}" || "${trimmed}" == \#* ]]; then
      continue
    fi

    if [[ "${line}" =~ ${assignment_re} ]]; then
      key=${BASH_REMATCH[1]}
      value=${BASH_REMATCH[2]}
      # Drop trailing whitespace, then one layer of matching quotes.
      value=${value%"${value##*[![:space:]]}"}
      case "${value}" in
        \"*\" | \'*\') value=${value:1:${#value}-2} ;;
      esac
      export "${key}=${value}"
    else
      printf 'warning: ignoring unparsable line in %s: %s\n' \
        "${env_file}" "${line}" >&2
    fi
  done <"${env_file}"
}

#######################################
# Format a number of seconds as D:HH:MM:SS (days are not zero padded).
# Arguments: $1 - elapsed seconds (non-negative integer)
# Outputs:   the formatted duration on stdout
#######################################
format_duration() {
  local -r total=$1
  printf '%d:%02d:%02d:%02d\n' \
    "$((total / 86400))" \
    "$((total % 86400 / 3600))" \
    "$((total % 3600 / 60))" \
    "$((total % 60))"
}

main() {
  if (($# < 1)); then
    usage
    exit 2
  fi

  # env file arg
  local -r env_file=$1
  [[ -r "${env_file}" ]] || die "cannot read env file: ${env_file}"
  echo "Using env file: ${env_file}"

  # read env file
  load_env_file "${env_file}"

  # Fail early, while stderr still reaches the caller, if a setting is missing.
  : "${DEDUP_LOG:?must be set in ${env_file}}"
  : "${DEDUP_INPUT_DIR:?must be set in ${env_file}}"
  : "${DEDUP_OUTPUT_DIR:?must be set in ${env_file}}"
  : "${DEDUP_SORT_DIR:?must be set in ${env_file}}"

  local -r input_file="${DEDUP_INPUT_DIR}/public.blocks.csv"
  local -r output_file="${DEDUP_OUTPUT_DIR}/deduped-public.blocks.csv"
  [[ -f "${input_file}" ]] || die "input file not found: ${input_file}"

  # redirect stdout/stderr to a file (everything below is written to the log)
  exec >"${DEDUP_LOG}" 2>&1 || die "cannot open log file: ${DEDUP_LOG}"

  # create output and sort scratch dirs if they do not exist
  mkdir -p -- "${DEDUP_OUTPUT_DIR}" "${DEDUP_SORT_DIR}" \
    || die "cannot create output/sort directories"

  local start_timestamp elapsed
  start_timestamp=$(date +%s)

  echo "public.blocks"
  echo "Start: $(date)"
  # LC_ALL=C makes the comparison byte-wise, so only byte-identical rows are
  # treated as duplicates regardless of the caller's locale.
  LC_ALL=C sort -T "${DEDUP_SORT_DIR}" -u -o "${output_file}" -- "${input_file}" \
    || die "sort failed for ${input_file}"
  echo "End: $(date)"
  echo "Total deduped rows: $(wc -l -- "${output_file}")"
  echo

  elapsed=$(($(date +%s) - start_timestamp))
  echo "Time taken: $(format_duration "${elapsed}")"
}

main "$@"

# NOTE: This script currently only dedups the public.blocks output file.
# If the output contains blocks that were statediffed more than once, the
# output files for the other tables will have to be deduped as well.