2db235f244
* Add helper scripts for processing and importing statediffed data * Add instructions to use the helper scripts * Update scripts to use arrays * Update README with bad row output example * Remove delimiter option from xargs command
36 lines
951 B
Bash
Executable File
36 lines
951 B
Bash
Executable File
#!/bin/bash
|
|
|
|
# Requires:
|
|
# DEDUP_LOG
|
|
# DEDUP_INPUT_DIR
|
|
# DEDUP_OUTPUT_DIR
|
|
# DEDUP_SORT_DIR
|
|
|
|
# env file arg
|
|
ENV=$1
|
|
echo "Using env file: ${ENV}"
|
|
|
|
# read env file
|
|
export $(grep -v '^#' ${ENV} | xargs)
|
|
|
|
# redirect stdout/stderr to a file
|
|
exec >"${DEDUP_LOG}" 2>&1
|
|
|
|
# create output dir if not exists
|
|
mkdir -p "${DEDUP_OUTPUT_DIR}"
|
|
|
|
start_timestamp=$(date +%s)
|
|
|
|
echo "public.blocks"
|
|
echo Start: "$(date)"
|
|
sort -T "${DEDUP_SORT_DIR}" -u "${DEDUP_INPUT_DIR}"/public.blocks.csv -o "${DEDUP_OUTPUT_DIR}"/deduped-public.blocks.csv
|
|
echo End: "$(date)"
|
|
echo Total deduped rows: $(wc -l ${DEDUP_OUTPUT_DIR}/deduped-public.blocks.csv)
|
|
echo
|
|
|
|
difference=$(($(date +%s)-start_timestamp))
|
|
echo Time taken: $((difference/86400)):$(date -d@${difference} -u +%H:%M:%S)
|
|
|
|
# NOTE: This script currently only dedups public.blocks output file.
|
|
# If the output contains blocks that were statediffed more than once, output files for other tables will have to be deduped as well.
|