eth-statediff-service/scripts/dedup.sh
prathamesh0 2db235f244
Add helper scripts for processing and importing statediffed data (#105)
* Add helper scripts for processing and importing statediffed data

* Add instructions to use the helper scripts

* Update scripts to use arrays

* Update README with bad row output example

* Remove delimiter option from xargs command
2022-09-22 13:52:47 +05:30

36 lines
951 B
Bash
Executable File

#!/bin/bash
# Requires:
# DEDUP_LOG
# DEDUP_INPUT_DIR
# DEDUP_OUTPUT_DIR
# DEDUP_SORT_DIR
# env file arg
ENV=$1
echo "Using env file: ${ENV}"
# read env file
export $(grep -v '^#' ${ENV} | xargs)
# redirect stdout/stderr to a file
exec >"${DEDUP_LOG}" 2>&1
# create output dir if not exists
mkdir -p "${DEDUP_OUTPUT_DIR}"
start_timestamp=$(date +%s)
echo "public.blocks"
echo Start: "$(date)"
sort -T "${DEDUP_SORT_DIR}" -u "${DEDUP_INPUT_DIR}"/public.blocks.csv -o "${DEDUP_OUTPUT_DIR}"/deduped-public.blocks.csv
echo End: "$(date)"
echo Total deduped rows: $(wc -l ${DEDUP_OUTPUT_DIR}/deduped-public.blocks.csv)
echo
difference=$(($(date +%s)-start_timestamp))
echo Time taken: $((difference/86400)):$(date -d@${difference} -u +%H:%M:%S)
# NOTE: This script currently only dedups public.blocks output file.
# If the output contains blocks that were statediffed more than once, output files for other tables will have to be deduped as well.