ipld-eth-state-snapshot/scripts/filter-bad-rows.sh
prathamesh0 be544a3424
Add helper scripts for data dump correction (#57)
* Add a script to find bad data in CSV file dumps

* Add a script to delete bad rows from CSV file dumps

* Add instructions to run the scripts

* Reorganize instructions
2022-08-17 15:14:14 +05:30

30 lines
793 B
Bash
Executable File

#!/bin/bash
#
# Copy only the well-formed rows of a CSV dump into a new file.
#
# A row is kept when splitting it on "," yields exactly the expected number of
# fields; every other row is dropped. The split is a plain comma split
# (awk -F","), so commas inside quoted values count as separators too.
#
# flags
# -i <input-file>: Input data file path
# -c <expected-columns>: Expected number of columns in each row of the input file
# -o <output-file>: Output destination file path (required; overwritten if it exists)
# eg: ./scripts/filter-bad-rows.sh -i eth.state_cids.csv -c 8 -o cleaned-eth.state_cids.csv
#
# Exit status: 0 on success, 2 on invalid arguments, otherwise the status of
# the failed filter command.

# Print a one-line usage summary to stderr.
usage() {
  printf 'Usage: %s -i <input-file> -c <expected-columns> -o <output-file>\n' \
    "${0##*/}" >&2
}

# Parse the flags, validate them, filter the rows and report the elapsed time.
# Arguments: the script's command-line arguments.
# Outputs:   "Time taken: HH:MM:SS" on stdout; diagnostics on stderr.
# Returns:   0 on success, 2 on invalid arguments, the filter's status otherwise.
main() {
  # OPTIND is local so that main can be invoked more than once per shell.
  local OPTIND=1 OPTARG OPTION
  local inputFile='' expectedColumns='' outputFile=''

  while getopts i:c:o: OPTION; do
    case "${OPTION}" in
      i) inputFile=${OPTARG} ;;
      c) expectedColumns=${OPTARG} ;;
      o) outputFile=${OPTARG} ;;
      *)
        # getopts has already printed its own diagnostic for the bad option.
        usage
        return 2
        ;;
    esac
  done

  if [[ -z "${outputFile}" ]]; then
    echo "Invalid destination file arg (-o) ${outputFile}" >&2
    usage
    return 2
  fi
  if [[ -z "${inputFile}" || ! -r "${inputFile}" ]]; then
    echo "Invalid input file arg (-i) ${inputFile}" >&2
    usage
    return 2
  fi
  # The count is handed to awk as data, so it has to be a plain integer.
  if [[ ! "${expectedColumns}" =~ ^[0-9]+$ ]]; then
    echo "Invalid expected columns arg (-c) ${expectedColumns}" >&2
    usage
    return 2
  fi
  # The output redirection truncates its target before awk reads anything, so
  # filtering a file onto itself would destroy the input.
  if [[ "${inputFile}" -ef "${outputFile}" ]]; then
    echo "Input (-i) and output (-o) must be different files" >&2
    return 2
  fi

  local startTime=${SECONDS}
  local status=0

  # select only rows having expected number of columns
  # The file is supplied on stdin so that a path starting with "-" or
  # containing "=" cannot be misread by awk as an option or an assignment.
  awk -F"," -v expected="${expectedColumns}" 'NF == expected' \
    < "${inputFile}" > "${outputFile}" || status=$?
  if ((status != 0)); then
    echo "Filtering ${inputFile} failed with status ${status}" >&2
    return "${status}"
  fi

  local difference=$((SECONDS - startTime))
  printf 'Time taken: %02d:%02d:%02d\n' \
    "$((difference / 3600))" "$((difference % 3600 / 60))" "$((difference % 60))"
}

main "$@"