Tally Csv
Tally per-file statistics for CSV files found in subfolders.
SH
tally_csv.sh
#!/usr/bin/env bash
# tally_csv.sh - Scan all CSVs in subfolders and produce tallies.csv
#
# Usage: ./tally_csv.sh   (takes no arguments)
#
# Looks for *.csv files at least one directory below the directory that
# holds this script and writes one summary row per file to tallies.csv,
# located next to the script.
#
# Output columns: filename, filesize_bytes, records, total_cells,
# empty_cells, pct_empty
#
# Notes / limitations:
#   - "filename" is the base name only, so files with the same name in
#     different subfolders produce rows with the same filename.
#   - Input is split on every comma; quoted fields that themselves contain
#     commas or newlines are therefore miscounted.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly SCRIPT_DIR
readonly OUTPUT="$SCRIPT_DIR/tallies.csv"

#######################################
# Print a value as a single CSV field.
# The value is wrapped in double quotes (with embedded quotes doubled) only
# when it contains a comma, a double quote, or a line break; otherwise it is
# printed unchanged.
# Arguments: $1 - raw field value
# Outputs:   the encoded field on stdout, without a trailing newline
#######################################
csv_field() {
  local value=$1
  local -r dq='"'
  case "$value" in
    *,* | *"$dq"* | *$'\n'* | *$'\r'*)
      value=${value//$dq/$dq$dq}
      printf '"%s"' "$value"
      ;;
    *)
      printf '%s' "$value"
      ;;
  esac
}

#######################################
# Print the tally row for one CSV file.
# The first line of the file is treated as the header: it defines the column
# count and is not counted as a record. A cell counts as empty when it is
# missing, zero-length, or whitespace-only.
# Arguments: $1 - path to the CSV file
# Outputs:   "filename,filesize_bytes,records,total_cells,empty_cells,
#            pct_empty" on stdout, newline-terminated
# Returns:   non-zero (aborting under set -e) if the file cannot be read
#######################################
tally_file() {
  local filepath=$1
  local filename filesize stats

  filename=${filepath##*/}

  # wc -c is POSIX, unlike GNU-only `stat -c%s`; some implementations pad
  # the number with blanks, so strip all whitespace.
  filesize=$(wc -c < "$filepath")
  filesize=${filesize//[[:space:]]/}

  # The file is fed on stdin so the path is never parsed by awk as a
  # var=value operand, and the file name is kept out of awk entirely so
  # backslashes in it are not interpreted as escape sequences.
  stats=$(
    awk -F',' '
      NR == 1 { cols = NF; next }
      {
        records++
        for (i = 1; i <= cols; i++) {
          if ($i == "" || $i ~ /^[[:space:]]*$/) empty++
        }
      }
      END {
        total = records * cols
        # total is 0 for an empty/header-only file and also when the
        # header line is blank; guard the division in both cases.
        pct = (total > 0) ? empty / total * 100 : 0
        printf "%d,%d,%d,%.2f", records, total, empty, pct
      }
    ' < "$filepath"
  )

  # Emit the row in one write so a failure above never leaves a partial row.
  printf '%s,%s,%s\n' "$(csv_field "$filename")" "$filesize" "$stats"
}

#######################################
# Rebuild tallies.csv from every *.csv found below SCRIPT_DIR's subfolders.
# Globals:   SCRIPT_DIR (read), OUTPUT (read; the file is overwritten)
# Outputs:   a one-line summary on stdout
#######################################
main() {
  local filepath count

  printf '%s\n' \
    "filename,filesize_bytes,records,total_cells,empty_cells,pct_empty" \
    > "$OUTPUT"

  # -mindepth 2 limits the scan to subfolders, which also keeps tallies.csv
  # itself (depth 1) out of the results.
  find "$SCRIPT_DIR" -mindepth 2 -name '*.csv' -print0 \
    | while IFS= read -r -d '' filepath; do
      # Skip directories, dangling symlinks and other non-regular matches
      # instead of letting them abort the whole run.
      [[ -f "$filepath" ]] || continue
      tally_file "$filepath" >> "$OUTPUT"
    done

  count=$(tail -n +2 "$OUTPUT" | wc -l)
  printf 'Done. %s files tallied in %s\n' "${count//[[:space:]]/}" "$OUTPUT"
}

main "$@"