Whispy
BASH Scripts
whispy.sh
#!/bin/bash
# Combined Whisper transcription script
# Usage:
# whispy.sh [--cpu|--gpu] <audio-file> [model] - Transcribe a single file
# whispy.sh [--cpu|--gpu] --all [model] - Batch transcribe all MP3 files in current directory
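# Examples (file names are placeholders):
# whispy.sh --gpu lecture.mp3 small
# whispy.sh --all large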
MODEL_DEFAULT="medium"
USE_CPU=true
# Parse CPU/GPU flag
if [ "$1" = "--cpu" ]; then
USE_CPU=true
shift
elif [ "$1" = "--gpu" ]; then
USE_CPU=false
shift
fi
# Set CUDA visibility based on flag
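# (an empty CUDA_VISIBLE_DEVICES hides every GPU from PyTorch, so Whisper falls back to CPU)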
if [ "$USE_CPU" = true ]; then
export CUDA_VISIBLE_DEVICES=""
echo "Using CPU mode"
else
echo "Using GPU mode"
fi
# Function to transcribe a single file
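# Writes <name>.txt and <name>.srt next to the input; skips files that already have both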
transcribe_file() {
local FILE="$1"
local MODEL="$2"
local BASE="${FILE%.mp3}"
local TXT="$BASE.txt"
local SRT="$BASE.srt"
if [ ! -f "$FILE" ]; then
echo "Error: File '$FILE' not found"
return 1
fi
# Skip if both transcription files already exist
if [ -f "$TXT" ] && [ -f "$SRT" ]; then
echo "Skipping '$FILE' - already transcribed ($TXT and $SRT exist)"
return 0
fi
echo "Transcribing '$FILE' with model '$MODEL'..."
source "$HOME/.venvs/whisper-venv/bin/activate"
python3 - "$FILE" "$MODEL" "$TXT" "$SRT" <<'PY'
import sys, os
import whisper
from datetime import timedelta
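# These positional arguments come from the python3 invocation above: audio path, model name, output paths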
audio_file, model_name, txt_path, srt_path = sys.argv[1:]
model = whisper.load_model(model_name)
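# language=None lets Whisper auto-detect the language; verbose=True prints segments as they are decoded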
result = model.transcribe(audio_file, verbose=True, language=None)
print(f"\nDetected language: {result['language']}")
# Write plain text
with open(txt_path, "w", encoding="utf-8") as txt:
txt.write(result["text"])
# Write SRT (SRT requires zero-padded HH:MM:SS,mmm timestamps)
def fmt_ts(seconds):
    td = timedelta(seconds=seconds)
    hours, rem = divmod(int(td.total_seconds()), 3600)
    minutes, secs = divmod(rem, 60)
    millis = td.microseconds // 1000
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

with open(srt_path, "w", encoding="utf-8") as srt:
    for i, segment in enumerate(result["segments"], 1):
        start = fmt_ts(segment["start"])
        end = fmt_ts(segment["end"])
        srt.write(f"{i}\n")
        srt.write(f"{start} --> {end}\n")
        srt.write(f"{segment['text'].strip()}\n\n")
print(f"\nDone → {os.path.basename(txt_path)} + {os.path.basename(srt_path)}")
PY
    if [ $? -eq 0 ]; then
        echo -e "\033[0;32mFinished:\033[0m $TXT $SRT"
        return 0
    else
        return 1
    fi
}
# Function to batch process all MP3 files
batch_transcribe() {
local MODEL="$1"
local PROCESSED=0
local FAILED=0
echo "Starting batch transcription with model: $MODEL"
echo "================================================"
echo ""
# Loop through all MP3 files
for mp3_file in *.mp3; do
# Skip if no MP3 files found
if [ "$mp3_file" = "*.mp3" ]; then
echo "No MP3 files found in current directory"
exit 1
fi
# Check if already transcribed (skip if .txt exists)
base_name="${mp3_file%.mp3}"
if [ -f "$base_name.txt" ]; then
echo "⏭️ Skipping: $mp3_file (already transcribed)"
continue
fi
echo "🎙️ Processing: $mp3_file"
echo "---"
if transcribe_file "$mp3_file" "$MODEL"; then
((PROCESSED++))
echo "✅ Success: $base_name.txt + $base_name.srt"
else
((FAILED++))
echo "❌ Failed: $mp3_file"
fi
echo ""
done
echo "================================================"
echo "Batch transcription complete!"
echo " Processed: $PROCESSED"
echo " Failed: $FAILED"
echo " Total: $((PROCESSED + FAILED))"
}
# Main script logic
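# Note: any --cpu/--gpu flag was already consumed by the shift above, so $1 is now the file or --all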
if [ "$1" = "--all" ]; then
    # Batch mode
    MODEL="${2:-$MODEL_DEFAULT}"
    batch_transcribe "$MODEL"
elif [ -z "$1" ]; then
    # No arguments - show usage
    echo "Usage:"
    echo " whispy.sh [--cpu|--gpu] <audio-file> [model] - Transcribe a single file"
    echo " whispy.sh [--cpu|--gpu] --all [model] - Batch transcribe all MP3 files"
    echo ""
    echo "Options:"
    echo " --cpu Use CPU-only mode (default)"
    echo " --gpu Use GPU acceleration"
    echo ""
    echo "Models: tiny, base, small, medium (default), large"
    exit 1
else
    # Single file mode
    FILE="$1"
    MODEL="${2:-$MODEL_DEFAULT}"
    transcribe_file "$FILE" "$MODEL"
fi