#!/usr/bin/env bash
##
# Transcription commands
#
##
# Transcribe audio using OpenAI's whisper. Ex: tlf transcribe audio-file.mp4. Run tlf makemd afterward.
#
# @arg file relative path to audio file to transcribe, relative to current working directory
# @usage `tlf transcribe rel/path/to/file.mp4`
# @return 0 on success, 2 if no file was given, otherwise the exit status of the failing command
# @output transcription directory (`${file}-transcription`), transcription files created by whisper, clean-transcription.txt
#
function transcribe(){
  local file="$1"
  if [[ -z "$file" ]]; then
    printf 'transcribe: missing path to audio file\n' >&2
    return 2
  fi

  local src="$PWD/$file"
  local out_dir="${src}-transcription"

  # whisper names its output files after the audio file's basename with the
  # final extension removed, so directory components have to be stripped
  # before building the .tsv path (rel/path/to/file.mp4 -> file.tsv).
  local base="${file##*/}"
  local tsv_file="${out_dir}/${base%.*}.tsv"

  # Skip the cleanup step when whisper fails: there is no fresh .tsv to clean.
  whisper "$src" --output_dir "$out_dir" --model small --language English || return

  # $codeDir is not set in this function; it must be provided by the caller's environment.
  php "$codeDir/lib/clean-transcription.php" "$tsv_file"
}
##
# Clean a .tsv file into a human-readable form. You should probably just use makemd, though.
#
# @arg file relative path to tsv file, within current working directory
# @return 0 on success, 2 if no file was given, otherwise the exit status of clean-transcription.php
# @output human-readable, timestamped transcript written by lib/clean-transcription.php (file name and format are decided by that script)
#
function transcribe_cleantsv(){
  local file="$1"
  if [[ -z "$file" ]]; then
    printf 'transcribe_cleantsv: missing path to .tsv file\n' >&2
    return 2
  fi

  # The PHP script receives an absolute path built from the working directory.
  # $codeDir is not set in this function; it must be provided by the caller's environment.
  php "$codeDir/lib/clean-transcription.php" "$PWD/$file"
}
##
# Create a .md file with proper disclaimer header and links to timestamped portions of the uploaded video. Ex: tlf transcribe makemd audio-file.mp4 youtube.com/video_id 0
#
# Expects directory structure provided by primary transcribe function
#
# @usage `tlf makemd audio-file.mp4 youtube.com/video_id 0`
#
# @arg1 file relative path to audio file that was transcribed
# @arg2 yt_url the url to a youtube video representing this audio
# @arg3 offset the offset, in seconds, for the beginning of this audio in the yt video. Defaults to 0 when omitted or empty
#
# @return 0 on success, 2 if no audio file was given, otherwise the exit status of transcribe-makemd.php
# @output `${file}.md` in the current working directory, without the audio-files extension
#
function transcribe_makemd(){
  # NOTE(review): the example above says `tlf transcribe makemd` while @usage
  # says `tlf makemd`; confirm against the dispatcher which form is correct.
  local audio_filename="$1"
  if [[ -z "$audio_filename" ]]; then
    printf 'transcribe_makemd: missing path to audio file\n' >&2
    return 2
  fi

  local yt_url="$2"
  local offset="${3:-0}"

  # Only the absolute audio path is handed over; the transcription directory
  # and .tsv location are presumably derived from it inside the PHP script
  # -- TODO confirm.
  # $codeDir is not set in this function; it must be provided by the caller's environment.
  php "$codeDir/lib/transcribe-makemd.php" "$PWD/$audio_filename" "$yt_url" "$offset"
}