transcribe.bash

#!/usr/bin/env bash
##
# Transcription commands: run whisper on an audio file, clean the resulting .tsv, and build a timestamped .md
#


##
# Transcribe audio using OpenAI's whisper. Ex: tlf transcribe audio-file.mp4. Run tlf makemd afterward.
#
# Runs whisper (model "small", language English) on the file, writing into
# "<cwd>/<file>-transcription", then hands the resulting .tsv to
# lib/clean-transcription.php. Reads the global $codeDir.
#
# @arg file relative path to audio file to transcribe, relative to current working directory
# @usage `tlf transcribe rel/path/to/file.mp4`
# @return 0 on success, 2 when no file is given, otherwise the exit status of whisper or of the php script
# @output transcription directory, transcription files created by whisper, clean-transcription.txt
#
function transcribe(){
    local file="$1"
    if [[ -z "$file" ]]; then
        printf 'usage: tlf transcribe rel/path/to/file.mp4\n' >&2
        return 2
    fi

    local cwd
    cwd="$(pwd)" || return

    # whisper names its output files after the audio file's base name (no
    # directory, no extension) and puts them directly inside --output_dir,
    # so any directory part of $file must be dropped before building the
    # .tsv path.
    local base_name="${file##*/}"
    local stem="${base_name%.*}"

    local src="${cwd}/${file}"
    local out_dir="${cwd}/${file}-transcription"
    local tsv_file="${out_dir}/${stem}.tsv"

    # Without a successful whisper run there is no .tsv to clean; stop here
    # and report whisper's exit status.
    whisper "$src" --output_dir "$out_dir" --model small --language English || return

    php "$codeDir/lib/clean-transcription.php" "$tsv_file"
}

##
# Clean a .tsv file into a human-readable form. You should probably just use makemd, though.
#
# Prefixes the argument with the current working directory and passes the
# result to lib/clean-transcription.php. Reads the global $codeDir.
#
# @arg file relative path to tsv file, within current working directory
# @return exit status of the php script
# @output a .md file with timestamped transcript in human-readable format
#
function transcribe_cleantsv(){
    # Locals keep this sourced function from overwriting same-named
    # variables in the calling shell.
    local file="$1"
    local src
    src="$(pwd)/${file}" || return

    # NOTE(review): the output format is decided by the php script, which is
    # not visible here - confirm the @output description against it.
    php "$codeDir/lib/clean-transcription.php" "$src"
}

##
# Create a .md file with proper disclaimer header and links to timestamped portions of the uploaded video. Ex: tlf transcribe makemd audio-file.mp4 youtube.com/video_id 0
#
# Expects directory structure provided by primary transcribe function
#
# Prefixes the audio path with the current working directory and passes it,
# the video url and the offset to lib/transcribe-makemd.php. Reads the global
# $codeDir.
#
# @usage `tlf makemd audio-file.mp4 youtube.com/video_id 0`
#
# @arg1 file relative path to audio file that was transcribed
# @arg2 yt_url the url to a youtube video representing this audio
# @arg3 offset the offset, in seconds, for the beginning of this audio in the yt video. Defaults to 0 when omitted or empty
#
# @return exit status of the php script
# @output `${file}.md` in the current working directory, without the audio file's extension
#
function transcribe_makemd(){
    # Locals keep this sourced function from overwriting same-named
    # variables in the calling shell.
    local audio_file
    audio_file="$(pwd)/$1" || return

    local yt_url="$2"

    # A missing or empty third argument means the audio starts at the very
    # beginning of the video.
    local offset="${3:-0}"

    # The transcription directory and .tsv path are not computed here;
    # presumably the php script derives them from the audio path - verify there.
    php "$codeDir/lib/transcribe-makemd.php" "$audio_file" "$yt_url" "$offset"
}