import whisper_timestamped as whisper
import torch
import sys, os
import json
from tqdm import tqdm

# Input and output locations: .wav files are searched for under ~/special,
# and the JSON transcripts are written into that same folder.
myhome = os.path.expanduser("~")
root_folder = f"{myhome}/special"
output_folder = root_folder

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    mydevice = torch.device("cuda")
else:
    mydevice = torch.device("cpu")
print(mydevice)

# Load the "large-v3" Whisper model onto the selected device.
model = whisper.load_model("large-v3", device=mydevice)

# Collect every .wav path under root_folder in a single traversal, so the
# progress-bar total and the list of files actually processed cannot disagree
# (and the directory tree is only walked once).
wav_files = [
    os.path.join(dirpath, filename)
    for dirpath, _dirnames, filenames in os.walk(root_folder)
    for filename in filenames
    if filename.endswith(".wav")
]
num_files = len(wav_files)
print("Number of files: ", num_files)

# Transcribe the wav files and display a progress bar
with tqdm(wav_files, total=num_files, desc="Transcribing Files") as pbar:
    for filepath in pbar:
        audio = whisper.load_audio(filepath)
        result = whisper.transcribe(model, audio, language="en", vad=True)

        # Write the transcription result as JSON, named after the wav file.
        # NOTE: the output name uses only the file's base name, so two wav
        # files with the same name in different subfolders end up writing to
        # the same JSON path (the later one overwrites the earlier one).
        filename_no_ext = os.path.splitext(os.path.basename(filepath))[0]
        newpath = os.path.join(output_folder, filename_no_ext + '.json')

        # tqdm.write keeps the message from garbling the active progress bar.
        tqdm.write(newpath)
        with open(newpath, 'w', encoding="utf-8") as f:
            json.dump(result, f, indent=2)