From 462948b849b1a6530d17340137dda7a27dc0f96b Mon Sep 17 00:00:00 2001
From: Gaumit Kauts <123269559+Gaumit-Kauts@users.noreply.github.com>
Date: Sat, 14 Feb 2026 16:10:19 -0700
Subject: [PATCH] created speech_to_text

Using openai whisper model for converting speech to text
---
 speech_to_text.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 speech_to_text.py

diff --git a/speech_to_text.py b/speech_to_text.py
new file mode 100644
--- /dev/null
+++ b/speech_to_text.py
@@ -0,0 +1,54 @@
+import os
+import tempfile
+
+from flask import Flask, request, jsonify
+from openai import OpenAI
+
+app = Flask(__name__)
+client = OpenAI()  # reads OPENAI_API_KEY from env
+
+
+def transcribe_with_timestamps(local_path: str):
+    """Transcribe an audio file and return segment-level timestamps.
+
+    Uses whisper-1 because timestamp_granularities requires
+    response_format="verbose_json", which only whisper-1 supports.
+    Returns a list of {"start_ms", "end_ms", "text"} dicts.
+    """
+    with open(local_path, "rb") as f:
+        tr = client.audio.transcriptions.create(
+            model="whisper-1",
+            file=f,
+            response_format="verbose_json",
+            timestamp_granularities=["segment"],
+        )
+    segments = []
+    for seg in tr.segments:
+        # NOTE(review): newer openai SDK versions return segment objects,
+        # not dicts; attribute access (seg.start) may be needed — confirm
+        # against the pinned openai package version.
+        segments.append({
+            "start_ms": int(seg["start"] * 1000),
+            "end_ms": int(seg["end"] * 1000),
+            "text": seg["text"].strip(),
+        })
+    return segments
+
+
+@app.post("/upload-audio")
+def upload_audio():
+    file = request.files.get("file")
+    if file is None or not file.filename:
+        return jsonify({"error": "missing 'file' upload"}), 400
+
+    # Never build a filesystem path from the client-supplied filename
+    # (path-traversal risk); write to a private temp file instead.
+    suffix = os.path.splitext(file.filename)[1]
+    fd, local_path = tempfile.mkstemp(suffix=suffix)
+    try:
+        with os.fdopen(fd, "wb") as out:
+            file.save(out)
+        segments = transcribe_with_timestamps(local_path)
+    finally:
+        os.remove(local_path)  # always clean up the upload
+
+    # TODO: insert `segments` into transcript_segments table
+    return jsonify({"segments": segments})