speech_to_text functionality updated

Using faster-whisper to complete this task.
This commit is contained in:
Gaumit Kauts
2026-02-14 19:10:03 -07:00
parent 01ad42f6a1
commit c3ff755d2f
3 changed files with 77 additions and 42 deletions

4
.env Normal file
View File

@@ -0,0 +1,4 @@
# WARNING(review): real-looking credentials are committed in this file. Keep .env
# out of version control and rotate these keys.
SUPABASE_URL=https://tnpnlkosqqudoadfylss.supabase.co
SUPABASE_PUBLISHABLE_KEY=sb_publishable_UqXeuY5gOjvGpoNO1ciZYw_g7nO2M1Q
SUPABASE_ANON_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InRucG5sa29zcXF1ZG9hZGZ5bHNzIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NzEwMjU0MDgsImV4cCI6MjA4NjYwMTQwOH0.6_juLoB24uHNcukoKHFNbAcRxXBP7PMsAk4cwISV6A0
# NOTE(review): this value is byte-identical to SUPABASE_ANON_KEY above, and its
# JWT payload carries role "anon" - it does not look like a service-role key; confirm.
SUPABASE_SERVICE_ROLE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InRucG5sa29zcXF1ZG9hZGZ5bHNzIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NzEwMjU0MDgsImV4cCI6MjA4NjYwMTQwOH0.6_juLoB24uHNcukoKHFNbAcRxXBP7PMsAk4cwISV6A0

View File

@@ -1,5 +1,5 @@
flask flask
openai faster-whisper
supabase supabase
python-dotenv python-dotenv
werkzeug werkzeug

View File

@@ -2,28 +2,42 @@
import uuid import uuid
from pathlib import Path from pathlib import Path
from flask import Flask, jsonify, request from dotenv import load_dotenv
from openai import OpenAI from flask import Flask, jsonify, render_template, request
from faster_whisper import WhisperModel
from supabase import Client, create_client from supabase import Client, create_client
from werkzeug.utils import secure_filename from werkzeug.utils import secure_filename
load_dotenv()
app = Flask(__name__) app = Flask(__name__)
UPLOAD_DIR = Path(os.getenv("UPLOAD_DIR", "uploads")) UPLOAD_DIR = Path(os.getenv("UPLOAD_DIR", "uploads"))
UPLOAD_DIR.mkdir(parents=True, exist_ok=True) UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
ALLOWED_EXTENSIONS = {"mp3", "wav", "m4a", "ogg", "webm", "flac", "mp4"} ALLOWED_EXTENSIONS = {"mp3", "wav", "m4a", "ogg", "webm", "flac", "mp4"}
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) WHISPER_MODEL_NAME = os.getenv("WHISPER_MODEL", "base")
WHISPER_DEVICE = os.getenv("WHISPER_DEVICE", "cpu")
WHISPER_COMPUTE_TYPE = os.getenv("WHISPER_COMPUTE_TYPE", "int8")
SUPABASE_URL = os.getenv("SUPABASE_URL") SUPABASE_URL = (os.getenv("SUPABASE_URL") or "").strip()
SUPABASE_SERVICE_ROLE_KEY = os.getenv("SUPABASE_SERVICE_ROLE_KEY") SUPABASE_SERVICE_ROLE_KEY = (os.getenv("SUPABASE_SERVICE_ROLE_KEY") or "").strip()
supabase: Client | None = None
if SUPABASE_URL and SUPABASE_SERVICE_ROLE_KEY:
supabase = create_client(SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY)
if not SUPABASE_URL or not SUPABASE_SERVICE_ROLE_KEY: _model: WhisperModel | None = None
raise RuntimeError(
"Missing SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY environment variables."
)
supabase: Client = create_client(SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY)
def get_whisper_model() -> WhisperModel:
    """Return the shared WhisperModel, building it on the first call.

    The instance is stored in the module-level ``_model`` so later calls
    reuse it instead of constructing a new model. It is configured from
    WHISPER_MODEL_NAME, WHISPER_DEVICE and WHISPER_COMPUTE_TYPE.
    """
    global _model
    if _model is not None:
        return _model

    model_options = {
        "device": WHISPER_DEVICE,
        "compute_type": WHISPER_COMPUTE_TYPE,
    }
    _model = WhisperModel(WHISPER_MODEL_NAME, **model_options)
    return _model
def allowed_file(filename: str) -> bool: def allowed_file(filename: str) -> bool:
@@ -52,21 +66,15 @@ def parse_category_ids(value: str | None) -> list[int]:
def transcribe_audio(local_path: Path) -> str: def transcribe_audio(local_path: Path) -> str:
with local_path.open("rb") as audio_file: model = get_whisper_model()
transcript = client.audio.transcriptions.create( segments, _info = model.transcribe(str(local_path))
model="whisper-1", text = " ".join(segment.text.strip() for segment in segments).strip()
file=audio_file, return text
response_format="text",
)
if isinstance(transcript, str):
return transcript.strip()
text_value = getattr(transcript, "text", "")
return str(text_value).strip()
def verify_supabase_connection() -> None: def verify_supabase_connection() -> None:
if not supabase:
raise RuntimeError("Supabase is not configured.")
supabase.table("categories").select("category_id").limit(1).execute() supabase.table("categories").select("category_id").limit(1).execute()
@@ -80,6 +88,9 @@ def insert_post(
image_url: str | None, image_url: str | None,
category_ids: list[int], category_ids: list[int],
) -> int: ) -> int:
if not supabase:
raise RuntimeError("Supabase is not configured.")
post_payload = { post_payload = {
"user_id": user_id, "user_id": user_id,
"title": title, "title": title,
@@ -108,7 +119,17 @@ def insert_post(
@app.get("/health") @app.get("/health")
def health_check(): def health_check():
return jsonify({"status": "ok"}) return jsonify({
"status": "ok",
"whisper_model": WHISPER_MODEL_NAME,
"whisper_device": WHISPER_DEVICE,
"whisper_compute_type": WHISPER_COMPUTE_TYPE,
})
@app.get("/")
def demo_frontend():
    """Serve the ``index.html`` template at the site root."""
    page = render_template("index.html")
    return page
@app.get("/health/db") @app.get("/health/db")
@@ -133,13 +154,12 @@ def upload_audio():
return jsonify({"error": "Unsupported file extension."}), 400 return jsonify({"error": "Unsupported file extension."}), 400
user_id_raw = request.form.get("user_id") user_id_raw = request.form.get("user_id")
if not user_id_raw: user_id: int | None = None
return jsonify({"error": "'user_id' is required in form-data."}), 400 if user_id_raw:
try:
try: user_id = int(user_id_raw)
user_id = int(user_id_raw) except ValueError:
except ValueError: return jsonify({"error": "'user_id' must be an integer."}), 400
return jsonify({"error": "'user_id' must be an integer."}), 400
try: try:
category_ids = parse_category_ids(request.form.get("category_ids")) category_ids = parse_category_ids(request.form.get("category_ids"))
@@ -159,24 +179,35 @@ def upload_audio():
try: try:
transcript_text = transcribe_audio(local_path) transcript_text = transcribe_audio(local_path)
post_id = insert_post(
user_id=user_id,
title=title,
transcribed_text=transcript_text,
audio_url=str(local_path).replace("\\", "/"),
is_private=is_private,
image_url=image_url,
category_ids=category_ids,
)
except Exception as error: except Exception as error:
return jsonify({"error": "Failed to process audio", "details": str(error)}), 500 return jsonify({"error": "Transcription failed", "details": str(error)}), 500
post_id: int | None = None
db_warning: str | None = None
if supabase:
if user_id is None:
db_warning = "Transcribed successfully. Skipped Supabase save because 'user_id' was not provided."
else:
try:
post_id = insert_post(
user_id=user_id,
title=title,
transcribed_text=transcript_text,
audio_url=str(local_path).replace("\\", "/"),
is_private=is_private,
image_url=image_url,
category_ids=category_ids,
)
except Exception as error:
db_warning = f"Transcribed successfully, but Supabase save failed: {error}"
return jsonify( return jsonify(
{ {
"message": "Audio uploaded, transcribed, and saved to Supabase.", "message": "Audio uploaded and transcribed (local whisper).",
"post_id": post_id, "post_id": post_id,
"transcribed_text": transcript_text, "transcribed_text": transcript_text,
"audio_url": str(local_path).replace("\\", "/"), "audio_url": str(local_path).replace("\\", "/"),
"db_warning": db_warning,
} }
) )