Update speech_to_text functionality
Use faster-whisper (local Whisper inference) instead of the OpenAI API for transcription
This commit is contained in:
4
.env
Normal file
4
.env
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
SUPABASE_URL=https://tnpnlkosqqudoadfylss.supabase.co
|
||||||
|
SUPABASE_PUBLISHABLE_KEY=sb_publishable_UqXeuY5gOjvGpoNO1ciZYw_g7nO2M1Q
|
||||||
|
SUPABASE_ANON_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InRucG5sa29zcXF1ZG9hZGZ5bHNzIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NzEwMjU0MDgsImV4cCI6MjA4NjYwMTQwOH0.6_juLoB24uHNcukoKHFNbAcRxXBP7PMsAk4cwISV6A0
|
||||||
|
SUPABASE_SERVICE_ROLE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InRucG5sa29zcXF1ZG9hZGZ5bHNzIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NzEwMjU0MDgsImV4cCI6MjA4NjYwMTQwOH0.6_juLoB24uHNcukoKHFNbAcRxXBP7PMsAk4cwISV6A0
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
flask
|
flask
|
||||||
openai
|
faster-whisper
|
||||||
supabase
|
supabase
|
||||||
python-dotenv
|
python-dotenv
|
||||||
werkzeug
|
werkzeug
|
||||||
|
|||||||
@@ -2,28 +2,42 @@
|
|||||||
import uuid
|
import uuid
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from flask import Flask, jsonify, request
|
from dotenv import load_dotenv
|
||||||
from openai import OpenAI
|
from flask import Flask, jsonify, render_template, request
|
||||||
|
from faster_whisper import WhisperModel
|
||||||
from supabase import Client, create_client
|
from supabase import Client, create_client
|
||||||
from werkzeug.utils import secure_filename
|
from werkzeug.utils import secure_filename
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|
||||||
UPLOAD_DIR = Path(os.getenv("UPLOAD_DIR", "uploads"))
|
UPLOAD_DIR = Path(os.getenv("UPLOAD_DIR", "uploads"))
|
||||||
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
|
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
ALLOWED_EXTENSIONS = {"mp3", "wav", "m4a", "ogg", "webm", "flac", "mp4"}
|
ALLOWED_EXTENSIONS = {"mp3", "wav", "m4a", "ogg", "webm", "flac", "mp4"}
|
||||||
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
WHISPER_MODEL_NAME = os.getenv("WHISPER_MODEL", "base")
|
||||||
|
WHISPER_DEVICE = os.getenv("WHISPER_DEVICE", "cpu")
|
||||||
|
WHISPER_COMPUTE_TYPE = os.getenv("WHISPER_COMPUTE_TYPE", "int8")
|
||||||
|
|
||||||
SUPABASE_URL = os.getenv("SUPABASE_URL")
|
SUPABASE_URL = (os.getenv("SUPABASE_URL") or "").strip()
|
||||||
SUPABASE_SERVICE_ROLE_KEY = os.getenv("SUPABASE_SERVICE_ROLE_KEY")
|
SUPABASE_SERVICE_ROLE_KEY = (os.getenv("SUPABASE_SERVICE_ROLE_KEY") or "").strip()
|
||||||
|
supabase: Client | None = None
|
||||||
|
if SUPABASE_URL and SUPABASE_SERVICE_ROLE_KEY:
|
||||||
|
supabase = create_client(SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY)
|
||||||
|
|
||||||
if not SUPABASE_URL or not SUPABASE_SERVICE_ROLE_KEY:
|
_model: WhisperModel | None = None
|
||||||
raise RuntimeError(
|
|
||||||
"Missing SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY environment variables."
|
|
||||||
)
|
|
||||||
|
|
||||||
supabase: Client = create_client(SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY)
|
|
||||||
|
def get_whisper_model() -> WhisperModel:
|
||||||
|
global _model
|
||||||
|
if _model is None:
|
||||||
|
_model = WhisperModel(
|
||||||
|
WHISPER_MODEL_NAME,
|
||||||
|
device=WHISPER_DEVICE,
|
||||||
|
compute_type=WHISPER_COMPUTE_TYPE,
|
||||||
|
)
|
||||||
|
return _model
|
||||||
|
|
||||||
|
|
||||||
def allowed_file(filename: str) -> bool:
|
def allowed_file(filename: str) -> bool:
|
||||||
@@ -52,21 +66,15 @@ def parse_category_ids(value: str | None) -> list[int]:
|
|||||||
|
|
||||||
|
|
||||||
def transcribe_audio(local_path: Path) -> str:
|
def transcribe_audio(local_path: Path) -> str:
|
||||||
with local_path.open("rb") as audio_file:
|
model = get_whisper_model()
|
||||||
transcript = client.audio.transcriptions.create(
|
segments, _info = model.transcribe(str(local_path))
|
||||||
model="whisper-1",
|
text = " ".join(segment.text.strip() for segment in segments).strip()
|
||||||
file=audio_file,
|
return text
|
||||||
response_format="text",
|
|
||||||
)
|
|
||||||
|
|
||||||
if isinstance(transcript, str):
|
|
||||||
return transcript.strip()
|
|
||||||
|
|
||||||
text_value = getattr(transcript, "text", "")
|
|
||||||
return str(text_value).strip()
|
|
||||||
|
|
||||||
|
|
||||||
def verify_supabase_connection() -> None:
|
def verify_supabase_connection() -> None:
|
||||||
|
if not supabase:
|
||||||
|
raise RuntimeError("Supabase is not configured.")
|
||||||
supabase.table("categories").select("category_id").limit(1).execute()
|
supabase.table("categories").select("category_id").limit(1).execute()
|
||||||
|
|
||||||
|
|
||||||
@@ -80,6 +88,9 @@ def insert_post(
|
|||||||
image_url: str | None,
|
image_url: str | None,
|
||||||
category_ids: list[int],
|
category_ids: list[int],
|
||||||
) -> int:
|
) -> int:
|
||||||
|
if not supabase:
|
||||||
|
raise RuntimeError("Supabase is not configured.")
|
||||||
|
|
||||||
post_payload = {
|
post_payload = {
|
||||||
"user_id": user_id,
|
"user_id": user_id,
|
||||||
"title": title,
|
"title": title,
|
||||||
@@ -108,7 +119,17 @@ def insert_post(
|
|||||||
|
|
||||||
@app.get("/health")
|
@app.get("/health")
|
||||||
def health_check():
|
def health_check():
|
||||||
return jsonify({"status": "ok"})
|
return jsonify({
|
||||||
|
"status": "ok",
|
||||||
|
"whisper_model": WHISPER_MODEL_NAME,
|
||||||
|
"whisper_device": WHISPER_DEVICE,
|
||||||
|
"whisper_compute_type": WHISPER_COMPUTE_TYPE,
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/")
|
||||||
|
def demo_frontend():
|
||||||
|
return render_template("index.html")
|
||||||
|
|
||||||
|
|
||||||
@app.get("/health/db")
|
@app.get("/health/db")
|
||||||
@@ -133,13 +154,12 @@ def upload_audio():
|
|||||||
return jsonify({"error": "Unsupported file extension."}), 400
|
return jsonify({"error": "Unsupported file extension."}), 400
|
||||||
|
|
||||||
user_id_raw = request.form.get("user_id")
|
user_id_raw = request.form.get("user_id")
|
||||||
if not user_id_raw:
|
user_id: int | None = None
|
||||||
return jsonify({"error": "'user_id' is required in form-data."}), 400
|
if user_id_raw:
|
||||||
|
try:
|
||||||
try:
|
user_id = int(user_id_raw)
|
||||||
user_id = int(user_id_raw)
|
except ValueError:
|
||||||
except ValueError:
|
return jsonify({"error": "'user_id' must be an integer."}), 400
|
||||||
return jsonify({"error": "'user_id' must be an integer."}), 400
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
category_ids = parse_category_ids(request.form.get("category_ids"))
|
category_ids = parse_category_ids(request.form.get("category_ids"))
|
||||||
@@ -159,24 +179,35 @@ def upload_audio():
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
transcript_text = transcribe_audio(local_path)
|
transcript_text = transcribe_audio(local_path)
|
||||||
post_id = insert_post(
|
|
||||||
user_id=user_id,
|
|
||||||
title=title,
|
|
||||||
transcribed_text=transcript_text,
|
|
||||||
audio_url=str(local_path).replace("\\", "/"),
|
|
||||||
is_private=is_private,
|
|
||||||
image_url=image_url,
|
|
||||||
category_ids=category_ids,
|
|
||||||
)
|
|
||||||
except Exception as error:
|
except Exception as error:
|
||||||
return jsonify({"error": "Failed to process audio", "details": str(error)}), 500
|
return jsonify({"error": "Transcription failed", "details": str(error)}), 500
|
||||||
|
|
||||||
|
post_id: int | None = None
|
||||||
|
db_warning: str | None = None
|
||||||
|
if supabase:
|
||||||
|
if user_id is None:
|
||||||
|
db_warning = "Transcribed successfully. Skipped Supabase save because 'user_id' was not provided."
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
post_id = insert_post(
|
||||||
|
user_id=user_id,
|
||||||
|
title=title,
|
||||||
|
transcribed_text=transcript_text,
|
||||||
|
audio_url=str(local_path).replace("\\", "/"),
|
||||||
|
is_private=is_private,
|
||||||
|
image_url=image_url,
|
||||||
|
category_ids=category_ids,
|
||||||
|
)
|
||||||
|
except Exception as error:
|
||||||
|
db_warning = f"Transcribed successfully, but Supabase save failed: {error}"
|
||||||
|
|
||||||
return jsonify(
|
return jsonify(
|
||||||
{
|
{
|
||||||
"message": "Audio uploaded, transcribed, and saved to Supabase.",
|
"message": "Audio uploaded and transcribed (local whisper).",
|
||||||
"post_id": post_id,
|
"post_id": post_id,
|
||||||
"transcribed_text": transcript_text,
|
"transcribed_text": transcript_text,
|
||||||
"audio_url": str(local_path).replace("\\", "/"),
|
"audio_url": str(local_path).replace("\\", "/"),
|
||||||
|
"db_warning": db_warning,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user