backend for speech to text
This commit is contained in:
51
README.md
51
README.md
@@ -1,2 +1,49 @@
|
|||||||
# AI-Titan-Forge
|
|
||||||
### CalgaryHack26 project
|
## Backend (Audio -> Whisper -> Supabase)
|
||||||
|
|
||||||
|
This backend:
|
||||||
|
1. accepts an audio file,
|
||||||
|
2. transcribes it with OpenAI Whisper (`whisper-1`),
|
||||||
|
3. stores transcript text in Supabase `posts.transcribed_text`,
|
||||||
|
4. links categories in `post_categories`.
|
||||||
|
|
||||||
|
## Install
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
## Environment variables
|
||||||
|
- `OPENAI_API_KEY`
|
||||||
|
- `SUPABASE_URL`
|
||||||
|
- `SUPABASE_SERVICE_ROLE_KEY` (use service-role key on backend only)
|
||||||
|
- `UPLOAD_DIR` (default: `uploads`)
|
||||||
|
- `PORT` (default: `5000`)
|
||||||
|
|
||||||
|
## Run
|
||||||
|
```bash
|
||||||
|
python speech_to_text.py
|
||||||
|
```
|
||||||
|
|
||||||
|
## Endpoints
|
||||||
|
- `GET /health`
|
||||||
|
- `GET /health/db`
|
||||||
|
- `POST /upload-audio`
|
||||||
|
|
||||||
|
## Upload example
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:5000/upload-audio \
|
||||||
|
-F "file=@sample.mp3" \
|
||||||
|
-F "user_id=1" \
|
||||||
|
-F "title=My oral history" \
|
||||||
|
-F "category_ids=1,4" \
|
||||||
|
-F "is_private=false"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Required tables in Supabase
|
||||||
|
Your Supabase Postgres project should already contain:
|
||||||
|
- `users`
|
||||||
|
- `posts`
|
||||||
|
- `post_categories`
|
||||||
|
- `categories`
|
||||||
|
|
||||||
|
Note: `user_id` must exist in `users` before upload.
|
||||||
|
|||||||
5
requirements.txt
Normal file
5
requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
flask
|
||||||
|
openai
|
||||||
|
supabase
|
||||||
|
python-dotenv
|
||||||
|
werkzeug
|
||||||
@@ -1,36 +1,185 @@
|
|||||||
import os
|
import os
|
||||||
from flask import Flask, request, jsonify
|
import uuid
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from flask import Flask, jsonify, request
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
|
from supabase import Client, create_client
|
||||||
|
from werkzeug.utils import secure_filename
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
client = OpenAI() # reads OPENAI_API_KEY from env
|
|
||||||
|
|
||||||
def transcribe_with_timestamps(local_path: str):
|
UPLOAD_DIR = Path(os.getenv("UPLOAD_DIR", "uploads"))
|
||||||
# Use whisper-1 for verbose_json + timestamp_granularities (segment/word)
|
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
# timestamp_granularities requires response_format="verbose_json" :contentReference[oaicite:1]{index=1}
|
|
||||||
with open(local_path, "rb") as f:
|
ALLOWED_EXTENSIONS = {"mp3", "wav", "m4a", "ogg", "webm", "flac", "mp4"}
|
||||||
tr = client.audio.transcriptions.create(
|
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
||||||
|
|
||||||
|
SUPABASE_URL = os.getenv("SUPABASE_URL")
|
||||||
|
SUPABASE_SERVICE_ROLE_KEY = os.getenv("SUPABASE_SERVICE_ROLE_KEY")
|
||||||
|
|
||||||
|
if not SUPABASE_URL or not SUPABASE_SERVICE_ROLE_KEY:
|
||||||
|
raise RuntimeError(
|
||||||
|
"Missing SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY environment variables."
|
||||||
|
)
|
||||||
|
|
||||||
|
supabase: Client = create_client(SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY)
|
||||||
|
|
||||||
|
|
||||||
|
def allowed_file(filename: str) -> bool:
|
||||||
|
if "." not in filename:
|
||||||
|
return False
|
||||||
|
extension = filename.rsplit(".", 1)[1].lower()
|
||||||
|
return extension in ALLOWED_EXTENSIONS
|
||||||
|
|
||||||
|
|
||||||
|
def parse_bool(value: str | None, default: bool = False) -> bool:
|
||||||
|
if value is None:
|
||||||
|
return default
|
||||||
|
return value.lower() in {"1", "true", "yes", "on"}
|
||||||
|
|
||||||
|
|
||||||
|
def parse_category_ids(value: str | None) -> list[int]:
|
||||||
|
if not value:
|
||||||
|
return []
|
||||||
|
ids: list[int] = []
|
||||||
|
for item in value.split(","):
|
||||||
|
candidate = item.strip()
|
||||||
|
if not candidate:
|
||||||
|
continue
|
||||||
|
ids.append(int(candidate))
|
||||||
|
return ids
|
||||||
|
|
||||||
|
|
||||||
|
def transcribe_audio(local_path: Path) -> str:
|
||||||
|
with local_path.open("rb") as audio_file:
|
||||||
|
transcript = client.audio.transcriptions.create(
|
||||||
model="whisper-1",
|
model="whisper-1",
|
||||||
file=f,
|
file=audio_file,
|
||||||
response_format="verbose_json",
|
response_format="text",
|
||||||
timestamp_granularities=["segment"],
|
|
||||||
)
|
)
|
||||||
segments = []
|
|
||||||
for seg in tr.segments:
|
if isinstance(transcript, str):
|
||||||
segments.append({
|
return transcript.strip()
|
||||||
"start_ms": int(seg["start"] * 1000),
|
|
||||||
"end_ms": int(seg["end"] * 1000),
|
text_value = getattr(transcript, "text", "")
|
||||||
"text": seg["text"].strip(),
|
return str(text_value).strip()
|
||||||
})
|
|
||||||
return segments
|
|
||||||
|
def verify_supabase_connection() -> None:
|
||||||
|
supabase.table("categories").select("category_id").limit(1).execute()
|
||||||
|
|
||||||
|
|
||||||
|
def insert_post(
|
||||||
|
*,
|
||||||
|
user_id: int,
|
||||||
|
title: str | None,
|
||||||
|
transcribed_text: str,
|
||||||
|
audio_url: str,
|
||||||
|
is_private: bool,
|
||||||
|
image_url: str | None,
|
||||||
|
category_ids: list[int],
|
||||||
|
) -> int:
|
||||||
|
post_payload = {
|
||||||
|
"user_id": user_id,
|
||||||
|
"title": title,
|
||||||
|
"transcribed_text": transcribed_text,
|
||||||
|
"audio_url": audio_url,
|
||||||
|
"is_private": is_private,
|
||||||
|
"image_url": image_url,
|
||||||
|
}
|
||||||
|
|
||||||
|
post_response = supabase.table("posts").insert(post_payload).execute()
|
||||||
|
post_rows = getattr(post_response, "data", None) or []
|
||||||
|
if not post_rows:
|
||||||
|
raise RuntimeError("Supabase insert failed for posts table.")
|
||||||
|
|
||||||
|
post_id = int(post_rows[0]["post_id"])
|
||||||
|
|
||||||
|
if category_ids:
|
||||||
|
category_rows = [
|
||||||
|
{"post_id": post_id, "category_id": category_id}
|
||||||
|
for category_id in category_ids
|
||||||
|
]
|
||||||
|
supabase.table("post_categories").insert(category_rows).execute()
|
||||||
|
|
||||||
|
return post_id
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/health")
|
||||||
|
def health_check():
|
||||||
|
return jsonify({"status": "ok"})
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/health/db")
|
||||||
|
def db_health_check():
|
||||||
|
try:
|
||||||
|
verify_supabase_connection()
|
||||||
|
return jsonify({"status": "ok", "database": "supabase"})
|
||||||
|
except Exception as error:
|
||||||
|
return jsonify({"status": "error", "details": str(error)}), 500
|
||||||
|
|
||||||
|
|
||||||
@app.post("/upload-audio")
|
@app.post("/upload-audio")
|
||||||
def upload_audio():
|
def upload_audio():
|
||||||
|
if "file" not in request.files:
|
||||||
|
return jsonify({"error": "Missing 'file' in form-data."}), 400
|
||||||
|
|
||||||
file = request.files["file"]
|
file = request.files["file"]
|
||||||
local_path = f"/tmp/{file.filename}"
|
if not file.filename:
|
||||||
|
return jsonify({"error": "Filename is empty."}), 400
|
||||||
|
|
||||||
|
if not allowed_file(file.filename):
|
||||||
|
return jsonify({"error": "Unsupported file extension."}), 400
|
||||||
|
|
||||||
|
user_id_raw = request.form.get("user_id")
|
||||||
|
if not user_id_raw:
|
||||||
|
return jsonify({"error": "'user_id' is required in form-data."}), 400
|
||||||
|
|
||||||
|
try:
|
||||||
|
user_id = int(user_id_raw)
|
||||||
|
except ValueError:
|
||||||
|
return jsonify({"error": "'user_id' must be an integer."}), 400
|
||||||
|
|
||||||
|
try:
|
||||||
|
category_ids = parse_category_ids(request.form.get("category_ids"))
|
||||||
|
except ValueError:
|
||||||
|
return jsonify(
|
||||||
|
{"error": "'category_ids' must be a comma-separated list of integers."}
|
||||||
|
), 400
|
||||||
|
|
||||||
|
title = request.form.get("title")
|
||||||
|
image_url = request.form.get("image_url")
|
||||||
|
is_private = parse_bool(request.form.get("is_private"), default=False)
|
||||||
|
|
||||||
|
safe_name = secure_filename(file.filename)
|
||||||
|
unique_name = f"{uuid.uuid4()}_{safe_name}"
|
||||||
|
local_path = UPLOAD_DIR / unique_name
|
||||||
file.save(local_path)
|
file.save(local_path)
|
||||||
|
|
||||||
segments = transcribe_with_timestamps(local_path)
|
try:
|
||||||
|
transcript_text = transcribe_audio(local_path)
|
||||||
|
post_id = insert_post(
|
||||||
|
user_id=user_id,
|
||||||
|
title=title,
|
||||||
|
transcribed_text=transcript_text,
|
||||||
|
audio_url=str(local_path).replace("\\", "/"),
|
||||||
|
is_private=is_private,
|
||||||
|
image_url=image_url,
|
||||||
|
category_ids=category_ids,
|
||||||
|
)
|
||||||
|
except Exception as error:
|
||||||
|
return jsonify({"error": "Failed to process audio", "details": str(error)}), 500
|
||||||
|
|
||||||
# TODO: insert `segments` into transcript_segments table
|
return jsonify(
|
||||||
return jsonify({"segments": segments})
|
{
|
||||||
|
"message": "Audio uploaded, transcribed, and saved to Supabase.",
|
||||||
|
"post_id": post_id,
|
||||||
|
"transcribed_text": transcript_text,
|
||||||
|
"audio_url": str(local_path).replace("\\", "/"),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
app.run(host="0.0.0.0", port=int(os.getenv("PORT", "5000")), debug=True)
|
||||||
|
|||||||
Reference in New Issue
Block a user