feat: not working download zip file

This commit is contained in:
Mann Patel
2026-02-15 02:18:07 -07:00
parent f06820ced2
commit 30392a7cd9
3 changed files with 47 additions and 112 deletions

View File

@@ -4,18 +4,18 @@ Includes auth, upload+transcription, history, and RAG search workflow.
"""
import hashlib
import io
import json
import os
import uuid
import re
import zipfile
from pathlib import Path
from typing import Any, List
import io
import zipfile
from flask import send_file
from dotenv import load_dotenv
from faster_whisper import WhisperModel
from flask import Blueprint, jsonify, request, send_file
from flask import Blueprint, jsonify, request
from werkzeug.security import check_password_hash, generate_password_hash
from werkzeug.utils import secure_filename
@@ -25,8 +25,6 @@ from db_queries import (
add_rag_chunks,
create_audio_post,
create_user,
download_storage_object_by_stored_path,
get_archive_file_by_role,
get_archive_metadata,
get_original_audio_url,
get_archive_rights,
@@ -40,7 +38,6 @@ from db_queries import (
list_rag_chunks,
list_user_history,
search_rag_chunks,
search_rag_chunks_vector,
update_audio_post,
upload_storage_object,
upsert_archive_metadata,
@@ -103,7 +100,7 @@ def _build_prompt(transcript_text: str, title: str) -> str:
f"{transcript_text}\n\n"
"Answer user questions grounded in this transcript."
)
def _add_audio_url(post: dict[str, Any]) -> dict[str, Any]:
def _add_audio_url(post: Dict[str, Any]) -> Dict[str, Any]:
"""Add signed audio URL to post if ready"""
if post.get("status") == "ready":
try:
@@ -114,29 +111,6 @@ def _add_audio_url(post: dict[str, Any]) -> dict[str, Any]:
return post
def _local_embedding(text: str, dimensions: int = 1536) -> List[float]:
"""
Free deterministic embedding fallback (offline).
Replace with model-based embeddings later if needed.
"""
vector = [0.0] * dimensions
tokens = re.findall(r"[A-Za-z0-9']+", text.lower())
if not tokens:
return vector
for token in tokens:
digest = hashlib.sha256(token.encode("utf-8")).digest()
idx = int.from_bytes(digest[:4], "big") % dimensions
sign = 1.0 if (digest[4] & 1) == 0 else -1.0
weight = 1.0 + (digest[5] / 255.0) * 0.25
vector[idx] += sign * weight
norm = sum(v * v for v in vector) ** 0.5
if norm > 0:
vector = [v / norm for v in vector]
return vector
@api.get("/health")
def health():
@@ -304,7 +278,7 @@ def api_upload_post():
"end_sec": float(seg.end),
"text": segment_text,
"confidence": float(seg.avg_logprob) if seg.avg_logprob is not None else None,
"embedding": _local_embedding(segment_text),
"embedding": None,
}
)
@@ -390,31 +364,17 @@ def api_user_history(user_id: int):
def api_rag_search():
query_text = (request.args.get("q") or "").strip()
user_id = request.args.get("user_id", type=int)
query_embedding_raw = request.args.get("query_embedding")
page = request.args.get("page", default=1, type=int)
limit = request.args.get("limit", default=30, type=int)
if not user_id:
return _error("'user_id' is required.", 400)
if not query_text:
return _error("'q' is required.", 400)
try:
if query_embedding_raw:
try:
parsed = json.loads(query_embedding_raw)
if not isinstance(parsed, list):
return _error("'query_embedding' must be a JSON array.", 400)
query_embedding = [float(v) for v in parsed]
except Exception:
return _error("Invalid 'query_embedding'. Example: [0.1,0.2,...]", 400)
rows = search_rag_chunks_vector(user_id=user_id, query_embedding=query_embedding, limit=limit)
return jsonify({"results": rows, "mode": "vector", "limit": min(max(1, limit), 100)})
if not query_text:
return _error("'q' is required when 'query_embedding' is not provided.", 400)
rows = search_rag_chunks(user_id=user_id, query_text=query_text, page=page, limit=limit)
return jsonify({"results": rows, "mode": "text", "page": page, "limit": min(max(1, limit), 100)})
return jsonify({"results": rows, "page": page, "limit": min(max(1, limit), 100)})
except Exception as e:
return _error(str(e), 500)
@@ -533,64 +493,6 @@ def api_post_audio_url(post_id: int):
return _error(str(e), 500)
@api.get("/posts/<int:post_id>/archive.zip")
def api_post_archive_zip(post_id: int):
"""
Download a complete archive zip for a post.
Private posts require owner user_id in query params.
"""
row = get_audio_post_by_id(post_id)
if not row:
return _error("Post not found.", 404)
visibility = row.get("visibility")
owner_id = row.get("user_id")
requester_id = request.args.get("user_id", type=int)
if visibility == "private" and requester_id != owner_id:
return _error("Not authorized to download this private archive.", 403)
try:
bundle = get_post_bundle(post_id)
if not bundle:
return _error("Post bundle not found.", 404)
archive_buf = io.BytesIO()
with zipfile.ZipFile(archive_buf, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
# Always include core JSON artifacts.
zf.writestr("post.json", json.dumps(bundle.get("post", {}), indent=2, default=str))
zf.writestr("metadata.json", json.dumps(bundle.get("metadata", {}), indent=2, default=str))
zf.writestr("rights.json", json.dumps(bundle.get("rights", {}), indent=2, default=str))
zf.writestr("rag_chunks.json", json.dumps(bundle.get("rag_chunks", []), indent=2, default=str))
zf.writestr("audit_log.json", json.dumps(bundle.get("audit_log", []), indent=2, default=str))
# Derive transcript from rag chunks.
chunks = bundle.get("rag_chunks", []) or []
transcript_text = " ".join(
(chunk.get("text") or "").strip() for chunk in chunks if chunk.get("text")
).strip()
if transcript_text:
zf.writestr("transcript.txt", transcript_text)
# Include original media from Supabase Storage if present.
original_file = get_archive_file_by_role(post_id, "original_audio")
if original_file and original_file.get("path"):
original_bytes = download_storage_object_by_stored_path(original_file["path"])
source_name = original_file["path"].split("/")[-1] or f"post_{post_id}_original.bin"
zf.writestr(f"original/{source_name}", original_bytes)
archive_buf.seek(0)
download_name = f"voicevault_post_{post_id}_archive.zip"
return send_file(
archive_buf,
mimetype="application/zip",
as_attachment=True,
download_name=download_name,
)
except Exception as e:
return _error(f"Failed to build archive zip: {e}", 500)
@api.post("/posts/<int:post_id>/files")
def api_add_file(post_id: int):
payload = request.get_json(force=True, silent=False) or {}
@@ -707,3 +609,36 @@ def api_post_audit(post_id: int):
return jsonify({"logs": list_audit_logs(post_id=post_id, page=page, limit=limit)})
except Exception as e:
return _error(str(e), 500)
@api.get("/posts/<int:post_id>/download")
def download_post(post_id: int):
post = get_audio_post_by_id(post_id)
if not post:
return jsonify({"error": "Post not found"}), 404
files = list_archive_files(post_id)
metadata = get_archive_metadata(post_id) or {}
zip_buffer = io.BytesIO()
with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zipf:
if metadata.get("metadata"):
zipf.writestr("metadata.json", json.dumps(metadata, indent=2))
for f in files:
try:
signed_url = get_original_audio_url(post_id)["signed_url"] if f["role"] == "original_audio" else None
if signed_url:
r = requests.get(signed_url)
if r.status_code == 200:
filename = f"{f['role']}_{f['path'].split('/')[-1]}"
zipf.writestr(filename, r.content)
except Exception as e:
print("Failed to add file:", e)
zip_buffer.seek(0)
return send_file(
zip_buffer,
mimetype="application/zip",
as_attachment=True,
download_name=f"{post['title'].replace(' ', '_')}.zip"
)