feat: not working download zip file
This commit is contained in:
Binary file not shown.
Binary file not shown.
@@ -4,18 +4,18 @@ Includes auth, upload+transcription, history, and RAG search workflow.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import io
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import uuid
|
import uuid
|
||||||
import re
|
|
||||||
import zipfile
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, List
|
import io
|
||||||
|
import zipfile
|
||||||
|
from flask import send_file
|
||||||
|
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from faster_whisper import WhisperModel
|
from faster_whisper import WhisperModel
|
||||||
from flask import Blueprint, jsonify, request, send_file
|
from flask import Blueprint, jsonify, request
|
||||||
from werkzeug.security import check_password_hash, generate_password_hash
|
from werkzeug.security import check_password_hash, generate_password_hash
|
||||||
from werkzeug.utils import secure_filename
|
from werkzeug.utils import secure_filename
|
||||||
|
|
||||||
@@ -25,8 +25,6 @@ from db_queries import (
|
|||||||
add_rag_chunks,
|
add_rag_chunks,
|
||||||
create_audio_post,
|
create_audio_post,
|
||||||
create_user,
|
create_user,
|
||||||
download_storage_object_by_stored_path,
|
|
||||||
get_archive_file_by_role,
|
|
||||||
get_archive_metadata,
|
get_archive_metadata,
|
||||||
get_original_audio_url,
|
get_original_audio_url,
|
||||||
get_archive_rights,
|
get_archive_rights,
|
||||||
@@ -40,7 +38,6 @@ from db_queries import (
|
|||||||
list_rag_chunks,
|
list_rag_chunks,
|
||||||
list_user_history,
|
list_user_history,
|
||||||
search_rag_chunks,
|
search_rag_chunks,
|
||||||
search_rag_chunks_vector,
|
|
||||||
update_audio_post,
|
update_audio_post,
|
||||||
upload_storage_object,
|
upload_storage_object,
|
||||||
upsert_archive_metadata,
|
upsert_archive_metadata,
|
||||||
@@ -103,7 +100,7 @@ def _build_prompt(transcript_text: str, title: str) -> str:
|
|||||||
f"{transcript_text}\n\n"
|
f"{transcript_text}\n\n"
|
||||||
"Answer user questions grounded in this transcript."
|
"Answer user questions grounded in this transcript."
|
||||||
)
|
)
|
||||||
def _add_audio_url(post: dict[str, Any]) -> dict[str, Any]:
|
def _add_audio_url(post: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Add signed audio URL to post if ready"""
|
"""Add signed audio URL to post if ready"""
|
||||||
if post.get("status") == "ready":
|
if post.get("status") == "ready":
|
||||||
try:
|
try:
|
||||||
@@ -114,29 +111,6 @@ def _add_audio_url(post: dict[str, Any]) -> dict[str, Any]:
|
|||||||
return post
|
return post
|
||||||
|
|
||||||
|
|
||||||
def _local_embedding(text: str, dimensions: int = 1536) -> List[float]:
|
|
||||||
"""
|
|
||||||
Free deterministic embedding fallback (offline).
|
|
||||||
Replace with model-based embeddings later if needed.
|
|
||||||
"""
|
|
||||||
vector = [0.0] * dimensions
|
|
||||||
tokens = re.findall(r"[A-Za-z0-9']+", text.lower())
|
|
||||||
if not tokens:
|
|
||||||
return vector
|
|
||||||
|
|
||||||
for token in tokens:
|
|
||||||
digest = hashlib.sha256(token.encode("utf-8")).digest()
|
|
||||||
idx = int.from_bytes(digest[:4], "big") % dimensions
|
|
||||||
sign = 1.0 if (digest[4] & 1) == 0 else -1.0
|
|
||||||
weight = 1.0 + (digest[5] / 255.0) * 0.25
|
|
||||||
vector[idx] += sign * weight
|
|
||||||
|
|
||||||
norm = sum(v * v for v in vector) ** 0.5
|
|
||||||
if norm > 0:
|
|
||||||
vector = [v / norm for v in vector]
|
|
||||||
return vector
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@api.get("/health")
|
@api.get("/health")
|
||||||
def health():
|
def health():
|
||||||
@@ -304,7 +278,7 @@ def api_upload_post():
|
|||||||
"end_sec": float(seg.end),
|
"end_sec": float(seg.end),
|
||||||
"text": segment_text,
|
"text": segment_text,
|
||||||
"confidence": float(seg.avg_logprob) if seg.avg_logprob is not None else None,
|
"confidence": float(seg.avg_logprob) if seg.avg_logprob is not None else None,
|
||||||
"embedding": _local_embedding(segment_text),
|
"embedding": None,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -390,31 +364,17 @@ def api_user_history(user_id: int):
|
|||||||
def api_rag_search():
|
def api_rag_search():
|
||||||
query_text = (request.args.get("q") or "").strip()
|
query_text = (request.args.get("q") or "").strip()
|
||||||
user_id = request.args.get("user_id", type=int)
|
user_id = request.args.get("user_id", type=int)
|
||||||
query_embedding_raw = request.args.get("query_embedding")
|
|
||||||
page = request.args.get("page", default=1, type=int)
|
page = request.args.get("page", default=1, type=int)
|
||||||
limit = request.args.get("limit", default=30, type=int)
|
limit = request.args.get("limit", default=30, type=int)
|
||||||
|
|
||||||
if not user_id:
|
if not user_id:
|
||||||
return _error("'user_id' is required.", 400)
|
return _error("'user_id' is required.", 400)
|
||||||
|
if not query_text:
|
||||||
|
return _error("'q' is required.", 400)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if query_embedding_raw:
|
|
||||||
try:
|
|
||||||
parsed = json.loads(query_embedding_raw)
|
|
||||||
if not isinstance(parsed, list):
|
|
||||||
return _error("'query_embedding' must be a JSON array.", 400)
|
|
||||||
query_embedding = [float(v) for v in parsed]
|
|
||||||
except Exception:
|
|
||||||
return _error("Invalid 'query_embedding'. Example: [0.1,0.2,...]", 400)
|
|
||||||
|
|
||||||
rows = search_rag_chunks_vector(user_id=user_id, query_embedding=query_embedding, limit=limit)
|
|
||||||
return jsonify({"results": rows, "mode": "vector", "limit": min(max(1, limit), 100)})
|
|
||||||
|
|
||||||
if not query_text:
|
|
||||||
return _error("'q' is required when 'query_embedding' is not provided.", 400)
|
|
||||||
|
|
||||||
rows = search_rag_chunks(user_id=user_id, query_text=query_text, page=page, limit=limit)
|
rows = search_rag_chunks(user_id=user_id, query_text=query_text, page=page, limit=limit)
|
||||||
return jsonify({"results": rows, "mode": "text", "page": page, "limit": min(max(1, limit), 100)})
|
return jsonify({"results": rows, "page": page, "limit": min(max(1, limit), 100)})
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return _error(str(e), 500)
|
return _error(str(e), 500)
|
||||||
|
|
||||||
@@ -533,64 +493,6 @@ def api_post_audio_url(post_id: int):
|
|||||||
return _error(str(e), 500)
|
return _error(str(e), 500)
|
||||||
|
|
||||||
|
|
||||||
@api.get("/posts/<int:post_id>/archive.zip")
def api_post_archive_zip(post_id: int):
    """
    Download a complete archive zip for a post.

    Private posts require owner user_id in query params.

    The zip always contains ``post.json``, ``metadata.json``, ``rights.json``,
    ``rag_chunks.json`` and ``audit_log.json`` taken from the post bundle.
    ``transcript.txt`` is added when the RAG chunks carry any text, and the
    original audio is added under ``original/`` when an ``original_audio``
    file with a stored path exists.

    Returns:
        The zip as an attachment, or an error response: 404 when the post or
        its bundle is missing, 403 for an unauthorized private download, and
        500 when building the archive fails.
    """
    row = get_audio_post_by_id(post_id)
    if not row:
        return _error("Post not found.", 404)

    requester_id = request.args.get("user_id", type=int)
    # A missing requester must never match: comparing None to a post without
    # an owner would otherwise let anonymous callers through.
    if row.get("visibility") == "private" and (
        requester_id is None or requester_id != row.get("user_id")
    ):
        return _error("Not authorized to download this private archive.", 403)

    # (zip entry name, bundle key, value used when the key is absent)
    json_artifacts = (
        ("post.json", "post", {}),
        ("metadata.json", "metadata", {}),
        ("rights.json", "rights", {}),
        ("rag_chunks.json", "rag_chunks", []),
        ("audit_log.json", "audit_log", []),
    )

    try:
        bundle = get_post_bundle(post_id)
        if not bundle:
            return _error("Post bundle not found.", 404)

        archive_buf = io.BytesIO()
        with zipfile.ZipFile(archive_buf, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
            # Always include core JSON artifacts.
            for entry_name, key, fallback in json_artifacts:
                zf.writestr(
                    entry_name,
                    json.dumps(bundle.get(key, fallback), indent=2, default=str),
                )

            # Derive transcript from rag chunks.
            chunks = bundle.get("rag_chunks", []) or []
            transcript_text = " ".join(
                (chunk.get("text") or "").strip() for chunk in chunks if chunk.get("text")
            ).strip()
            if transcript_text:
                zf.writestr("transcript.txt", transcript_text)

            # Include original media from storage if present.
            original_file = get_archive_file_by_role(post_id, "original_audio")
            if original_file and original_file.get("path"):
                stored_path = original_file["path"]
                original_bytes = download_storage_object_by_stored_path(stored_path)
                # A path ending in "/" has an empty last segment; fall back
                # to a generated name.
                source_name = stored_path.split("/")[-1] or f"post_{post_id}_original.bin"
                zf.writestr(f"original/{source_name}", original_bytes)

        # Rewind so send_file streams the buffer from its first byte.
        archive_buf.seek(0)
        return send_file(
            archive_buf,
            mimetype="application/zip",
            as_attachment=True,
            download_name=f"voicevault_post_{post_id}_archive.zip",
        )
    except Exception as e:
        return _error(f"Failed to build archive zip: {e}", 500)
|
|
||||||
|
|
||||||
|
|
||||||
@api.post("/posts/<int:post_id>/files")
|
@api.post("/posts/<int:post_id>/files")
|
||||||
def api_add_file(post_id: int):
|
def api_add_file(post_id: int):
|
||||||
payload = request.get_json(force=True, silent=False) or {}
|
payload = request.get_json(force=True, silent=False) or {}
|
||||||
@@ -707,3 +609,36 @@ def api_post_audit(post_id: int):
|
|||||||
return jsonify({"logs": list_audit_logs(post_id=post_id, page=page, limit=limit)})
|
return jsonify({"logs": list_audit_logs(post_id=post_id, page=page, limit=limit)})
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return _error(str(e), 500)
|
return _error(str(e), 500)
|
||||||
|
|
||||||
|
@api.get("/posts/<int:post_id>/download")
def download_post(post_id: int):
    """
    Download a post's files as a zip attachment.

    The zip contains ``metadata.json`` when the stored archive metadata has a
    non-empty ``"metadata"`` entry, plus every ``original_audio`` file fetched
    through its signed URL. Files with any other role are not downloaded.
    A file that cannot be fetched is logged and left out rather than failing
    the whole download.

    Private posts require the owner's ``user_id`` in the query params.

    Returns:
        The zip as an attachment named after the post title, a 404 JSON error
        when the post does not exist, or a 403 JSON error for an unauthorized
        private download.
    """
    # Imported here because no HTTP client import is visible at module level.
    # The earlier `requests.get(...)` call depended on a name that is not
    # imported there, and the broad except below hid the resulting NameError,
    # so the audio never reached the zip.
    import logging
    import urllib.request

    logger = logging.getLogger(__name__)

    post = get_audio_post_by_id(post_id)
    if not post:
        return jsonify({"error": "Post not found"}), 404

    requester_id = request.args.get("user_id", type=int)
    # A missing requester must never match, even for a post without an owner.
    if post.get("visibility") == "private" and (
        requester_id is None or requester_id != post.get("user_id")
    ):
        return jsonify({"error": "Not authorized to download this private archive."}), 403

    files = list_archive_files(post_id) or []
    metadata = get_archive_metadata(post_id) or {}

    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zipf:
        # NOTE(review): only written when the row has a truthy "metadata"
        # key - confirm this matches what get_archive_metadata returns.
        if metadata.get("metadata"):
            # default=str keeps non-JSON-native values (e.g. timestamps)
            # from raising TypeError.
            zipf.writestr("metadata.json", json.dumps(metadata, indent=2, default=str))

        for f in files:
            if f.get("role") != "original_audio":
                continue
            try:
                signed_url = get_original_audio_url(post_id)["signed_url"]
                # Only fetch real web URLs; urlopen would also accept file: URLs.
                if not signed_url or not signed_url.startswith(("http://", "https://")):
                    continue
                with urllib.request.urlopen(signed_url, timeout=30) as resp:
                    if resp.status != 200:
                        continue
                    content = resp.read()
                filename = f"{f['role']}_{f['path'].split('/')[-1]}"
                zipf.writestr(filename, content)
            except Exception:
                # Best effort: keep building the archive without this file.
                logger.exception("Failed to add file to archive for post %s", post_id)

    # Rewind so send_file streams the buffer from its first byte.
    zip_buffer.seek(0)
    # Fall back to a generated name when the post has no title.
    title = str(post.get("title") or f"post_{post_id}")
    return send_file(
        zip_buffer,
        mimetype="application/zip",
        as_attachment=True,
        download_name=f"{title.replace(' ', '_')}.zip",
    )
|
||||||
|
|||||||
Reference in New Issue
Block a user