diff --git a/README.md b/README.md
index 35e59b9..da6cbb7 100644
--- a/README.md
+++ b/README.md
@@ -68,6 +68,7 @@ History + RAG:
 
 Playback:
 - `GET /api/posts//audio-url?user_id=` (required for private posts)
+- `GET /api/posts//archive.zip?user_id=` (download archive package; required for private posts)
 
 Post data:
 - `GET /api/posts`
diff --git a/backend/__pycache__/api_routes.cpython-311.pyc b/backend/__pycache__/api_routes.cpython-311.pyc
index e9bccd3..2a83a73 100644
Binary files a/backend/__pycache__/api_routes.cpython-311.pyc and b/backend/__pycache__/api_routes.cpython-311.pyc differ
diff --git a/backend/__pycache__/db_queries.cpython-311.pyc b/backend/__pycache__/db_queries.cpython-311.pyc
index a92005f..f2f0aa7 100644
Binary files a/backend/__pycache__/db_queries.cpython-311.pyc and b/backend/__pycache__/db_queries.cpython-311.pyc differ
diff --git a/backend/api_routes.py b/backend/api_routes.py
index 40887ff..f6d474d 100644
--- a/backend/api_routes.py
+++ b/backend/api_routes.py
@@ -4,16 +4,18 @@ Includes auth, upload+transcription, history, and RAG search workflow.
 """
 
 import hashlib
+import io
 import json
 import os
 import uuid
 import re
+import zipfile
 from pathlib import Path
 from typing import Any, List
 
 from dotenv import load_dotenv
 from faster_whisper import WhisperModel
-from flask import Blueprint, jsonify, request
+from flask import Blueprint, jsonify, request, send_file
 from werkzeug.security import check_password_hash, generate_password_hash
 from werkzeug.utils import secure_filename
 
@@ -23,6 +25,8 @@ from db_queries import (
     add_rag_chunks,
     create_audio_post,
     create_user,
+    download_storage_object_by_stored_path,
+    get_archive_file_by_role,
     get_archive_metadata,
     get_original_audio_url,
     get_archive_rights,
@@ -529,6 +533,73 @@ def api_post_audio_url(post_id: int):
         return _error(str(e), 500)
 
 
+@api.get("/posts/<int:post_id>/archive.zip")
+def api_post_archive_zip(post_id: int):
+    """
+    Download a complete archive zip for a post.
+
+    The zip holds the bundle's JSON artifacts, a transcript joined from the
+    rag chunk texts (when any are non-blank), and the original media file
+    when an "original_audio" file row with a stored path exists.
+
+    Private posts require owner user_id in query params; a missing or
+    non-integer user_id is rejected with 403.
+    """
+    row = get_audio_post_by_id(post_id)
+    if not row:
+        return _error("Post not found.", 404)
+
+    if row.get("visibility") == "private":
+        requester_id = request.args.get("user_id", type=int)
+        # Reject a missing user_id outright: with an unset owner id the plain
+        # inequality test would compare None with None and grant access.
+        if requester_id is None or requester_id != row.get("user_id"):
+            return _error("Not authorized to download this private archive.", 403)
+
+    try:
+        bundle = get_post_bundle(post_id)
+        if not bundle:
+            return _error("Post bundle not found.", 404)
+
+        archive_buf = io.BytesIO()
+        with zipfile.ZipFile(archive_buf, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
+            # Always include core JSON artifacts.
+            zf.writestr("post.json", json.dumps(bundle.get("post", {}), indent=2, default=str))
+            zf.writestr("metadata.json", json.dumps(bundle.get("metadata", {}), indent=2, default=str))
+            zf.writestr("rights.json", json.dumps(bundle.get("rights", {}), indent=2, default=str))
+            zf.writestr("rag_chunks.json", json.dumps(bundle.get("rag_chunks", []), indent=2, default=str))
+            zf.writestr("audit_log.json", json.dumps(bundle.get("audit_log", []), indent=2, default=str))
+
+            # Derive transcript from rag chunks; blank texts are dropped so the
+            # joined transcript never contains doubled spaces.
+            chunks = bundle.get("rag_chunks", []) or []
+            texts = ((chunk.get("text") or "").strip() for chunk in chunks)
+            transcript_text = " ".join(text for text in texts if text)
+            if transcript_text:
+                zf.writestr("transcript.txt", transcript_text)
+
+            # Include original media from Supabase Storage if present.
+            original_file = get_archive_file_by_role(post_id, "original_audio")
+            if original_file and original_file.get("path"):
+                original_bytes = download_storage_object_by_stored_path(original_file["path"])
+                source_name = original_file["path"].split("/")[-1]
+                # "", "." and ".." cannot serve as a file name inside the zip.
+                if source_name in ("", ".", ".."):
+                    source_name = f"post_{post_id}_original.bin"
+                zf.writestr(f"original/{source_name}", original_bytes)
+
+        archive_buf.seek(0)
+        download_name = f"voicevault_post_{post_id}_archive.zip"
+        return send_file(
+            archive_buf,
+            mimetype="application/zip",
+            as_attachment=True,
+            download_name=download_name,
+        )
+    except Exception as e:
+        return _error(f"Failed to build archive zip: {e}", 500)
+
+
 @api.post("/posts//files")
 def api_add_file(post_id: int):
     payload = request.get_json(force=True, silent=False) or {}
diff --git a/backend/db_queries.py b/backend/db_queries.py
index b7a1115..441e4fd 100644
--- a/backend/db_queries.py
+++ b/backend/db_queries.py
@@ -111,6 +111,36 @@ def get_original_audio_url(post_id: int, expires_in: int = 3600) -> Dict[str, An
     }
 
 
+def get_archive_file_by_role(post_id: int, role: str) -> Optional[Dict[str, Any]]:
+    """
+    Fetch at most one archive_files row matching post_id and role.
+    """
+    response = (
+        supabase.table("archive_files")
+        .select("*")
+        .eq("post_id", post_id)
+        .eq("role", role)
+        .limit(1)
+        .execute()
+    )
+    return _first(response)
+
+
+def download_storage_object_by_stored_path(stored_path: str) -> bytes:
+    """
+    Download object bytes from a stored path like
+    'archives/user/uuid/original/file.mp4'.
+
+    Raises RuntimeError when the download result is not bytes or
+    bytearray.
+    """
+    bucket, object_path = _parse_bucket_path(stored_path)
+    content = supabase.storage.from_(bucket).download(object_path)
+    if isinstance(content, (bytes, bytearray)):
+        return bytes(content)
+    raise RuntimeError("Failed to download storage object content.")
+
+
 # ==================== Users ====================
 
 def create_user(payload: Dict[str, Any]) -> Dict[str, Any]: