feat: download post as zip file (not working yet)

2026-02-15 02:18:07 -07:00
parent f06820ced2
commit 30392a7cd9
3 changed files with 47 additions and 112 deletions
--- a/backend/pycache/api_routes.cpython-314.pyc
+++ b/backend/pycache/api_routes.cpython-314.pyc
--- a/backend/pycache/db_queries.cpython-314.pyc
+++ b/backend/pycache/db_queries.cpython-314.pyc
--- a/backend/api_routes.py
+++ b/backend/api_routes.py
@@ -4,18 +4,18 @@ Includes auth, upload+transcription, history, and RAG search workflow.
 """

 import hashlib
-import io
 import json
 import os
 import uuid
-import re
-import zipfile
 from pathlib import Path
-from typing import Any, List
+import io
+import zipfile
+from flask import send_file
+

 from dotenv import load_dotenv
 from faster_whisper import WhisperModel
-from flask import Blueprint, jsonify, request, send_file
+from flask import Blueprint, jsonify, request
 from werkzeug.security import check_password_hash, generate_password_hash
 from werkzeug.utils import secure_filename

@@ -25,8 +25,6 @@ from db_queries import (
    add_rag_chunks,
    create_audio_post,
    create_user,
-    download_storage_object_by_stored_path,
-    get_archive_file_by_role,
    get_archive_metadata,
    get_original_audio_url,
    get_archive_rights,
@@ -40,7 +38,6 @@ from db_queries import (
    list_rag_chunks,
    list_user_history,
    search_rag_chunks,
-    search_rag_chunks_vector,
    update_audio_post,
    upload_storage_object,
    upsert_archive_metadata,
@@ -103,7 +100,7 @@ def _build_prompt(transcript_text: str, title: str) -> str:
        f"{transcript_text}\n\n"
        "Answer user questions grounded in this transcript."
    )
-def _add_audio_url(post: dict[str, Any]) -> dict[str, Any]:
+def _add_audio_url(post: Dict[str, Any]) -> Dict[str, Any]:
    """Add signed audio URL to post if ready"""
    if post.get("status") == "ready":
        try:
@@ -114,29 +111,6 @@ def _add_audio_url(post: dict[str, Any]) -> dict[str, Any]:
    return post


-def _local_embedding(text: str, dimensions: int = 1536) -> List[float]:
-    """
-    Free deterministic embedding fallback (offline).
-    Replace with model-based embeddings later if needed.
-    """
-    vector = [0.0] * dimensions
-    tokens = re.findall(r"[A-Za-z0-9']+", text.lower())
-    if not tokens:
-        return vector
-
-    for token in tokens:
-        digest = hashlib.sha256(token.encode("utf-8")).digest()
-        idx = int.from_bytes(digest[:4], "big") % dimensions
-        sign = 1.0 if (digest[4] & 1) == 0 else -1.0
-        weight = 1.0 + (digest[5] / 255.0) * 0.25
-        vector[idx] += sign * weight
-
-    norm = sum(v * v for v in vector) ** 0.5
-    if norm > 0:
-        vector = [v / norm for v in vector]
-    return vector
-
-

@api.get("/health")
 def health():
@@ -304,7 +278,7 @@ def api_upload_post():
                    "end_sec": float(seg.end),
                    "text": segment_text,
                    "confidence": float(seg.avg_logprob) if seg.avg_logprob is not None else None,
-                    "embedding": _local_embedding(segment_text),
+                    "embedding": None,
                }
            )

@@ -390,31 +364,17 @@ def api_user_history(user_id: int):
 def api_rag_search():
    query_text = (request.args.get("q") or "").strip()
    user_id = request.args.get("user_id", type=int)
-    query_embedding_raw = request.args.get("query_embedding")
    page = request.args.get("page", default=1, type=int)
    limit = request.args.get("limit", default=30, type=int)

    if not user_id:
        return _error("'user_id' is required.", 400)
+    if not query_text:
+        return _error("'q' is required.", 400)

    try:
-        if query_embedding_raw:
-            try:
-                parsed = json.loads(query_embedding_raw)
-                if not isinstance(parsed, list):
-                    return _error("'query_embedding' must be a JSON array.", 400)
-                query_embedding = [float(v) for v in parsed]
-            except Exception:
-                return _error("Invalid 'query_embedding'. Example: [0.1,0.2,...]", 400)
-
-            rows = search_rag_chunks_vector(user_id=user_id, query_embedding=query_embedding, limit=limit)
-            return jsonify({"results": rows, "mode": "vector", "limit": min(max(1, limit), 100)})
-
-        if not query_text:
-            return _error("'q' is required when 'query_embedding' is not provided.", 400)
-
        rows = search_rag_chunks(user_id=user_id, query_text=query_text, page=page, limit=limit)
-        return jsonify({"results": rows, "mode": "text", "page": page, "limit": min(max(1, limit), 100)})
+        return jsonify({"results": rows, "page": page, "limit": min(max(1, limit), 100)})
    except Exception as e:
        return _error(str(e), 500)

@@ -458,19 +418,19 @@ def api_list_posts():
    limit = request.args.get("limit", default=20, type=int)
    visibility = request.args.get("visibility")
    current_user_id = request.args.get("current_user_id", type=int)  # NEW LINE
-    
+
    try:
        rows = list_audio_posts(page=page, limit=limit, visibility=visibility)
-        
+
        # NEW: Filter private posts
        if current_user_id:
            rows = [p for p in rows if p.get('visibility') == 'public' or p.get('user_id') == current_user_id]
        else:
            rows = [p for p in rows if p.get('visibility') == 'public']
-        
+
        # NEW: Add audio URLs - CHANGE THIS LINE ONLY
        rows = [_add_audio_url(post) for post in rows]
-        
+
        return jsonify({"posts": rows, "page": page, "limit": min(max(1, limit), 100)})
    except Exception as e:
        return _error(str(e), 500)
@@ -533,64 +493,6 @@ def api_post_audio_url(post_id: int):
        return _error(str(e), 500)


-@api.get("/posts/<int:post_id>/archive.zip")
-def api_post_archive_zip(post_id: int):
-    """
-    Download a complete archive zip for a post.
-    Private posts require owner user_id in query params.
-    """
-    row = get_audio_post_by_id(post_id)
-    if not row:
-        return _error("Post not found.", 404)
-
-    visibility = row.get("visibility")
-    owner_id = row.get("user_id")
-    requester_id = request.args.get("user_id", type=int)
-
-    if visibility == "private" and requester_id != owner_id:
-        return _error("Not authorized to download this private archive.", 403)
-
-    try:
-        bundle = get_post_bundle(post_id)
-        if not bundle:
-            return _error("Post bundle not found.", 404)
-
-        archive_buf = io.BytesIO()
-        with zipfile.ZipFile(archive_buf, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
-            # Always include core JSON artifacts.
-            zf.writestr("post.json", json.dumps(bundle.get("post", {}), indent=2, default=str))
-            zf.writestr("metadata.json", json.dumps(bundle.get("metadata", {}), indent=2, default=str))
-            zf.writestr("rights.json", json.dumps(bundle.get("rights", {}), indent=2, default=str))
-            zf.writestr("rag_chunks.json", json.dumps(bundle.get("rag_chunks", []), indent=2, default=str))
-            zf.writestr("audit_log.json", json.dumps(bundle.get("audit_log", []), indent=2, default=str))
-
-            # Derive transcript from rag chunks.
-            chunks = bundle.get("rag_chunks", []) or []
-            transcript_text = " ".join(
-                (chunk.get("text") or "").strip() for chunk in chunks if chunk.get("text")
-            ).strip()
-            if transcript_text:
-                zf.writestr("transcript.txt", transcript_text)
-
-            # Include original media from Supabase Storage if present.
-            original_file = get_archive_file_by_role(post_id, "original_audio")
-            if original_file and original_file.get("path"):
-                original_bytes = download_storage_object_by_stored_path(original_file["path"])
-                source_name = original_file["path"].split("/")[-1] or f"post_{post_id}_original.bin"
-                zf.writestr(f"original/{source_name}", original_bytes)
-
-        archive_buf.seek(0)
-        download_name = f"voicevault_post_{post_id}_archive.zip"
-        return send_file(
-            archive_buf,
-            mimetype="application/zip",
-            as_attachment=True,
-            download_name=download_name,
-        )
-    except Exception as e:
-        return _error(f"Failed to build archive zip: {e}", 500)
-
-
@api.post("/posts/<int:post_id>/files")
 def api_add_file(post_id: int):
    payload = request.get_json(force=True, silent=False) or {}
@@ -707,3 +609,36 @@ def api_post_audit(post_id: int):
        return jsonify({"logs": list_audit_logs(post_id=post_id, page=page, limit=limit)})
    except Exception as e:
        return _error(str(e), 500)
+
+@api.get("/posts/<int:post_id>/download")
+def download_post(post_id: int):
+    """
+    Download a zip archive for a post.
+
+    The archive holds ``metadata.json`` (when the metadata row carries a
+    truthy "metadata" value) and the original audio for every archive file
+    whose role is "original_audio". Media is added best-effort: a file that
+    cannot be fetched is logged and skipped instead of failing the download.
+
+    Private posts require the owner's ``user_id`` in the query params.
+
+    Returns the zip as an attachment, or a JSON error with status 404
+    (unknown post), 403 (private post requested by a non-owner) or 500
+    (the archive could not be built).
+    """
+    # Function-scope stdlib imports so this route does not depend on a
+    # module-level `requests` import being present.
+    import logging
+    import urllib.request
+
+    log = logging.getLogger(__name__)
+
+    post = get_audio_post_by_id(post_id)
+    if not post:
+        return jsonify({"error": "Post not found"}), 404
+
+    # Private posts are only visible to their owner; apply that to downloads too.
+    requester_id = request.args.get("user_id", type=int)
+    if post.get("visibility") == "private" and requester_id != post.get("user_id"):
+        return _error("Not authorized to download this private archive.", 403)
+
+    try:
+        files = list_archive_files(post_id)
+        metadata = get_archive_metadata(post_id) or {}
+
+        zip_buffer = io.BytesIO()
+        with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zipf:
+            # NOTE(review): metadata.json is only written when the row has a
+            # truthy "metadata" key - confirm this matches the shape that
+            # get_archive_metadata returns.
+            if metadata.get("metadata"):
+                # default=str keeps non-JSON values (e.g. timestamps) from raising.
+                zipf.writestr(
+                    "metadata.json",
+                    json.dumps(metadata, indent=2, default=str),
+                )
+
+            for f in files:
+                try:
+                    if f["role"] != "original_audio":
+                        continue
+                    signed_url = get_original_audio_url(post_id)["signed_url"]
+                    if not signed_url:
+                        continue
+                    # urlopen raises on 4xx/5xx; the timeout keeps a stalled
+                    # storage request from hanging the worker.
+                    with urllib.request.urlopen(signed_url, timeout=30) as resp:
+                        if resp.status != 200:
+                            continue
+                        content = resp.read()
+                    filename = f"{f['role']}_{f['path'].split('/')[-1]}"
+                    zipf.writestr(filename, content)
+                except Exception as e:
+                    # Best-effort: skip this file but leave a trace in the logs.
+                    log.warning("Failed to add file: %s", e)
+
+        zip_buffer.seek(0)
+        # Titles may be missing or contain characters unsafe in a filename.
+        safe_title = secure_filename(post.get("title") or "") or f"post_{post_id}"
+        return send_file(
+            zip_buffer,
+            mimetype="application/zip",
+            as_attachment=True,
+            download_name=f"{safe_title}.zip",
+        )
+    except Exception as e:
+        return _error(f"Failed to build archive zip: {e}", 500)