feat: add zip file download for posts (not working yet)

2026-02-15 02:18:07 -07:00
parent f06820ced2
commit 30392a7cd9
3 changed files with 47 additions and 112 deletions
--- a/backend/pycache/api_routes.cpython-314.pyc
+++ b/backend/pycache/api_routes.cpython-314.pyc
--- a/backend/pycache/db_queries.cpython-314.pyc
+++ b/backend/pycache/db_queries.cpython-314.pyc
--- a/backend/api_routes.py
+++ b/backend/api_routes.py
@@ -4,18 +4,18 @@ Includes auth, upload+transcription, history, and RAG search workflow.
 """
 import hashlib
 import io
 import json
 import os
 import uuid
 import re
 import zipfile
 from pathlib import Path
-from typing import Any, List
+import io
 import zipfile
 from flask import send_file
 from dotenv import load_dotenv
 from faster_whisper import WhisperModel
-from flask import Blueprint, jsonify, request, send_file
+from flask import Blueprint, jsonify, request
 from werkzeug.security import check_password_hash, generate_password_hash
 from werkzeug.utils import secure_filename
@@ -25,8 +25,6 @@ from db_queries import (
    add_rag_chunks,
    create_audio_post,
    create_user,
    download_storage_object_by_stored_path,
    get_archive_file_by_role,
    get_archive_metadata,
    get_original_audio_url,
    get_archive_rights,
@@ -40,7 +38,6 @@ from db_queries import (
    list_rag_chunks,
    list_user_history,
    search_rag_chunks,
    search_rag_chunks_vector,
    update_audio_post,
    upload_storage_object,
    upsert_archive_metadata,
@@ -103,7 +100,7 @@ def _build_prompt(transcript_text: str, title: str) -> str:
        f"{transcript_text}\n\n"
        "Answer user questions grounded in this transcript."
    )
-def _add_audio_url(post: dict[str, Any]) -> dict[str, Any]:
+def _add_audio_url(post: Dict[str, Any]) -> Dict[str, Any]:
    """Add signed audio URL to post if ready"""
    if post.get("status") == "ready":
        try:
@@ -114,29 +111,6 @@ def _add_audio_url(post: dict[str, Any]) -> dict[str, Any]:
    return post
 def _local_embedding(text: str, dimensions: int = 1536) -> List[float]:
    """
    Free deterministic embedding fallback (offline).
    Replace with model-based embeddings later if needed.
    """
    vector = [0.0] * dimensions
    tokens = re.findall(r"[A-Za-z0-9']+", text.lower())
    if not tokens:
        return vector
    for token in tokens:
        digest = hashlib.sha256(token.encode("utf-8")).digest()
        idx = int.from_bytes(digest[:4], "big") % dimensions
        sign = 1.0 if (digest[4] & 1) == 0 else -1.0
        weight = 1.0 + (digest[5] / 255.0) * 0.25
        vector[idx] += sign * weight
    norm = sum(v * v for v in vector) ** 0.5
    if norm > 0:
        vector = [v / norm for v in vector]
    return vector
@api.get("/health")
 def health():
@@ -304,7 +278,7 @@ def api_upload_post():
                    "end_sec": float(seg.end),
                    "text": segment_text,
                    "confidence": float(seg.avg_logprob) if seg.avg_logprob is not None else None,
-                    "embedding": _local_embedding(segment_text),
+                    "embedding": None,
                }
            )
@@ -390,31 +364,17 @@ def api_user_history(user_id: int):
 def api_rag_search():
    query_text = (request.args.get("q") or "").strip()
    user_id = request.args.get("user_id", type=int)
    query_embedding_raw = request.args.get("query_embedding")
    page = request.args.get("page", default=1, type=int)
    limit = request.args.get("limit", default=30, type=int)
    if not user_id:
        return _error("'user_id' is required.", 400)
    if not query_text:
        return _error("'q' is required.", 400)
    try:
        if query_embedding_raw:
            try:
                parsed = json.loads(query_embedding_raw)
                if not isinstance(parsed, list):
                    return _error("'query_embedding' must be a JSON array.", 400)
                query_embedding = [float(v) for v in parsed]
            except Exception:
                return _error("Invalid 'query_embedding'. Example: [0.1,0.2,...]", 400)
            rows = search_rag_chunks_vector(user_id=user_id, query_embedding=query_embedding, limit=limit)
            return jsonify({"results": rows, "mode": "vector", "limit": min(max(1, limit), 100)})
        if not query_text:
            return _error("'q' is required when 'query_embedding' is not provided.", 400)
        rows = search_rag_chunks(user_id=user_id, query_text=query_text, page=page, limit=limit)
-        return jsonify({"results": rows, "mode": "text", "page": page, "limit": min(max(1, limit), 100)})
+        return jsonify({"results": rows, "page": page, "limit": min(max(1, limit), 100)})
    except Exception as e:
        return _error(str(e), 500)
@@ -533,64 +493,6 @@ def api_post_audio_url(post_id: int):
        return _error(str(e), 500)
@api.get("/posts/<int:post_id>/archive.zip")
def api_post_archive_zip(post_id: int):
    """
    Build an in-memory zip archive for a post and send it as a download.

    The archive always contains the post's JSON artifacts (post, metadata,
    rights, RAG chunks, audit log). A ``transcript.txt`` joined from the RAG
    chunk texts is added when any text exists, and the original audio is
    added under ``original/`` when a stored file with the ``original_audio``
    role has a path.

    A post whose visibility is ``private`` is only served when the
    ``user_id`` query parameter equals the post's owner id.

    Returns:
        The zip as an attachment, or an ``_error`` response: 404 when the
        post or its bundle is missing, 403 for a private post requested by a
        non-owner, 500 when building the archive raises.
    """
    post_row = get_audio_post_by_id(post_id)
    if not post_row:
        return _error("Post not found.", 404)

    caller_id = request.args.get("user_id", type=int)
    is_private = post_row.get("visibility") == "private"
    if is_private and caller_id != post_row.get("user_id"):
        return _error("Not authorized to download this private archive.", 403)

    # (zip member name, bundle key, fallback when the key is absent)
    json_artifacts = (
        ("post.json", "post", {}),
        ("metadata.json", "metadata", {}),
        ("rights.json", "rights", {}),
        ("rag_chunks.json", "rag_chunks", []),
        ("audit_log.json", "audit_log", []),
    )

    try:
        bundle = get_post_bundle(post_id)
        if not bundle:
            return _error("Post bundle not found.", 404)

        payload = io.BytesIO()
        with zipfile.ZipFile(payload, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
            # Core JSON artifacts are written unconditionally.
            for member_name, bundle_key, fallback in json_artifacts:
                serialized = json.dumps(bundle.get(bundle_key, fallback), indent=2, default=str)
                archive.writestr(member_name, serialized)

            # The transcript is reconstructed from the chunk texts.
            pieces = []
            for chunk in bundle.get("rag_chunks", []) or []:
                chunk_text = chunk.get("text")
                if chunk_text:
                    pieces.append(chunk_text.strip())
            transcript = " ".join(pieces).strip()
            if transcript:
                archive.writestr("transcript.txt", transcript)

            # Original media is included only when a stored path is recorded.
            audio_file = get_archive_file_by_role(post_id, "original_audio")
            if audio_file and audio_file.get("path"):
                stored_path = audio_file["path"]
                audio_bytes = download_storage_object_by_stored_path(stored_path)
                member = stored_path.split("/")[-1] or f"post_{post_id}_original.bin"
                archive.writestr(f"original/{member}", audio_bytes)

        # Rewind so send_file streams the archive from its first byte.
        payload.seek(0)
        return send_file(
            payload,
            mimetype="application/zip",
            as_attachment=True,
            download_name=f"voicevault_post_{post_id}_archive.zip",
        )
    except Exception as exc:
        return _error(f"Failed to build archive zip: {exc}", 500)
@api.post("/posts/<int:post_id>/files")
 def api_add_file(post_id: int):
    payload = request.get_json(force=True, silent=False) or {}
@@ -707,3 +609,36 @@ def api_post_audit(post_id: int):
        return jsonify({"logs": list_audit_logs(post_id=post_id, page=page, limit=limit)})
    except Exception as e:
        return _error(str(e), 500)
@api.get("/posts/<int:post_id>/download")
def download_post(post_id: int):
    """
    Send a zip download containing a post's metadata and original audio.

    ``metadata.json`` is written only when the archive metadata record has a
    truthy ``"metadata"`` entry. Of the post's archive files, only those with
    role ``original_audio`` are included; their bytes are fetched through the
    signed URL returned by ``get_original_audio_url``. Adding a file is
    best-effort: a failure is logged and the file is skipped, so the zip is
    still returned.

    Returns:
        The zip as an attachment named after the post title (spaces become
        underscores; ``post_<id>`` when the title is missing or empty), or a
        JSON ``{"error": ...}`` body with status 404 when the post does not
        exist.
    """
    # Function-scope stdlib imports: the fetch must not depend on `requests`,
    # which the module's visible import block does not provide. A missing
    # name there would be swallowed by the best-effort handler below and
    # silently produce a zip without audio.
    import logging
    from urllib.request import urlopen

    logger = logging.getLogger(__name__)
    fetch_timeout_sec = 30  # never let a stalled storage request hang the route

    post = get_audio_post_by_id(post_id)
    if not post:
        return jsonify({"error": "Post not found"}), 404

    # NOTE(review): no visibility/ownership check is performed here — confirm
    # that private posts are meant to be downloadable by any caller.
    files = list_archive_files(post_id)
    metadata = get_archive_metadata(post_id) or {}

    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zipf:
        if metadata.get("metadata"):
            # default=str keeps non-JSON values (e.g. timestamps) from
            # aborting the whole download.
            zipf.writestr("metadata.json", json.dumps(metadata, indent=2, default=str))

        for f in files:
            try:
                if f["role"] != "original_audio":
                    continue
                signed_url = get_original_audio_url(post_id)["signed_url"]
                if not signed_url:
                    continue
                # urlopen raises on HTTP error statuses; the handler below
                # turns that into a skipped file.
                with urlopen(signed_url, timeout=fetch_timeout_sec) as resp:
                    if resp.status != 200:
                        continue
                    content = resp.read()
                filename = f"{f['role']}_{f['path'].split('/')[-1]}"
                zipf.writestr(filename, content)
            except Exception as exc:
                # Deliberate best-effort: one bad file must not fail the zip.
                logger.warning("Failed to add file to post %s download: %s", post_id, exc)

    zip_buffer.seek(0)

    # A missing or empty title must not crash after the archive is built.
    title = str(post.get("title") or "").strip()
    base_name = title.replace(" ", "_") if title else f"post_{post_id}"
    return send_file(
        zip_buffer,
        mimetype="application/zip",
        as_attachment=True,
        download_name=f"{base_name}.zip",
    )