# Markers that delimit the transcript inside the stored prompt string.
_TRANSCRIPT_START_MARKER = "Transcript:\n"
_TRANSCRIPT_END_MARKER = "\n\nAnswer user questions"

# Upper bound on the title portion of the download filename.
_MAX_TITLE_CHARS = 50


def _extract_transcript(prompt):
    """Return the transcript embedded in *prompt*, or "" when none is found.

    The transcript is the text between the first "Transcript:\\n" marker and
    the following "\\n\\nAnswer user questions" marker (or the end of the
    string when the end marker is absent), stripped of surrounding whitespace.
    """
    if not isinstance(prompt, str):
        return ""
    _, start_marker, remainder = prompt.partition(_TRANSCRIPT_START_MARKER)
    if not start_marker:
        return ""
    # partition (rather than split) keeps the full transcript even if the
    # transcript text itself happens to contain the start marker again.
    transcript, _, _ = remainder.partition(_TRANSCRIPT_END_MARKER)
    return transcript.strip()


def _safe_archive_name(title, post_id):
    """Build a filesystem-friendly ZIP filename from a post title and id.

    Keeps only alphanumerics, spaces, hyphens and underscores, converts
    spaces to underscores, and truncates the title part. Falls back to
    "archive" when the title is missing, None, or empty after sanitising.
    """
    raw_title = str(title) if title else ""
    cleaned = "".join(
        ch for ch in raw_title if ch.isalnum() or ch in (" ", "-", "_")
    ).strip()
    cleaned = cleaned.replace(" ", "_")[:_MAX_TITLE_CHARS] or "archive"
    return f"{cleaned}_{post_id}.zip"


def _add_metadata_entries(zipf, post, metadata_row):
    """Write metadata.json and transcript.txt into *zipf* (best effort).

    Nothing is written when *metadata_row* is missing or has no "metadata"
    value. Failures are logged and swallowed so that the rest of the archive
    can still be produced.
    """
    raw_metadata = metadata_row.get("metadata") if metadata_row else None
    if not raw_metadata:
        return

    try:
        # The stored value may be a JSON string or an already-decoded object.
        metadata_dict = (
            json.loads(raw_metadata)
            if isinstance(raw_metadata, str)
            else raw_metadata
        )
        if not isinstance(metadata_dict, dict):
            raise TypeError(
                f"expected metadata object, got {type(metadata_dict).__name__}"
            )

        clean_metadata = {
            "title": post.get("title"),
            "description": post.get("description"),
            "language": metadata_dict.get("language", "en"),
            "transcript_length": metadata_dict.get("transcript_length_chars"),
            "created_at": post.get("created_at"),
            "visibility": post.get("visibility"),
        }
        # default=str is deliberate: a non-JSON-native field (presumably
        # created_at may be a datetime -- TODO confirm) should be stringified
        # rather than cause the whole metadata file to be dropped.
        zipf.writestr(
            "metadata.json",
            json.dumps(clean_metadata, indent=2, default=str),
        )

        transcript_text = _extract_transcript(metadata_dict.get("prompt"))
        if transcript_text:
            zipf.writestr("transcript.txt", transcript_text)
    except Exception as e:
        print(f"Error adding metadata: {e}")


def _add_original_audio(zipf, post_id, files):
    """Download the post's original audio and store it under audio/ (best effort).

    The signed URL is requested per post, not per file, so the audio is
    fetched once and written under each distinct original-audio filename
    instead of being re-downloaded for every matching row.
    """
    try:
        # dict.fromkeys de-duplicates while preserving order, avoiding
        # duplicate entry names inside the ZIP.
        filenames = list(dict.fromkeys(
            file_info["path"].split("/")[-1]
            for file_info in files
            if file_info.get("role") == "original_audio"
        ))
        if not filenames:
            return

        # Short-lived (5 minute) signed URL for the stored audio.
        audio_url_data = get_original_audio_url(post_id, expires_in=300) or {}
        signed_url = audio_url_data.get("signed_url")
        if not signed_url:
            return

        # Download the file from Supabase.
        response = requests.get(signed_url, timeout=30)
        if response.status_code != 200:
            print(f"Failed to download audio: HTTP {response.status_code}")
            return

        for original_filename in filenames:
            zipf.writestr(f"audio/{original_filename}", response.content)
    except Exception as e:
        print(f"Error adding audio file: {e}")


def _add_timestamped_transcript(zipf, post_id):
    """Write transcript_timestamped.txt from the post's RAG chunks (best effort).

    Each chunk is rendered as "[<start>s - <end>s]" followed by its text;
    chunks are separated by a blank line. Only the first page of up to 1000
    chunks is requested.
    """
    try:
        chunks = list_rag_chunks(post_id, page=1, limit=1000)
        if not chunks:
            return
        chunks_text = "\n\n".join(
            f"[{chunk['start_sec']:.2f}s - {chunk['end_sec']:.2f}s]\n{chunk['text']}"
            for chunk in chunks
        )
        zipf.writestr("transcript_timestamped.txt", chunks_text)
    except Exception as e:
        print(f"Error adding chunks: {e}")


# The <int:post_id> converter is required: without it the URL carries no
# value for the view's post_id argument.
@api.get("/posts/<int:post_id>/download")
def download_post(post_id: int):
    """
    Download a post as a ZIP file containing (each part is best effort):
    - metadata.json: cleaned-up post metadata
    - transcript.txt: transcript extracted from the stored prompt
    - audio/<filename>: the original audio file
    - transcript_timestamped.txt: RAG chunks with start/end timestamps

    Returns a 404 error response when the post does not exist and a 500
    error response when the archive cannot be built at all. A failure in an
    individual part is logged and that part is omitted from the archive.
    """
    try:
        post = get_audio_post_by_id(post_id)
        if not post:
            return _error("Post not found", 404)

        # Tolerate a None result so a post without archive files still
        # yields a (possibly smaller) archive instead of a 500.
        files = list_archive_files(post_id) or []
        metadata_row = get_archive_metadata(post_id)

        # The archive is assembled fully in memory.
        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zipf:
            _add_metadata_entries(zipf, post, metadata_row)
            _add_original_audio(zipf, post_id, files)
            _add_timestamped_transcript(zipf, post_id)

        # Rewind so send_file streams the archive from its first byte.
        zip_buffer.seek(0)

        return send_file(
            zip_buffer,
            mimetype="application/zip",
            as_attachment=True,
            download_name=_safe_archive_name(post.get("title"), post_id),
        )

    except Exception as e:
        print(f"Download error: {e}")
        import traceback
        traceback.print_exc()
        return _error(f"Failed to create download: {e}", 500)
a/backend/uploads/ae73dd14-4aaf-4f68-8449-ddd2061e5a8b_Untitled.m4a b/backend/uploads/ae73dd14-4aaf-4f68-8449-ddd2061e5a8b_Untitled.m4a new file mode 100644 index 0000000..519aca6 Binary files /dev/null and b/backend/uploads/ae73dd14-4aaf-4f68-8449-ddd2061e5a8b_Untitled.m4a differ diff --git a/frontend/src/components/AudioPostCard.jsx b/frontend/src/components/AudioPostCard.jsx index 5dc8a2c..040dbfa 100644 --- a/frontend/src/components/AudioPostCard.jsx +++ b/frontend/src/components/AudioPostCard.jsx @@ -206,9 +206,7 @@ export default function AudioPostCard({ post }) { )} )} - +