initial commit
scripts/download_images.py (new file, 156 lines)
@@ -0,0 +1,156 @@
#!/usr/bin/env python3
"""
Script to download all images from the MTC website and store them locally.
This makes the application work completely offline.
"""

import os
import re
import time
from pathlib import Path
from urllib.parse import urlparse

import requests

def create_images_directory():
    """Create the static/images directory if it doesn't exist."""
    images_dir = Path("static/images")
    images_dir.mkdir(parents=True, exist_ok=True)
    return images_dir

def extract_image_urls_from_markdown():
    """Extract all image URLs from the markdown file."""
    markdown_file = Path("data/balotario_clase_a_cat_I.md")

    if not markdown_file.exists():
        print(f"Error: {markdown_file} not found!")
        return []

    with open(markdown_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Find all image URLs
    pattern = r'!\[\]\((https://sierdgtt\.mtc\.gob\.pe/Content/img-data/img\d+\.jpg)\)'
    urls = re.findall(pattern, content)

    return list(set(urls))  # Remove duplicates

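# Illustrative match for the pattern above (URL shape only, not a real
# question-bank entry):
#   "![](https://sierdgtt.mtc.gob.pe/Content/img-data/img123.jpg)"
#   -> ["https://sierdgtt.mtc.gob.pe/Content/img-data/img123.jpg"]
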
def download_image(url, images_dir, retries=3):
    """Download a single image with retry logic."""
    try:
        # Extract filename from URL
        filename = os.path.basename(urlparse(url).path)
        filepath = images_dir / filename

        # Skip if already exists
        if filepath.exists():
            print(f"✓ {filename} already exists")
            return True

        print(f"📥 Downloading {filename}...")

        # Download with retries
        for attempt in range(retries):
            try:
                headers = {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
                }

                response = requests.get(url, headers=headers, timeout=30)
                response.raise_for_status()

                # Save the image
                with open(filepath, 'wb') as f:
                    f.write(response.content)

                print(f"✅ Downloaded {filename} ({len(response.content)} bytes)")
                return True

            except requests.RequestException as e:
                print(f"❌ Attempt {attempt + 1} failed for {filename}: {e}")
                if attempt < retries - 1:
                    time.sleep(2)  # Wait before retry

        print(f"💥 Failed to download {filename} after {retries} attempts")
        return False

    except Exception as e:
        print(f"💥 Error downloading {url}: {e}")
        return False

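# Note on the retry loop above: each failed attempt waits 2 seconds before
# the next try, so with the default retries=3 a dead URL is abandoned after
# three requests (each capped at the 30-second timeout).
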
def update_markdown_file(images_dir):
    """Update the markdown file to use local image paths."""
    markdown_file = Path("data/balotario_clase_a_cat_I.md")
    backup_file = Path("data/balotario_clase_a_cat_I.md.backup")

    # Create backup
    if not backup_file.exists():
        with open(markdown_file, 'r', encoding='utf-8') as f:
            content = f.read()
        with open(backup_file, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"📋 Created backup: {backup_file}")

    # Read current content
    with open(markdown_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Replace remote URLs with local paths (assumes the app serves the
    # downloaded files from the static/ directory, i.e. /static/images/...)
    pattern = r'!\[\]\(https://sierdgtt\.mtc\.gob\.pe/Content/img-data/(img\d+\.jpg)\)'
    replacement = rf'![](/{images_dir.as_posix()}/\1)'

    updated_content = re.sub(pattern, replacement, content)

    # Write updated content
    with open(markdown_file, 'w', encoding='utf-8') as f:
        f.write(updated_content)

    print("📝 Updated markdown file to use local image paths")

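# Example of the rewrite performed above (the /static/images/ prefix is an
# assumption about how the app serves the downloaded files):
#   ![](https://sierdgtt.mtc.gob.pe/Content/img-data/img42.jpg)
#   -> ![](/static/images/img42.jpg)
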
def main():
    """Main function to download all images."""
    print("🚀 Starting image download process...")

    # Create images directory
    images_dir = create_images_directory()
    print(f"📁 Images will be saved to: {images_dir}")

    # Extract image URLs
    urls = extract_image_urls_from_markdown()
    print(f"🔍 Found {len(urls)} unique images to download")

    if not urls:
        print("❌ No image URLs found!")
        return

    # Download images
    successful = 0
    failed = 0

    for i, url in enumerate(urls, 1):
        print(f"\n[{i}/{len(urls)}] Processing: {url}")

        if download_image(url, images_dir):
            successful += 1
        else:
            failed += 1

        # Small delay to be respectful to the server
        time.sleep(0.5)

    # Summary
    print("\n📊 Download Summary:")
    print(f"✅ Successful: {successful}")
    print(f"❌ Failed: {failed}")
    print(f"📁 Total files: {len(list(images_dir.glob('*.jpg')))}")

    # Update markdown file
    if successful > 0:
        print("\n🔄 Updating markdown file...")
        update_markdown_file(images_dir)
        print("✅ Process completed!")
        print("💡 You can now run your app completely offline!")
    else:
        print("❌ No images were downloaded successfully")

if __name__ == "__main__":
    main()
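For reference, a minimal sanity check after running `python scripts/download_images.py` from the repository root (a sketch; it assumes the same static/images layout the script creates):

#!/usr/bin/env python3
"""Quick check that the downloader produced local images (a sketch,
assuming the static/images layout used by scripts/download_images.py)."""
from pathlib import Path

images = sorted(Path("static/images").glob("img*.jpg"))
print(f"{len(images)} local images on disk")
for img in images[:5]:
    print(f"  {img.name}: {img.stat().st_size} bytes")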