initial commit

This commit is contained in:
2025-10-26 23:39:49 -05:00
commit 5fb0909e8d
120 changed files with 11279 additions and 0 deletions

156
scripts/download_images.py Normal file
View File

@@ -0,0 +1,156 @@
#!/usr/bin/env python3
"""
Script to download all images from the MTC website and store them locally.
This makes the application work completely offline.
"""
import os
import re
import requests
from urllib.parse import urlparse
import time
from pathlib import Path
def create_images_directory(path="static/images"):
    """Create the directory that holds the downloaded images and return it.

    Args:
        path: Directory to create, as a string or ``Path``. Defaults to
            ``static/images``, resolved against the current working
            directory.

    Returns:
        The directory as a ``Path``. Missing parent directories are created
        and an already existing directory is left as it is.
    """
    images_dir = Path(path)
    images_dir.mkdir(parents=True, exist_ok=True)
    return images_dir
def extract_image_urls_from_markdown(markdown_file="data/balotario_clase_a_cat_I.md"):
    """Collect the distinct MTC image URLs referenced by the markdown file.

    Only image links with empty alt text that point at
    ``https://sierdgtt.mtc.gob.pe/Content/img-data/img<digits>.jpg`` are
    recognised; any other link is ignored.

    Args:
        markdown_file: Path of the markdown file to scan. Defaults to the
            question bank at ``data/balotario_clase_a_cat_I.md``.

    Returns:
        The unique URLs in order of first appearance, or an empty list
        (after printing an error) when the file does not exist.
    """
    markdown_file = Path(markdown_file)
    try:
        with open(markdown_file, 'r', encoding='utf-8') as f:
            content = f.read()
    except FileNotFoundError:
        print(f"Error: {markdown_file} not found!")
        return []
    # Find all image URLs
    pattern = r'!\[\]\((https://sierdgtt\.mtc\.gob\.pe/Content/img-data/img\d+\.jpg)\)'
    urls = re.findall(pattern, content)
    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # download order is the same on every run (a set would shuffle it).
    return list(dict.fromkeys(urls))
def download_image(url, images_dir, retries=3):
    """Download a single image into ``images_dir``, retrying on request errors.

    Args:
        url: Absolute URL of the image. The last segment of its path is used
            as the local file name.
        images_dir: Directory (string or ``Path``) the image is saved in.
        retries: Maximum number of download attempts.

    Returns:
        True when the file already exists locally or was downloaded, False
        when the URL has no file name, every attempt failed, or an
        unexpected error occurred. Errors are printed, never raised, so one
        bad image cannot abort a whole batch.
    """
    try:
        # Extract filename from URL
        filename = os.path.basename(urlparse(url).path)
        if not filename:
            # A URL ending in "/" yields an empty name; the target would then
            # be the directory itself and be reported as "already exists".
            print(f"💥 Error downloading {url}: URL has no file name")
            return False
        filepath = Path(images_dir) / filename
        # Skip if already exists
        if filepath.exists():
            print(f"{filename} already exists")
            return True
        print(f"📥 Downloading {filename}...")
        # Browser-like User-Agent, identical for every attempt.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        partial_path = filepath.with_name(filename + ".part")
        # Download with retries
        for attempt in range(retries):
            try:
                response = requests.get(url, headers=headers, timeout=30)
                response.raise_for_status()
            except requests.RequestException as e:
                print(f"❌ Attempt {attempt + 1} failed for {filename}: {e}")
                if attempt < retries - 1:
                    time.sleep(2)  # Wait before retry
                continue
            # Write to a sibling ".part" file and rename it into place, so an
            # interrupted write never leaves a truncated image that later
            # runs would skip as "already exists".
            try:
                with open(partial_path, 'wb') as f:
                    f.write(response.content)
                os.replace(partial_path, filepath)
            except OSError:
                try:
                    os.remove(partial_path)
                except OSError:
                    pass  # Best-effort cleanup; the original error is re-raised.
                raise
            print(f"✅ Downloaded {filename} ({len(response.content)} bytes)")
            return True
        print(f"💥 Failed to download {filename} after {retries} attempts")
        return False
    except Exception as e:
        # Boundary handler: report and signal failure instead of propagating.
        print(f"💥 Error downloading {url}: {e}")
        return False
def update_markdown_file(images_dir, markdown_file="data/balotario_clase_a_cat_I.md"):
    """Rewrite remote image links in the markdown file to local paths.

    A link is rewritten to ``/static/images/<name>`` only when ``<name>``
    exists as a file in ``images_dir``. Links whose image is missing keep
    their remote URL, so a later run can still find and download them.

    A byte-for-byte backup named ``<markdown_file>.backup`` is created next
    to the markdown file the first time this runs; an existing backup is
    never overwritten.

    Args:
        images_dir: Directory (string or ``Path``) holding downloaded images.
        markdown_file: Path of the markdown file to rewrite. Defaults to the
            question bank at ``data/balotario_clase_a_cat_I.md``.

    Raises:
        FileNotFoundError: If the markdown file does not exist.
    """
    images_dir = Path(images_dir)
    markdown_file = Path(markdown_file)
    backup_file = markdown_file.with_name(markdown_file.name + ".backup")
    # Create backup (raw bytes, so line endings and encoding are untouched)
    if not backup_file.exists():
        backup_file.write_bytes(markdown_file.read_bytes())
        print(f"📋 Created backup: {backup_file}")
    # Read current content
    with open(markdown_file, 'r', encoding='utf-8') as f:
        content = f.read()
    pattern = r'!\[\]\(https://sierdgtt\.mtc\.gob\.pe/Content/img-data/(img\d+\.jpg)\)'
    missing = set()

    def to_local(match):
        # Swap in the local path only when the image is really on disk.
        name = match.group(1)
        if (images_dir / name).is_file():
            return f'![](/static/images/{name})'
        missing.add(name)
        return match.group(0)

    # Replace URLs with local paths
    updated_content = re.sub(pattern, to_local, content)
    # Write updated content
    with open(markdown_file, 'w', encoding='utf-8') as f:
        f.write(updated_content)
    print("📝 Updated markdown file to use local image paths")
    if missing:
        print(f"⚠️ Kept remote links for {len(missing)} images missing from {images_dir}")
def main():
    """Run the whole workflow: find image URLs, download them, rewrite links.

    Progress and a final summary are printed. The function returns early
    when no image URLs are found, and the markdown file is only rewritten
    when at least one ``download_image`` call reported success.
    """
    print("🚀 Starting image download process...")

    # Destination folder for the downloaded files.
    target_dir = create_images_directory()
    print(f"📁 Images will be saved to: {target_dir}")

    # Remote images referenced by the markdown file.
    image_urls = extract_image_urls_from_markdown()
    total = len(image_urls)
    print(f"🔍 Found {total} unique images to download")
    if total == 0:
        print("❌ No image URLs found!")
        return

    # Fetch every image, remembering whether each call succeeded.
    outcomes = []
    for position, image_url in enumerate(image_urls, start=1):
        print(f"\n[{position}/{total}] Processing: {image_url}")
        outcomes.append(bool(download_image(image_url, target_dir)))
        # Short pause between requests to go easy on the server.
        time.sleep(0.5)
    successful = sum(outcomes)
    failed = total - successful

    # Summary
    jpg_count = sum(1 for _ in target_dir.glob('*.jpg'))
    print("\n📊 Download Summary:")
    print(f"✅ Successful: {successful}")
    print(f"❌ Failed: {failed}")
    print(f"📁 Total files: {jpg_count}")

    if successful == 0:
        print("❌ No images were downloaded successfully")
        return

    # Point the markdown at the local copies.
    print("\n🔄 Updating markdown file...")
    update_markdown_file(target_dir)
    print("✅ Process completed!")
    print("💡 You can now run your app completely offline!")
# Run the download workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()