#!/usr/bin/env python3
"""Test script to crawl only 5 products with safe rate limiting."""

import sys
from pathlib import Path

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

# Add project root to path
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))

from scrapy_project.spiders.generic_spider import GenericSpider


def main():
    """Run a test crawl limited to 5 products."""
    config_file = 'configs/gardenkindtees-com.yaml'

    if not Path(config_file).exists():
        print(f"❌ Config file not found: {config_file}")
        sys.exit(1)

    # Get project settings and tighten them for safe testing
    settings = get_project_settings()

    # Safe rate limiting: 3-second base delay between requests.
    # With RANDOMIZE_DOWNLOAD_DELAY enabled, Scrapy waits between
    # 0.5x and 1.5x the base delay, i.e. 1.5-4.5 seconds.
    settings.set('DOWNLOAD_DELAY', 3.0)
    settings.set('RANDOMIZE_DOWNLOAD_DELAY', True)
    # Note: not a built-in Scrapy setting; kept for project-level middleware use.
    settings.set('RANDOMIZE_DOWNLOAD_DELAY_RANGE', 0.5)
    settings.set('CONCURRENT_REQUESTS', 1)
    settings.set('CONCURRENT_REQUESTS_PER_DOMAIN', 1)

    # Stop the spider after 5 scraped items
    settings.set('CLOSESPIDER_ITEMCOUNT', 5)

    # Logging
    settings.set('LOG_LEVEL', 'INFO')

    print("🧪 Testing crawler with 5 products")
    print("   Config: gardenkindtees-com.yaml")
    print("   Delay: 3.0s (with randomization)")
    print("   Limit: 5 products")
    print("   Reviews: Enabled\n")

    # Create the crawler process and schedule the spider
    process = CrawlerProcess(settings)
    process.crawl(
        GenericSpider,
        config_file=config_file
    )

    print("🚀 Starting crawl...\n")

    try:
        process.start(stop_after_crawl=True)
        print("\n✅ Test crawl completed!")
        print("\n📊 Check results in:")
        print("   - data/exports/")
        print("   - data/products.json")
    except KeyboardInterrupt:
        print("\n⚠️  Crawl interrupted by user")
        sys.exit(1)
    except Exception as e:
        print(f"\n❌ Crawl failed: {e}", file=sys.stderr)
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == '__main__':
    main()