#!/usr/bin/env python3
"""CLI interface for SS Crawler."""
import os
import sys
import click
import threading
import time
from pathlib import Path
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

# Add project root to path
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))

from scrapy_project.utils.detector import WebsiteDetector
from scrapy_project.utils.analyzer import analyze_website
from scrapy_project.spiders.generic_spider import GenericSpider


class Spinner:
    """Simple loading spinner."""

    def __init__(self, message="Loading"):
        self.spinner_chars = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']
        self.message = message
        self.stop_spinner = False
        self.spinner_thread = None

    def _spin(self):
        """Run spinner animation."""
        i = 0
        while not self.stop_spinner:
            char = self.spinner_chars[i % len(self.spinner_chars)]
            click.echo(f"\r{char} {self.message}", nl=False)
            sys.stdout.flush()
            time.sleep(0.1)
            i += 1
        # Clear spinner line
        click.echo("\r" + " " * (len(self.message) + 3) + "\r", nl=False)
        sys.stdout.flush()

    def __enter__(self):
        """Start spinner."""
        self.spinner_thread = threading.Thread(target=self._spin, daemon=True)
        self.spinner_thread.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop spinner."""
        self.stop_spinner = True
        if self.spinner_thread:
            self.spinner_thread.join(timeout=0.5)


@click.group()
def cli():
    """SS Crawler - Safe and efficient web scraping tool."""
    pass


@cli.command()
@click.argument('url')
@click.option('--output', '-o', default=None, help='Output path for config file')
@click.option('--start-urls', '-u', multiple=True, help='Additional start URLs')
@click.option('--count-all', is_flag=True,
              help='Count all products across all pages/collections (slower but complete)')
def check(url: str, output: str, start_urls: tuple, count_all: bool):
    """Check website and generate configuration file."""
    # Ensure URL has scheme
    if not url.startswith(('http://', 'https://')):
        url = 'https://' + url

    click.echo(f"🔍 Checking website: {url}")

    detector = WebsiteDetector()
    config = detector.detect(url)

    if not config:
        click.echo("❌ Could not detect website type", err=True)
        sys.exit(1)

    # Display detection results
    click.echo(f"\n✅ Detected: {config.name} ({config.type})")
    click.echo(f"📝 Notes: {config.notes}")

    # Generate recommendations
    recommendations = detector._get_recommendations(config)
    if recommendations:
        click.echo("\n💡 Recommendations:")
        for rec in recommendations:
            click.echo(f"  - {rec}")

    # Generate YAML config
    start_urls_list = list(start_urls) if start_urls else None
    try:
        config_path = detector.generate_yaml_config(
            url=url,
            start_urls=start_urls_list,
            output_path=Path(output) if output else None
        )
        click.echo(f"\n✅ Configuration file generated: {config_path}")

        # Automatically analyze website
        if count_all:
            click.echo("\n📊 Analyzing website (counting all products - this may take longer)...")
            spinner_msg = "Counting all products across all collections..."
        else:
            click.echo("\n📊 Analyzing website...")
            spinner_msg = "Discovering collections and counting products..."

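        # Analysis is best-effort: the config file has already been written above,
        # so a failure inside the nested try below is reported but does not abort
        # the command.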
        try:
            # Show loading spinner
            with Spinner(spinner_msg):
                analysis = analyze_website(config_path, save_to_file=True, count_all=count_all)

            # Display formatted results
            from scrapy_project.utils.analyzer import format_analysis_results
            formatted_output = format_analysis_results(analysis)
            click.echo("\n" + formatted_output)

            # Analysis file info
            if analysis.get('analysis_file'):
                click.echo(f"\n💾 Analysis saved to: {analysis['analysis_file']}")
            if analysis.get('analysis_json_file'):
                click.echo(f"   JSON version: {analysis['analysis_json_file']}")
        except Exception as e:
            click.echo(f"⚠️ Analysis failed: {e}", err=True)
            import traceback
            click.echo(f"   Error details: {traceback.format_exc()}", err=True)
            click.echo("   You can still use the config file to crawl")

        click.echo("\n📋 Review and edit the config file if needed, then run:")
        click.echo(f"   ./ss-crawler crawl {Path(config_path).name}")

    except Exception as e:
        click.echo(f"❌ Error generating config: {e}", err=True)
        sys.exit(1)


@cli.command()
@click.argument('config_or_domain', required=False)
@click.option('--config', '-c', help='Path to YAML configuration file (alternative to positional argument)')
@click.option('--url', '-u', help='URL to crawl (requires --auto-detect)')
@click.option('--auto-detect', is_flag=True, help='Auto-detect website configuration')
@click.option('--safe', is_flag=True, help='Safe mode: slow crawl (delay 3.0s, conservative)')
@click.option('--medium', is_flag=True, help='Medium mode: balanced crawl (delay 1.5s)')
@click.option('--fast', is_flag=True, help='Fast mode: quick crawl (delay 0.5s, use with caution)')
@click.option('--output', '-o', default='data/exports', help='Output directory for exports')
@click.option('--format', '-f',
              type=click.Choice(['woocommerce', 'shopify', 'excel', 'json', 'all'], case_sensitive=False),
              default='woocommerce', help='Export format')
@click.option('--resume', is_flag=True, help='Resume from checkpoint')
@click.option('--checkpoint', default=None, help='Path to checkpoint file')
@click.option('--log-level', default='INFO',
              type=click.Choice(['DEBUG', 'INFO', 'WARNING', 'ERROR'], case_sensitive=False),
              help='Logging level')
def crawl(config_or_domain: str, config: str, url: str, auto_detect: bool,
          safe: bool, medium: bool, fast: bool, output: str, format: str,
          resume: bool, checkpoint: str, log_level: str):
    """Crawl products from website.
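
    The positional argument takes precedence over --config, which in turn takes
    precedence over --url with --auto-detect.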

    Usage:
        ./ss-crawler crawl themarblecoffee.com
        ./ss-crawler crawl themarblecoffee.com.yaml
        ./ss-crawler crawl themarblecoffee.com --safe
        ./ss-crawler crawl themarblecoffee.com --fast
        ./ss-crawler crawl --config configs/example.yaml
        ./ss-crawler crawl --url https://example.com --auto-detect
    """
    # Determine config file path
    config_path = None
    # Track if user provided a URL directly (not just a domain)
    start_url_override = None

    # Priority: config_or_domain argument > --config option > auto-detect
    if config_or_domain:
        # Check if it's a file path (ends with .yaml or .yml)
        if config_or_domain.endswith(('.yaml', '.yml')):
            config_path = config_or_domain
            # If just a filename, check in the configs directory
            if not Path(config_path).exists() and not Path(config_path).is_absolute():
                configs_path = Path('configs') / config_path
                if configs_path.exists():
                    config_path = str(configs_path)
        else:
            # It's a domain or URL, try to find a config file
            domain = config_or_domain

            # Check if it's a full URL (with a path)
            is_full_url = domain.startswith(('http://', 'https://')) and '/' in domain[8:]

            if is_full_url:
                # User provided a full URL - use it as the start URL
                start_url_override = [domain]
                # Extract the domain for config lookup
                from urllib.parse import urlparse
                parsed = urlparse(domain)
                domain = parsed.netloc
            else:
                # Remove the protocol if present (but no path)
                if domain.startswith(('http://', 'https://')):
                    from urllib.parse import urlparse
                    parsed = urlparse(domain)
                    domain = parsed.netloc

            # Remove www. prefix
            if domain.startswith('www.'):
                domain = domain[4:]

            # Try to find a config file using the utility function
            from scrapy_project.utils.config_utils import find_config_file_for_domain
            found_config = find_config_file_for_domain(domain)
            if found_config:
                config_path = str(found_config)

            # If not found, suggest running check first
            if not config_path:
                click.echo(f"❌ Config file not found for domain: {domain}", err=True)
                click.echo(f"   Please run: ./ss-crawler check {domain}", err=True)
                click.echo(f"   Or provide full path: ./ss-crawler crawl configs/{domain}.yaml", err=True)
                sys.exit(1)
    elif config:
        config_path = config
    elif auto_detect and url:
        # Auto-detect mode
        click.echo(f"🔍 Auto-detecting website: {url}")
        detector = WebsiteDetector()
        config_obj = detector.detect(url)

        if not config_obj:
            click.echo("❌ Could not detect website type", err=True)
            sys.exit(1)

        # Generate temporary config
        temp_config_path = Path('configs') / 'temp_auto_detect.yaml'
        temp_config_path.parent.mkdir(exist_ok=True)
        config_path = str(detector.generate_yaml_config(url=url, output_path=temp_config_path))
        click.echo(f"✅ Generated config: {config_path}")
    else:
        click.echo("❌ Error: Config file or domain required.", err=True)
        click.echo("   Usage:   ./ss-crawler crawl <config_or_domain>", err=True)
        click.echo("   Example: ./ss-crawler crawl themarblecoffee.com", err=True)
        click.echo("   Or:      ./ss-crawler crawl --url <url> --auto-detect", err=True)
        sys.exit(1)

    # Validate that the config file exists
    config_path_obj = Path(config_path)
    if not config_path_obj.exists():
        click.echo(f"❌ Config file not found: {config_path}", err=True)
        click.echo("   Please run: ./ss-crawler check <url>", err=True)
        sys.exit(1)

    config = str(config_path)

    # Determine speed mode
    speed_mode = None
    if safe:
        speed_mode = 'safe'
    elif medium:
        speed_mode = 'medium'
    elif fast:
        speed_mode = 'fast'

    # Speed presets
    speed_presets = {
        'safe': {
            'delay': 3.0,
            'download_delay': 3.0,
            'concurrent_requests': 1,
            'retry_times': 5,
            'download_timeout': 60
        },
        'medium': {
            'delay': 1.5,
            'download_delay': 1.5,
            'concurrent_requests': 1,
            'retry_times': 3,
            'download_timeout': 30
        },
        'fast': {
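            # Most aggressive preset; pairs with the "--fast ... use with caution"
            # help text above.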
            'delay': 0.5,
            'download_delay': 0.5,
            'concurrent_requests': 2,
            'retry_times': 2,
            'download_timeout': 20
        }
    }

    # Load Scrapy settings
    settings = get_project_settings()

    # Update settings from CLI arguments
    settings.set('EXPORT_DIR', output)
    settings.set('EXPORT_FORMAT', format.lower())
    settings.set('LOG_LEVEL', log_level)

    if resume and checkpoint:
        settings.set('STATE_FILE', checkpoint)

    # Apply speed preset if specified
    if speed_mode:
        preset = speed_presets[speed_mode]
        settings.set('DOWNLOAD_DELAY', preset['delay'])
        settings.set('RANDOMIZE_DOWNLOAD_DELAY', True)
        settings.set('CONCURRENT_REQUESTS', preset['concurrent_requests'])
        settings.set('CONCURRENT_REQUESTS_PER_DOMAIN', preset['concurrent_requests'])
        settings.set('RETRY_TIMES', preset['retry_times'])
        settings.set('DOWNLOAD_TIMEOUT', preset['download_timeout'])
        click.echo(f"   Speed mode: {speed_mode.upper()} (delay: {preset['delay']}s)")

    # Load config and update settings
    import yaml
    with open(config, 'r', encoding='utf-8') as f:
        config_data = yaml.safe_load(f)

    # Override rate limiting if a speed mode is specified
    if speed_mode and 'rate_limiting' in config_data:
        preset = speed_presets[speed_mode]
        config_data['rate_limiting']['delay'] = preset['delay']
        config_data['rate_limiting']['download_delay'] = preset['delay']
        config_data['rate_limiting']['concurrent_requests'] = preset['concurrent_requests']

    # Load proxy settings from config if available
    if config_data.get('proxies', {}).get('enabled', False):
        proxies = config_data.get('proxies', {}).get('list', [])
        proxy_mode = config_data.get('proxies', {}).get('mode', 'rotate')
        settings.set('PROXIES', proxies)
        settings.set('PROXY_MODE', proxy_mode)
        click.echo(f"   Using {len(proxies)} proxy(ies) - Mode: {proxy_mode}")

    # Create output directories
    Path(output).mkdir(parents=True, exist_ok=True)
    Path('data').mkdir(exist_ok=True)
    Path('logs').mkdir(exist_ok=True)

    # Create crawler process
    process = CrawlerProcess(settings)

    # Add spider with optional start URL override
    spider_kwargs = {'config_file': config}
    if start_url_override:
        spider_kwargs['start_urls_override'] = start_url_override
        click.echo(f"   Start URL: {start_url_override[0]}")

    process.crawl(GenericSpider, **spider_kwargs)

    click.echo("\n🚀 Starting crawl...")
    click.echo(f"   Config: {Path(config).name}")
    if speed_mode:
        preset = speed_presets[speed_mode]
        click.echo(f"   Speed mode: {speed_mode.upper()} "
                   f"(delay: {preset['delay']}s, concurrent: {preset['concurrent_requests']})")
    click.echo(f"   Export format: {format}")
    click.echo(f"   Output directory: {output}")

    # Start crawling with a progress spinner
    try:
        # Start spinner
        with Spinner("Crawling products... (this may take a while)"):
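            # CrawlerProcess.start() runs Scrapy's Twisted reactor and blocks here
            # until the crawl finishes (stop_after_crawl=True), while the Spinner
            # animates on its daemon thread.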
            process.start(stop_after_crawl=True)

        click.echo("\n✅ Crawl completed successfully!")

        # Show file output info
        click.echo("\n📁 Check output files in:")
        click.echo(f"   - {Path(output).absolute()}")
        click.echo(f"   - {Path('data').absolute()}")
    except KeyboardInterrupt:
        click.echo("\n⚠️ Crawl interrupted by user")
        sys.exit(1)
    except Exception as e:
        click.echo(f"\n❌ Crawl failed: {e}", err=True)
        sys.exit(1)


@cli.command()
@click.argument('input_file', type=click.Path(exists=True))
@click.option('--format', '-f',
              type=click.Choice(['woocommerce', 'shopify', 'excel', 'json', 'all'], case_sensitive=False),
              default='woocommerce', help='Export format')
@click.option('--output', '-o', default='data/exports', help='Output directory')
def export(input_file: str, format: str, output: str):
    """Export products from JSON file to various formats."""
    import json
    import pandas as pd
    from datetime import datetime
    from scrapy_project.items import Product

    click.echo(f"📦 Exporting products from: {input_file}")

    # Load JSON file
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    if not data:
        click.echo("❌ No products found in file", err=True)
        sys.exit(1)

    # Convert to Product models
    products = []
    for item in data:
        try:
            product = Product(**item)
            products.append(product)
        except Exception as e:
            click.echo(f"⚠️ Warning: Skipping invalid product: {e}")

    click.echo(f"✅ Loaded {len(products)} products")

    # Create output directory
    Path(output).mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Export based on format
    if format == 'woocommerce' or format == 'all':
        rows = [p.to_woocommerce_csv_row() for p in products]
        df = pd.DataFrame(rows)
        filepath = Path(output) / f"woocommerce_products_{timestamp}.csv"
        df.to_csv(filepath, index=False, encoding='utf-8-sig')
        click.echo(f"✅ Exported to WooCommerce: {filepath}")

    if format == 'shopify' or format == 'all':
        # Use to_shopify_csv_rows() to get multiple rows per product (one per image)
        all_rows = []
        for product in products:
            product_rows = product.to_shopify_csv_rows()
            all_rows.extend(product_rows)
        df = pd.DataFrame(all_rows)
        filepath = Path(output) / f"shopify_products_{timestamp}.csv"
        df.to_csv(filepath, index=False, encoding='utf-8-sig')
        click.echo(f"✅ Exported {len(products)} products ({len(all_rows)} rows) to Shopify: {filepath}")

    if format == 'excel' or format == 'all':
        rows = [p.model_dump() for p in products]
        df = pd.DataFrame(rows)
        filepath = Path(output) / f"products_{timestamp}.xlsx"
        df.to_excel(filepath, index=False, engine='openpyxl')
        click.echo(f"✅ Exported to Excel: {filepath}")

    if format == 'json' or format == 'all':
        rows = [p.model_dump() for p in products]
        filepath = Path(output) / f"products_{timestamp}.json"
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(rows, f, ensure_ascii=False, indent=2, default=str)
        click.echo(f"✅ Exported to JSON: {filepath}")

    click.echo("\n✅ Export completed!")


@cli.command()
def version():
    """Show version information."""
    click.echo("SS Crawler v0.1.0")
    click.echo("A safe and efficient web scraping tool for product information")


if __name__ == '__main__':
    cli()
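

# ---------------------------------------------------------------------------
# Illustrative config sketch (an assumption, not the authoritative schema): the
# real YAML files are generated by WebsiteDetector.generate_yaml_config and may
# contain additional keys, but the crawl command above reads/overrides these
# keys directly:
#
#   rate_limiting:
#     delay: 1.5                # overridden by --safe / --medium / --fast
#     download_delay: 1.5
#     concurrent_requests: 1
#   proxies:
#     enabled: false
#     mode: rotate              # applied as the PROXY_MODE setting
#     list: []                  # applied as the PROXIES setting
# ---------------------------------------------------------------------------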