#!/usr/bin/env python3
"""
Normalize all config filenames to use dash format (domain-com.yaml).

This script:
1. Finds all config files with dot format (domain.com.yaml)
2. Renames them to dash format (domain-com.yaml)
3. Updates the domain_slug in the config file if needed
"""

import argparse
import shutil
import sys
from pathlib import Path
from typing import Optional
from urllib.parse import urlparse

import yaml

# Add project root to path
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

from scrapy_project.utils.config_utils import normalize_domain_to_slug


def _domain_from_config(config: dict) -> str:
    """Return the host part of ``website.base_url`` without a leading ``www.``.

    Returns an empty string when the config has no usable ``base_url`` or
    when the URL has no network location (e.g. it lacks a scheme).
    """
    # ``website:`` with no value loads as None, so fall back to an empty dict.
    website = config.get('website') or {}
    base_url = website.get('base_url', '') if isinstance(website, dict) else ''
    if not base_url:
        return ''

    domain = urlparse(base_url).netloc
    if domain.startswith('www.'):
        domain = domain[4:]
    return domain


def normalize_config_filename(config_path: Path, dry_run: bool = True) -> bool:
    """
    Normalize a config filename from dot format to dash format.

    The new name is derived from ``website.base_url`` inside the config (not
    from the current filename): the URL's host, minus a leading ``www.``, is
    passed through ``normalize_domain_to_slug`` and given a ``.yaml`` suffix.
    When not in dry-run mode, the config is re-serialized to the new file
    with ``export.domain_slug`` set to the new slug, and the old file is
    deleted.

    Args:
        config_path: Path to config file
        dry_run: If True, only show what would be done

    Returns:
        True if the file was renamed (or, in dry-run mode, would be renamed),
        False if it was skipped or an error occurred
    """
    name = config_path.stem

    # Only dot-format stems need work. Files already in dash format and
    # special cases such as 'generic_e-commerce' contain no dot and are
    # skipped here; hyphenated domains like 'my-shop.com' are NOT skipped.
    if '.' not in name:
        return False

    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f) or {}

        if not isinstance(config, dict):
            print(f"⚠️ Skipping {config_path.name}: Config is not a mapping")
            return False

        website = config.get('website') or {}
        has_base_url = isinstance(website, dict) and bool(website.get('base_url'))
        if not has_base_url:
            print(f"⚠️ Skipping {config_path.name}: No base_url found")
            return False

        domain = _domain_from_config(config)
        if not domain:
            # e.g. base_url without a scheme: urlparse() yields an empty netloc,
            # and an empty domain must never be turned into a filename.
            print(f"⚠️ Skipping {config_path.name}: Could not extract domain from base_url")
            return False

        # Generate new filename
        new_slug = normalize_domain_to_slug(domain)
        new_path = config_path.parent / f"{new_slug}.yaml"

        # Nothing to rename; writing and then unlinking would delete the file.
        if new_path == config_path:
            return False

        # Never overwrite an existing config
        if new_path.exists():
            print(f"⚠️ Skipping {config_path.name}: Target {new_path.name} already exists")
            return False

        if dry_run:
            print(f"📝 Would rename: {config_path.name} → {new_path.name}")
            print(f" Domain: {domain}")
        else:
            # Update domain_slug in config if needed (``export:`` may be empty)
            if config.get('export') is None:
                config['export'] = {}
            config['export']['domain_slug'] = new_slug

            # Write updated config to new file
            try:
                with open(new_path, 'w', encoding='utf-8') as f:
                    yaml.dump(config, f, default_flow_style=False,
                              allow_unicode=True, sort_keys=False)
            except Exception:
                # Don't leave a truncated target behind: later runs would
                # skip this config because the target "already exists".
                if new_path.exists():
                    new_path.unlink()
                raise

            # Remove old file only after the new one was written successfully
            config_path.unlink()
            print(f"✅ Renamed: {config_path.name} → {new_path.name}")

        return True

    except Exception as e:
        # Per-file boundary: report and continue with the remaining configs.
        print(f"❌ Error processing {config_path.name}: {e}")
        return False


def normalize_all_configs(dry_run: bool = True,
                          config_dir: Optional[Path] = None) -> None:
    """Normalize all config filenames to dash format.

    Args:
        dry_run: If True, only report what would be renamed
        config_dir: Directory holding the ``*.yaml`` / ``*.yml`` configs.
            Defaults to ``configs`` relative to the current working directory.
    """
    config_dir = Path('configs') if config_dir is None else Path(config_dir)

    if not config_dir.exists():
        print(f"Config directory not found: {config_dir}")
        return

    all_configs = list(config_dir.glob('*.yaml')) + list(config_dir.glob('*.yml'))

    print("=== Normalize Config Filenames ===\n")

    # Sorted for stable, reproducible output
    renamed_count = sum(
        1 for config_file in sorted(all_configs)
        if normalize_config_filename(config_file, dry_run=dry_run)
    )

    if renamed_count == 0:
        print("\n✅ All config files are already in the correct format!")
    elif dry_run:
        print(f"\n🔍 DRY RUN: Would rename {renamed_count} file(s)")
        print(" Run with --execute to actually rename files")
    else:
        print(f"\n✅ Renamed {renamed_count} file(s)")


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Normalize config filenames to dash format')
    parser.add_argument('--execute', action='store_true',
                        help='Actually rename files (default: dry run)')
    parser.add_argument('--config-dir', type=Path, default=Path('configs'),
                        help='Directory containing the config files (default: configs)')
    args = parser.parse_args()

    normalize_all_configs(dry_run=not args.execute, config_dir=args.config_dir)