#!/usr/bin/env python3
"""
Automatically find and test free proxies from public sources.

Usage:
    python3 scripts/find_free_proxies.py
    python3 scripts/find_free_proxies.py --test-url https://www.etsy.com
    python3 scripts/find_free_proxies.py --max-proxies 10 --timeout 5
"""
import sys
import re
import time
import argparse
import httpx
from typing import List, Tuple
from urllib.parse import urlparse

# Browser-like User-Agent shared by every request so public sources and the
# proxies themselves don't reject us as an obvious bot.
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'


def fetch_proxies_from_freeproxylist() -> List[str]:
    """Fetch proxies from free-proxy-list.net.

    Returns:
        List of "http://IP:PORT" proxy URLs; empty on any fetch/parse error.
    """
    proxies: List[str] = []
    try:
        url = "https://www.free-proxy-list.net/"
        headers = {'User-Agent': _USER_AGENT}
        with httpx.Client(headers=headers, timeout=10, follow_redirects=True) as client:
            response = client.get(url)
            if response.status_code == 200:
                # The page lists proxies in an HTML table as
                # <td>IP</td><td>Port</td>. Match both cells explicitly:
                # a bare (\d+\.\d+\.\d+\.\d+)(\d+) pattern never matches the
                # real markup and, on raw digit runs, lets the greedy last
                # octet swallow digits that belong to the port.
                pattern = r'<td>(\d{1,3}(?:\.\d{1,3}){3})</td><td>(\d{1,5})</td>'
                for ip, port in re.findall(pattern, response.text):
                    proxies.append(f"http://{ip}:{port}")
                print(f"✅ Found {len(proxies)} proxies from free-proxy-list.net")
    except Exception as e:
        print(f"⚠️ Error fetching from free-proxy-list.net: {e}")
    return proxies


def fetch_proxies_from_proxyscrape() -> List[str]:
    """Fetch proxies from the ProxyScrape free-proxy API.

    Returns:
        List of "http://IP:PORT" proxy URLs; empty on any fetch error.
    """
    proxies: List[str] = []
    try:
        # ProxyScrape free proxy API — one endpoint per protocol.
        urls = [
            "https://api.proxyscrape.com/v2/?request=get&protocol=http&timeout=10000&country=all&ssl=all&anonymity=all",
            "https://api.proxyscrape.com/v2/?request=get&protocol=https&timeout=10000&country=all&ssl=all&anonymity=all",
        ]
        headers = {'User-Agent': _USER_AGENT}
        with httpx.Client(headers=headers, timeout=10, follow_redirects=True) as client:
            for url in urls:
                try:
                    response = client.get(url)
                    if response.status_code != 200:
                        continue
                    # Response body is newline-separated "IP:PORT" entries.
                    for line in response.text.strip().split('\n'):
                        line = line.strip()
                        if line and ':' in line:
                            # Normalize to a full URL unless one was returned.
                            proxies.append(
                                line if line.startswith('http') else f"http://{line}"
                            )
                except Exception:
                    # Best-effort: skip a failing endpoint, try the next.
                    # (Was a bare `except:`, which also swallowed Ctrl-C.)
                    continue
        if proxies:
            print(f"✅ Found {len(proxies)} proxies from ProxyScrape")
    except Exception as e:
        print(f"⚠️ Error fetching from ProxyScrape: {e}")
    return proxies


def test_proxy(proxy_url: str, test_url: str = "https://httpbin.org/ip",
               timeout: int = 5) -> Tuple[bool, float]:
    """Test a single proxy by fetching *test_url* through it.

    Args:
        proxy_url: Full proxy URL, e.g. "http://1.2.3.4:8080".
        test_url: URL fetched through the proxy.
        timeout: Per-request timeout in seconds.

    Returns:
        (success, response_time): success is True only for an HTTP 200;
        response_time is 0.0 when the request raised.
    """
    # Route both plain and TLS traffic through the same proxy.
    proxy_dict = {
        "http://": proxy_url,
        "https://": proxy_url
    }
    headers = {
        'User-Agent': _USER_AGENT,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    }
    try:
        start_time = time.time()
        with httpx.Client(proxies=proxy_dict, headers=headers, timeout=timeout,
                          follow_redirects=True) as client:
            response = client.get(test_url)
            elapsed = time.time() - start_time
            if response.status_code == 200:
                return True, elapsed
            else:
                return False, elapsed
    except Exception:
        return False, 0.0


def find_and_test_proxies(
    max_proxies: int = 20,
    test_url: str = "https://httpbin.org/ip",
    timeout: int = 5,
    test_etsy: bool = False
) -> List[Tuple[str, float]]:
    """Find free proxies from public sources and keep the ones that work.

    Args:
        max_proxies: Stop after this many working proxies.
        test_url: URL used for the connectivity check.
        timeout: Per-proxy test timeout in seconds.
        test_etsy: When True, additionally require the proxy to reach Etsy.

    Returns:
        List of (proxy_url, response_time) tuples for working proxies,
        sorted fastest-first.
    """
    print("🔍 Finding free proxies...")
    print("=" * 60)

    # Fetch from multiple sources, then dedupe (sources overlap).
    all_proxies = []
    all_proxies.extend(fetch_proxies_from_freeproxylist())
    all_proxies.extend(fetch_proxies_from_proxyscrape())
    all_proxies = list(set(all_proxies))

    print(f"\n📊 Total proxies found: {len(all_proxies)}")
    print(f"🧪 Testing proxies (timeout: {timeout}s, max: {max_proxies})...")
    print("=" * 60)

    # Most free proxies are dead, so probe up to 3x the requested count.
    # Hoisted out of the loop: the original rebuilt this slice (and its len)
    # on every iteration just for the progress display.
    candidates = all_proxies[:max_proxies * 3]
    total = len(candidates)

    working_proxies: List[Tuple[str, float]] = []
    for tested, proxy in enumerate(candidates, 1):
        if len(working_proxies) >= max_proxies:
            break
        print(f"[{tested}/{total}] Testing {proxy}...", end=" ", flush=True)
        success, response_time = test_proxy(proxy, test_url, timeout)
        if success:
            if test_etsy:
                # A proxy can reach httpbin yet still be blocked by Etsy.
                etsy_success, _ = test_proxy(
                    proxy, "https://www.etsy.com/shop/KappClass", timeout
                )
                if etsy_success:
                    print(f"✅ OK ({response_time:.2f}s) - Etsy: ✅")
                    working_proxies.append((proxy, response_time))
                else:
                    print(f"✅ OK ({response_time:.2f}s) - Etsy: ❌ (blocked)")
            else:
                print(f"✅ OK ({response_time:.2f}s)")
                working_proxies.append((proxy, response_time))
        else:
            print("❌ FAILED")
        # Small delay to avoid hammering the test endpoint.
        time.sleep(0.1)

    # Fastest proxies first.
    working_proxies.sort(key=lambda x: x[1])
    return working_proxies


def main():
    """CLI entry point: parse args, run the search, print/save results."""
    parser = argparse.ArgumentParser(description='Find and test free proxies')
    parser.add_argument('--max-proxies', type=int, default=10,
                        help='Maximum number of working proxies to find (default: 10)')
    parser.add_argument('--test-url', type=str, default='https://httpbin.org/ip',
                        help='URL to test proxies with (default: httpbin.org/ip)')
    parser.add_argument('--timeout', type=int, default=5,
                        help='Timeout in seconds for each test (default: 5)')
    parser.add_argument('--test-etsy', action='store_true',
                        help='Also test proxies with Etsy (slower but more accurate)')
    parser.add_argument('--output', type=str,
                        help='Output file to save working proxies (YAML format)')
    args = parser.parse_args()

    working_proxies = find_and_test_proxies(
        max_proxies=args.max_proxies,
        test_url=args.test_url,
        timeout=args.timeout,
        test_etsy=args.test_etsy
    )

    print("\n" + "=" * 60)
    print("📊 RESULTS")
    print("=" * 60)

    if working_proxies:
        print(f"✅ Found {len(working_proxies)} working proxy(ies):\n")
        for i, (proxy, response_time) in enumerate(working_proxies, 1):
            print(f" {i}. {proxy} ({response_time:.2f}s)")

        # Generate a ready-to-paste YAML snippet for the app config.
        yaml_config = "proxies:\n"
        yaml_config += " enabled: true\n"
        yaml_config += " list:\n"
        for proxy, _ in working_proxies:
            yaml_config += f" - {proxy}\n"
        yaml_config += " mode: rotate\n"

        print("\n" + "=" * 60)
        print("💡 YAML Config (copy to your config file):")
        print("=" * 60)
        print(yaml_config)

        # Save to file if requested.
        if args.output:
            with open(args.output, 'w') as f:
                f.write(yaml_config)
            print(f"\n✅ Saved to {args.output}")
    else:
        print("❌ No working proxies found. Try:")
        print(" - Increase --timeout (default: 5s)")
        print(" - Increase --max-proxies (default: 10)")
        print(" - Try again later (free proxies are unstable)")
        sys.exit(1)


if __name__ == "__main__":
    main()