#!/usr/bin/env python3
"""
Script để tìm và test free proxies tự động từ các nguồn public.
Usage:
python3 scripts/find_free_proxies.py
python3 scripts/find_free_proxies.py --test-url https://www.etsy.com
python3 scripts/find_free_proxies.py --max-proxies 10 --timeout 5
"""
import sys
import re
import time
import argparse
import httpx
from typing import List, Tuple
from urllib.parse import urlparse
def fetch_proxies_from_freeproxylist() -> List[str]:
    """Scrape HTTP proxies from the free-proxy-list.net HTML table.

    Returns:
        List of proxy URLs in the form "http://IP:PORT". Empty on any
        failure (network error, non-200 status, or page-layout change);
        errors are reported to stdout, never raised.
    """
    proxies = []
    try:
        url = "https://www.free-proxy-list.net/"
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        with httpx.Client(headers=headers, timeout=10, follow_redirects=True) as client:
            response = client.get(url)
            if response.status_code == 200:
                # Each table row lists the proxy as <td>IP</td><td>Port</td>...
                # NOTE(review): the pattern in the original file was garbled
                # (HTML tags stripped out of the raw string, breaking the
                # syntax); reconstructed from the site's table layout —
                # confirm against the live page markup.
                pattern = r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td><td>(\d+)</td>'
                matches = re.findall(pattern, response.text)
                for ip, port in matches:
                    proxies.append(f"http://{ip}:{port}")
                print(f"✅ Found {len(proxies)} proxies from free-proxy-list.net")
    except Exception as e:
        print(f"⚠️ Error fetching from free-proxy-list.net: {e}")
    return proxies
def fetch_proxies_from_proxyscrape() -> List[str]:
    """Fetch proxies from the ProxyScrape free API (http + https lists).

    Returns:
        List of scheme-prefixed proxy URLs. Empty on error; errors are
        reported to stdout, never raised.
    """
    proxies = []
    try:
        # ProxyScrape free proxy API — one endpoint per protocol.
        urls = [
            "https://api.proxyscrape.com/v2/?request=get&protocol=http&timeout=10000&country=all&ssl=all&anonymity=all",
            "https://api.proxyscrape.com/v2/?request=get&protocol=https&timeout=10000&country=all&ssl=all&anonymity=all",
        ]
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        with httpx.Client(headers=headers, timeout=10, follow_redirects=True) as client:
            for url in urls:
                try:
                    response = client.get(url)
                    if response.status_code == 200:
                        # Response body is newline-separated "IP:PORT" entries.
                        lines = response.text.strip().split('\n')
                        for line in lines:
                            line = line.strip()
                            if line and ':' in line:
                                # Normalize every entry to a scheme-prefixed URL.
                                if not line.startswith('http'):
                                    proxies.append(f"http://{line}")
                                else:
                                    proxies.append(line)
                # Was a bare `except:` — narrowed so SystemExit /
                # KeyboardInterrupt are not swallowed. One endpoint
                # failing must not abort the other.
                except Exception:
                    continue
        if proxies:
            print(f"✅ Found {len(proxies)} proxies from ProxyScrape")
    except Exception as e:
        print(f"⚠️ Error fetching from ProxyScrape: {e}")
    return proxies
def test_proxy(proxy_url: str, test_url: str = "https://httpbin.org/ip", timeout: int = 5) -> Tuple[bool, float]:
    """Probe a single proxy by fetching *test_url* through it.

    Args:
        proxy_url: Proxy in "scheme://host:port" form.
        test_url: URL to fetch through the proxy.
        timeout: Per-request timeout in seconds.

    Returns:
        (success, response_time): success is True only for an HTTP 200
        response; response_time is 0.0 when the request raised.
    """
    # (Removed an unused `urlparse(proxy_url)` result that was never read.)
    # Route both plain-HTTP and HTTPS traffic through the same proxy.
    # NOTE(review): the `proxies=` mapping form is the pre-0.26 httpx API;
    # newer httpx uses `proxy=`/`mounts` — confirm the pinned httpx version.
    proxy_dict = {
        "http://": proxy_url,
        "https://": proxy_url
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    }
    try:
        start_time = time.time()
        with httpx.Client(proxies=proxy_dict, headers=headers, timeout=timeout, follow_redirects=True) as client:
            response = client.get(test_url)
            elapsed = time.time() - start_time
            if response.status_code == 200:
                return True, elapsed
            else:
                return False, elapsed
    except Exception:
        # Any connection/timeout/protocol failure counts as a dead proxy.
        return False, 0.0
def find_and_test_proxies(
    max_proxies: int = 20,
    test_url: str = "https://httpbin.org/ip",
    timeout: int = 5,
    test_etsy: bool = False
) -> List[Tuple[str, float]]:
    """Collect free proxies from all sources and keep the ones that work.

    Args:
        max_proxies: Stop once this many working proxies are found.
        test_url: URL used for the basic reachability test.
        timeout: Per-request timeout in seconds.
        test_etsy: When True, additionally require the proxy to reach Etsy.

    Returns:
        List of (proxy_url, response_time) tuples for working proxies,
        sorted fastest first.
    """
    print("🔍 Finding free proxies...")
    print("=" * 60)
    # Fetch from multiple sources.
    all_proxies = []
    all_proxies.extend(fetch_proxies_from_freeproxylist())
    all_proxies.extend(fetch_proxies_from_proxyscrape())
    # Remove duplicates (order is irrelevant; results are sorted later).
    all_proxies = list(set(all_proxies))
    print(f"\n📊 Total proxies found: {len(all_proxies)}")
    print(f"🧪 Testing proxies (timeout: {timeout}s, max: {max_proxies})...")
    print("=" * 60)
    working_proxies = []
    # Consider up to 3x the requested count, since most free proxies fail.
    # Hoisted out of the loop: the original re-sliced the list (and took
    # its len()) on every iteration just for the progress display.
    candidates = all_proxies[:max_proxies * 3]
    tested = 0
    for proxy in candidates:
        if len(working_proxies) >= max_proxies:
            break
        tested += 1
        print(f"[{tested}/{len(candidates)}] Testing {proxy}...", end=" ", flush=True)
        success, response_time = test_proxy(proxy, test_url, timeout)
        if success:
            # Optionally require the proxy to also reach Etsy.
            if test_etsy:
                etsy_success, _ = test_proxy(proxy, "https://www.etsy.com/shop/KappClass", timeout)
                if etsy_success:
                    print(f"✅ OK ({response_time:.2f}s) - Etsy: ✅")
                    working_proxies.append((proxy, response_time))
                else:
                    print(f"✅ OK ({response_time:.2f}s) - Etsy: ❌ (blocked)")
            else:
                print(f"✅ OK ({response_time:.2f}s)")
                working_proxies.append((proxy, response_time))
        else:
            print("❌ FAILED")
        # Small delay to avoid rate limiting.
        time.sleep(0.1)
    # Fastest proxies first.
    working_proxies.sort(key=lambda x: x[1])
    return working_proxies
def main():
    """CLI entry point: parse args, find proxies, print/save a YAML config.

    Exits with status 1 when no working proxy was found.
    """
    parser = argparse.ArgumentParser(description='Find and test free proxies')
    parser.add_argument('--max-proxies', type=int, default=10,
                        help='Maximum number of working proxies to find (default: 10)')
    parser.add_argument('--test-url', type=str, default='https://httpbin.org/ip',
                        help='URL to test proxies with (default: httpbin.org/ip)')
    parser.add_argument('--timeout', type=int, default=5,
                        help='Timeout in seconds for each test (default: 5)')
    parser.add_argument('--test-etsy', action='store_true',
                        help='Also test proxies with Etsy (slower but more accurate)')
    parser.add_argument('--output', type=str,
                        help='Output file to save working proxies (YAML format)')
    args = parser.parse_args()

    working_proxies = find_and_test_proxies(
        max_proxies=args.max_proxies,
        test_url=args.test_url,
        timeout=args.timeout,
        test_etsy=args.test_etsy
    )

    print("\n" + "=" * 60)
    print("📊 RESULTS")
    print("=" * 60)
    if working_proxies:
        print(f"✅ Found {len(working_proxies)} working proxy(ies):\n")
        for i, (proxy, response_time) in enumerate(working_proxies, 1):
            print(f" {i}. {proxy} ({response_time:.2f}s)")
        # Build a ready-to-paste YAML snippet for the project config.
        yaml_config = "proxies:\n"
        yaml_config += " enabled: true\n"
        yaml_config += " list:\n"
        for proxy, _ in working_proxies:
            yaml_config += f" - {proxy}\n"
        yaml_config += " mode: rotate\n"
        print("\n" + "=" * 60)
        print("💡 YAML Config (copy to your config file):")
        print("=" * 60)
        print(yaml_config)
        # Save to file if requested.
        if args.output:
            with open(args.output, 'w') as f:
                f.write(yaml_config)
            print(f"\n✅ Saved to {args.output}")
    else:
        print("❌ No working proxies found. Try:")
        print(" - Increase --timeout (default: 5s)")
        print(" - Increase --max-proxies (default: 10)")
        print(" - Try again later (free proxies are unstable)")
        # Non-zero exit so callers/scripts can detect failure.
        sys.exit(1)


if __name__ == "__main__":
    main()