#!/usr/bin/env python3
"""Sequential batch runner - unbuffered.

Runs every search in QUERIES through the scraper one after another, keeps
the businesses that have no real website, and merges them into LEADS_FILE.
"""
import sys
# Flush stdout at every newline so progress shows up promptly even when the
# output is piped or redirected.
sys.stdout.reconfigure(line_buffering=True)

import subprocess, csv, os, re

# CSV that collects the leads; existing rows are read back and merged on save.
LEADS_FILE = "/root/agency/leads.csv"

# Directory holding one result CSV per search; created up front if missing.
RESULTS_DIR = "/tmp/scraper_results"
os.makedirs(RESULTS_DIR, exist_ok=True)

# Column order of the leads CSV.
HEADERS = [
    "name",
    "niche",
    "area",
    "phone",
    "whatsapp",
    "status",
    "last_contact_date",
    "next_followup_date",
    "notes",
]

# Searches grouped by niche: each entry is (search text sent to the scraper,
# area label stored on the lead).
_SEARCHES_BY_NICHE = {
    "interior designer": [
        ("interior designers in Hayathnagar", "Hayathnagar"),
        ("interior designers in Vanasthalipuram", "Vanasthalipuram"),
        ("interior designers in LB Nagar, Hyderabad", "LB Nagar"),
        ("interior designers in Kothapet, Hyderabad", "Kothapet"),
        ("interior designers in Nagole, Hyderabad", "Nagole"),
        ("interior designers in Dilsukhnagar, Hyderabad", "Dilsukhnagar"),
        ("interior designers in Saroornagar, Hyderabad", "Saroornagar"),
    ],
    "dentist": [
        ("dentists in Hayathnagar", "Hayathnagar"),
        ("dentists in Vanasthalipuram", "Vanasthalipuram"),
        ("dentists in LB Nagar, Hyderabad", "LB Nagar"),
        ("dentists in Kothapet, Hyderabad", "Kothapet"),
        ("dentists in Nagole, Hyderabad", "Nagole"),
        ("dentists in Dilsukhnagar, Hyderabad", "Dilsukhnagar"),
        ("dentists in Saroornagar, Hyderabad", "Saroornagar"),
    ],
}

# Flat list of (search_query, niche, area) tuples, in the order they are run.
QUERIES = [
    (search_text, niche_label, area_label)
    for niche_label, searches in _SEARCHES_BY_NICHE.items()
    for search_text, area_label in searches
]

def normalize_phone(p):
    """Reduce a phone string to digits and add the +91 prefix where it fits.

    Everything that is not a digit is removed first. Then:

    * 10 digits                          -> "+91" + digits
    * 11 digits with a leading "0"       -> "+91" + digits without the "0"
    * 12 or more digits starting "91"    -> "+" + digits
    * anything else                      -> the bare digits (possibly "")
    """
    digits = re.sub(r'[^\d]', '', p)
    count = len(digits)

    if count == 10:
        normalized = "+91" + digits
    elif count == 11 and digits.startswith('0'):
        # Leading trunk "0" is replaced by the country code.
        normalized = "+91" + digits[1:]
    elif count >= 12 and digits.startswith('91'):
        # Country code already present; only the "+" is missing.
        normalized = "+" + digits
    else:
        normalized = digits
    return normalized

def has_real_website(website):
    """Return True when ``website`` is something other than a placeholder or a Google link.

    The value is trimmed and lower-cased before checking, so padded or
    whitespace-only values such as "  N/A " or "   " count as "no website".

    Args:
        website: Raw website cell; may be None or empty.

    Returns:
        False for empty values, for the placeholders "none", "not found" and
        "n/a", and for URLs containing "google.com/search", "google.com/maps"
        or "google.com/url"; True otherwise.
    """
    if not website:
        return False

    url = website.strip().lower()
    # The empty string covers values that were only whitespace.
    if url in ('', 'none', 'not found', 'n/a'):
        return False

    google_links = ('google.com/search', 'google.com/maps', 'google.com/url')
    return not any(marker in url for marker in google_links)

# Results requested from the scraper per search (passed as "-t" and shown in
# the banner).
RESULTS_PER_SEARCH = 30
# Seconds a single scraper run may take before that search is abandoned.
SCRAPER_TIMEOUT = 120


def _iter_leads(result_file, niche, area):
    """Yield one lead dict per named row of ``result_file`` that has no real website.

    Args:
        result_file: Path of the CSV written by the scraper for one search.
        niche: Niche label stored on every lead from this file.
        area: Area label stored on every lead from this file.

    Yields:
        Dicts with the leads-CSV columns; ``phone``/``whatsapp`` hold the
        normalized number, or "" when the row has no phone.
    """
    # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM, which would
    # otherwise end up inside the first column name.
    with open(result_file, "r", newline="", encoding="utf-8-sig") as f:
        for row in csv.DictReader(f):
            # DictReader fills cells missing from short rows with None, so
            # fall back to "" before stripping.
            name = (row.get("name") or "").strip()
            if not name:
                continue
            website = (row.get("website") or "").strip()
            if has_real_website(website):
                continue
            phone_raw = (row.get("phone_number") or "").strip()
            phone = normalize_phone(phone_raw) if phone_raw else ""
            yield {
                "name": name, "niche": niche, "area": area,
                "phone": phone, "whatsapp": phone,
                "status": "new", "last_contact_date": "",
                "next_followup_date": "",
                "notes": "No website" + (" | Phone ✓" if phone else ""),
            }


total_queries = len(QUERIES)

print("=" * 60)
print("  BATCH GOOGLE MAPS SCRAPER")
print(f"  {total_queries} searches × {RESULTS_PER_SEARCH} results each")
print("=" * 60)

all_leads = []

for i, (search_query, niche, area) in enumerate(QUERIES, 1):
    safe = f"{niche.replace(' ','_')}_{area.replace(' ','_')}"
    result_file = f"{RESULTS_DIR}/{safe}.csv"

    print(f"\n[{i:02d}/{total_queries:02d}] {niche.title()} in {area}...", end=" ", flush=True)

    try:
        # Drop output left by an earlier run so a scraper failure cannot make
        # stale rows look like fresh results.
        try:
            os.remove(result_file)
        except FileNotFoundError:
            pass

        result = subprocess.run(
            [sys.executable, "-u", "main.py", "-s", search_query,
             "-t", str(RESULTS_PER_SEARCH), "-o", result_file],
            cwd="/root/agency/scraper",
            capture_output=True, text=True, timeout=SCRAPER_TIMEOUT
        )

        # Echo the scraper's own "Found"/"Saved" progress lines.
        for line in result.stdout.splitlines():
            if 'Found' in line or 'Saved' in line:
                print(f"({line.strip().split('- ')[-1]})", end=" ", flush=True)

        if result.returncode != 0:
            print(f"(exit code {result.returncode})", end=" ", flush=True)

        if os.path.exists(result_file):
            count = 0
            # Append lead by lead so rows read before a mid-file error are kept.
            for lead in _iter_leads(result_file, niche, area):
                all_leads.append(lead)
                count += 1
            print(f"→ {count} leads", flush=True)
        else:
            print("→ no results file", flush=True)

    except subprocess.TimeoutExpired:
        print("⏱ TIMEOUT", flush=True)
    except Exception as e:
        # Best effort: one failing search must not stop the remaining ones.
        print(f"⚠ {e}", flush=True)

# Dedup: keep the first lead seen for each case-insensitive, trimmed name.
first_by_name = {}
for candidate in all_leads:
    name_key = candidate["name"].lower().strip()
    if name_key:
        # setdefault leaves an earlier entry untouched, so the first one wins
        # and dict ordering preserves first-seen order.
        first_by_name.setdefault(name_key, candidate)
unique = list(first_by_name.values())

with_phone = len([lead for lead in unique if lead.get("phone")])
rule = "=" * 60
print("\n" + rule)
print(f"  TOTAL: {len(unique)} businesses without website")
print(f"  WITH PHONE: {with_phone}")
print(rule)

# Save: merge the new leads into LEADS_FILE without losing what is already there.
existing = []
extra_columns = []  # columns found in the file that are not part of HEADERS
if os.path.exists(LEADS_FILE):
    try:
        # "utf-8-sig" also accepts a file that starts with a BOM.
        with open(LEADS_FILE, "r", newline="", encoding="utf-8-sig") as f:
            reader = csv.DictReader(f)
            existing = list(reader)
            extra_columns = list(dict.fromkeys(
                col for col in (reader.fieldnames or [])
                if col and col not in HEADERS
            ))
    except (OSError, csv.Error, UnicodeError) as e:
        # The file is about to be rewritten, so set the unreadable one aside
        # first. If even that fails the error propagates and nothing is
        # overwritten.
        backup_file = f"{LEADS_FILE}.bak"
        print(f"  ⚠ Could not read {LEADS_FILE} ({e}); moving it to {backup_file}", flush=True)
        existing = []
        extra_columns = []
        os.replace(LEADS_FILE, backup_file)

existing_names = {r.get("name", "").strip().lower() for r in existing if r.get("name")}
new_count = 0
for lead in unique:
    name_key = lead["name"].lower().strip()
    if name_key not in existing_names:
        existing.append(lead)
        existing_names.add(name_key)
        new_count += 1

# Write to a sibling temp file and swap it in, so a failure while writing
# cannot leave LEADS_FILE truncated.
tmp_file = f"{LEADS_FILE}.tmp"
with open(tmp_file, "w", newline="", encoding="utf-8") as f:
    # Extra columns from the existing file are kept after HEADERS.
    # extrasaction="ignore" skips keys that are not real columns, e.g. the
    # None key DictReader uses for cells beyond the header.
    writer = csv.DictWriter(f, fieldnames=HEADERS + extra_columns, extrasaction="ignore")
    writer.writeheader()
    writer.writerows(existing)
os.replace(tmp_file, LEADS_FILE)

print(f"\n  Added {new_count} new leads")
print(f"  Total in CSV: {len(existing)}")

# Report: the first 20 leads that have a phone, then up to 10 that do not.
print("\n  ─── TOP 20 LEADS WITH PHONES ───")
reachable = [lead for lead in unique if lead.get("phone")]
for rank, lead in enumerate(reachable[:20], start=1):
    print(f"  {rank}. {lead['name'][:45]}")
    print(f"     {lead['niche']} | {lead['area']} | {lead['phone']}")

no_phone = [lead for lead in unique if not lead.get("phone")]
if no_phone:
    print(f"\n  ─── NO PHONE ({len(no_phone)}) ───")
    for rank, lead in enumerate(no_phone[:10], start=1):
        print(f"  {rank}. {lead['name'][:50]} ({lead['area']})")

# Path of the leads CSV.
print(f"\n  → {LEADS_FILE}")
