#!/usr/bin/env python3
"""
Batch runner for Google Maps scraper.
Runs all niche × area queries and saves leads without websites to leads.csv
"""

import subprocess, csv, os, re, sys

# Destination CSV that accumulates leads across runs.
LEADS_FILE = "/root/agency/leads.csv"

# Column order of LEADS_FILE.
HEADERS = [
    "name",
    "niche",
    "area",
    "phone",
    "whatsapp",
    "status",
    "last_contact_date",
    "next_followup_date",
    "notes",
]

# (plural term used in the search text, niche label stored on the lead)
_NICHES = [
    ("interior designers", "interior designer"),
    ("dentists", "dentist"),
]

# (location text used in the search, area label stored on the lead)
_AREAS = [
    ("Hayathnagar", "Hayathnagar"),
    ("Vanasthalipuram", "Vanasthalipuram"),
    ("LB Nagar, Hyderabad", "LB Nagar"),
    ("Kothapet, Hyderabad", "Kothapet"),
    ("Nagole, Hyderabad", "Nagole"),
    ("Dilsukhnagar, Hyderabad", "Dilsukhnagar"),
    ("Saroornagar, Hyderabad", "Saroornagar"),
]

# Every niche crossed with every area, as (search text, niche, area) tuples;
# all areas of the first niche come before the second niche.
QUERIES = [
    (f"{plural} in {place}", niche, area)
    for plural, niche in _NICHES
    for place, area in _AREAS
]

def normalize_phone(p):
    """Strip non-digits from *p* and add a ``+91`` / ``+`` prefix where recognised.

    Rules, keyed on the number of digits left after stripping:
      * 11 digits with a leading ``0``  -> the ``0`` is replaced by ``+91``
      * exactly 10 digits              -> ``+91`` is prepended
      * 12 or more, starting with ``91`` -> ``+`` is prepended
    Anything else is returned as the bare digit string.
    """
    digits = re.sub(r'[^\d]', '', p)
    count = len(digits)

    # The three cases are mutually exclusive on length, so order is irrelevant.
    if count == 10:
        return "+91" + digits
    if count == 11 and digits[0] == '0':
        return "+91" + digits[1:]
    if count >= 12 and digits[:2] == '91':
        return "+" + digits
    return digits

_MAX_RESULTS = 30      # value passed to the scraper's -t flag (shown in the banner as the per-search cap)
_QUERY_TIMEOUT = 120   # seconds allowed for a single scraper run
_NO_WEBSITE_VALUES = ("none", "not found", "n/a")


def _has_real_website(website):
    """Return True unless *website* is empty, a placeholder, or a Google search/maps link."""
    if not website or website.lower() in _NO_WEBSITE_VALUES:
        return False
    return 'google.com/search' not in website and 'google.com/maps' not in website


def _iter_leads(result_file, niche, area):
    """Yield one lead dict per row of *result_file* that has no real website."""
    # NOTE(review): assumes the scraper writes UTF-8 — confirm against main.py.
    with open(result_file, "r", newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            # DictReader fills missing trailing fields with None, so `or ""`
            # is needed; a default passed to .get() would not cover that.
            name = (row.get("name") or "").strip()
            phone_raw = (row.get("phone_number") or "").strip()
            website = (row.get("website") or "").strip()

            if _has_real_website(website):
                continue

            phone = normalize_phone(phone_raw) if phone_raw else ""
            yield {
                "name": name,
                "niche": niche,
                "area": area,
                "phone": phone,
                "whatsapp": phone,
                "status": "new",
                "last_contact_date": "",
                "next_followup_date": "",
                "notes": "No website - prime outreach target" + (" | Has phone" if phone else ""),
            }


def _remove_quietly(path):
    """Delete *path*; a missing file is fine, any other failure is only reported."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError as e:
        print(f"⚠ could not remove {path}: {e}")


def _move_aside(path):
    """Rename *path* to an unused ``.unreadable`` sibling and return the new name."""
    backup = path + ".unreadable"
    suffix = 0
    while os.path.exists(backup):
        suffix += 1
        backup = f"{path}.unreadable.{suffix}"
    os.replace(path, backup)
    return backup


def _dedupe_by_name(leads):
    """Keep the first lead per case-insensitive name; drop names of 2 characters or fewer."""
    seen = set()
    unique = []
    for lead in leads:
        key = lead["name"].lower().strip()
        if len(key) > 2 and key not in seen:
            seen.add(key)
            unique.append(lead)
    return unique


def _save_leads(unique):
    """Merge *unique* into LEADS_FILE and return ``(new_count, total_rows)``.

    Existing rows are kept and new leads are appended when their name is not
    already present (case-insensitive). The file is written to a temporary
    sibling and swapped in with ``os.replace`` so a failure while writing
    cannot truncate the existing file.
    """
    existing = []
    fieldnames = list(HEADERS)

    if os.path.exists(LEADS_FILE):
        try:
            with open(LEADS_FILE, "r", newline="", encoding="utf-8") as f:
                reader = csv.DictReader(f)
                rows = list(reader)
                header = reader.fieldnames or []
        except (OSError, UnicodeDecodeError, csv.Error) as e:
            # Still save the new leads, but never overwrite data we failed to
            # read: the old file is kept under another name.
            backup = _move_aside(LEADS_FILE)
            print(f"\n  ⚠ Could not read {LEADS_FILE} ({e}); kept it as {backup}")
        else:
            existing = rows
            # Preserve any extra columns already present in the file.
            for column in header:
                if column not in fieldnames:
                    fieldnames.append(column)

    existing_names = {
        (r.get("name") or "").strip().lower() for r in existing if r.get("name")
    }

    new_count = 0
    for lead in unique:
        key = lead["name"].lower().strip()
        if key not in existing_names:
            existing.append(lead)
            existing_names.add(key)
            new_count += 1

    tmp_path = LEADS_FILE + ".tmp"
    try:
        with open(tmp_path, "w", newline="", encoding="utf-8") as f:
            # extrasaction="ignore": rows read back with more fields than the
            # header carry them under the key None, which would otherwise
            # make DictWriter raise ValueError.
            writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
            writer.writeheader()
            writer.writerows(existing)
        os.replace(tmp_path, LEADS_FILE)
    finally:
        # No-op after a successful replace; cleans up after a failed write.
        _remove_quietly(tmp_path)

    return new_count, len(existing)


def main():
    """Run every query in QUERIES through the scraper and merge the leads into LEADS_FILE.

    For each query the scraper (``main.py`` in ``/root/agency/scraper``) is run
    as a subprocess writing a CSV under ``/tmp``; rows without a real website
    become leads. Leads are de-duplicated by name, a summary is printed, and
    new names are appended to LEADS_FILE. A timeout, a non-zero exit code, or
    any other exception for one query is counted as an error and the batch
    continues with the next query.
    """
    print("=" * 60)
    print("  BATCH GOOGLE MAPS SCRAPER")
    print(f"  {len(QUERIES)} searches × up to {_MAX_RESULTS} results each")
    print("  Filtering: only businesses WITHOUT website")
    print("=" * 60)

    all_leads = []
    errors = 0

    for search_query, niche, area in QUERIES:
        result_file = f"/tmp/scrape_result_{niche.replace(' ', '_')}_{area.replace(' ', '_')}.csv"

        print(f"\n  [{niche} in {area}] Searching: {search_query[:50]}...", end=" ", flush=True)

        try:
            # A file left behind by an earlier run must not be mistaken for
            # the output of this one if the scraper writes nothing.
            _remove_quietly(result_file)

            result = subprocess.run(
                [sys.executable, "main.py", "-s", search_query,
                 "-t", str(_MAX_RESULTS), "-o", result_file],
                cwd="/root/agency/scraper",
                capture_output=True, text=True, timeout=_QUERY_TIMEOUT,
            )

            # Echo the scraper's own "Found ..." summary line, if it printed one.
            found = next((ln for ln in result.stdout.split('\n') if 'Found' in ln), None)
            if found is not None:
                print(f"✓ {found.split('Found')[1].strip()}", end="")

            if result.returncode != 0:
                # Still use whatever rows were written before the failure.
                print(f" ⚠ exit code {result.returncode}", end="")
                errors += 1

            query_leads = 0
            if os.path.exists(result_file):
                # Appending one at a time keeps the rows read so far even if
                # the file turns out to be malformed part-way through.
                for lead in _iter_leads(result_file, niche, area):
                    all_leads.append(lead)
                    query_leads += 1

            print(f" → {query_leads} leads")

        except subprocess.TimeoutExpired:
            print("⏱ timeout")
            errors += 1
        except Exception as e:
            # Per-query boundary: report and move on to the next search.
            print(f"⚠ {e}")
            errors += 1
        finally:
            _remove_quietly(result_file)

    unique = _dedupe_by_name(all_leads)
    with_phone = [lead for lead in unique if lead.get("phone")]

    print(f"\n{'=' * 60}")
    print("  RESULTS")
    print(f"  Total businesses without website: {len(unique)}")
    print(f"  With phone numbers: {len(with_phone)}")
    print(f"  Errors: {errors}")
    print(f"{'=' * 60}")

    if not unique:
        return

    new_count, total = _save_leads(unique)
    print(f"\n  Added {new_count} new leads to {LEADS_FILE}")
    print(f"  Total in CSV: {total}")

    # Preview
    print("\n  ─── TOP 20 LEADS WITH PHONES ───")
    for shown, lead in enumerate(with_phone[:20], 1):
        print(f"  {shown}. {lead['name'][:45]}")
        print(f"     {lead['niche']} | {lead['area']} | {lead['phone']}")

    no_phone = [lead for lead in unique if not lead.get("phone")]
    if no_phone:
        print(f"\n  ─── LEADS WITHOUT PHONES ({len(no_phone)} total) ───")
        for i, lead in enumerate(no_phone[:15], 1):
            print(f"  {i}. {lead['name'][:50]} ({lead['area']})")

# Run the batch only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
