#!/usr/bin/env python3
"""Scrape remaining areas in small batches to avoid blocking/timeout"""

import csv, os, re, sys, time, subprocess, json

LEADS_FILE = "/root/agency/leads.csv"
SCRAPER_DIR = "/root/agency/scraper"

# Planned scrape batches.
# NOTE(review): BATCHES is defined but never read in this script — the loop
# below iterates AREAS instead; confirm before deleting.
BATCHES = [
    ["LB Nagar", "Kothapet"],
    ["Nagole", "Dilsukhnagar", "Saroornagar"],
]

# Areas covered by this run: one scraper search per (area x niche) pair.
AREAS = ["LB Nagar", "Kothapet"]
# Raw search phrases fed to the scraper; several phrases funnel into one
# canonical niche label via niche_normalized_map defined further down.
NICHES = [
    "interior designers",
    "dentists",
    "dental clinic",
    "interior design studio",
]

def normalize_name(name):
    """Return *name* lowercased with surrounding whitespace removed.

    Used as the deduplication key for leads.
    """
    return name.lower().strip()

def name_to_whatsapp(phone):
    """Normalize an Indian phone number to a bare 10-digit WhatsApp number.

    Strips every non-digit character, then removes a leading "91" country
    code (12-digit form) or a leading "0" trunk prefix (11-digit form).
    Anything else — including already-clean 10-digit numbers — is returned
    unchanged after digit-stripping, so no data is ever lost.

    NOTE: despite the name, the argument is a phone number, not a name.
    """
    digits = re.sub(r'\D', '', phone)
    if len(digits) == 12 and digits.startswith("91"):
        return digits[2:]
    if len(digits) == 11 and digits.startswith("0"):
        return digits[1:]
    # 10-digit numbers and any unexpected lengths pass through as-is
    # (the original had a redundant len == 10 branch returning the same value).
    return digits

def load_existing(path=None):
    """Return the set of normalized (stripped, lowercased) lead names
    already present in the leads CSV.

    Args:
        path: CSV file to read; defaults to LEADS_FILE.

    A missing or unreadable file yields an empty set so a fresh run can
    bootstrap the database. Only I/O and CSV/decoding errors are
    swallowed — the original bare ``except:`` also hid programming errors
    and KeyboardInterrupt.
    """
    if path is None:
        path = LEADS_FILE
    names = set()
    if os.path.exists(path):
        try:
            with open(path, newline='', encoding='utf-8') as f:
                for row in csv.DictReader(f):
                    n = row.get('name', '').strip().lower()
                    if n:
                        names.add(n)
        except (OSError, csv.Error, UnicodeDecodeError):
            # Best-effort: a corrupt or locked file must not abort the run.
            pass
    return names

# Seed the dedup set from the existing leads database so re-runs never
# append duplicate businesses.
existing = load_existing()
print(f"📋 Existing unique leads: {len(existing)}")

# Maps each raw search phrase to the canonical niche label stored in the
# CSV (several phrases funnel into one niche).
niche_normalized_map = {
    "interior designers": "interior designer",
    "dentists": "dentist",
    "dental clinic": "dentist",
    "interior design studio": "interior designer",
}

all_appended = []  # rows accumulated across every search, written once at the end
run_count = 0  # sequential search counter, used in logs and temp filenames

# One scraper invocation per (area, niche) pair. Each run writes results to
# a throwaway CSV under /tmp, which is parsed, filtered, deduped into
# all_appended, then deleted. Failures (non-zero exit, timeout, parse
# errors) skip that run but never abort the whole batch.
for area in AREAS:
    for niche_raw in NICHES:
        # .get() fallback keeps the loop alive if NICHES ever gains a phrase
        # that niche_normalized_map does not cover (original raised KeyError).
        niche_normalized = niche_normalized_map.get(niche_raw, niche_raw)
        run_count += 1
        search = f"{niche_raw} in {area} Hyderabad"
        
        print(f"\n{'='*50}")
        print(f"🔍 {run_count}. {search}")
        print(f"{'='*50}")
        
        # Timestamp + counter keeps temp filenames unique across runs.
        result_file = f"/tmp/maps_batch_{int(time.time())}_{run_count}.csv"
        
        try:
            result = subprocess.run(
                ["python3", "main.py", "-s", search, "-t", "15", "-o", result_file],
                capture_output=True, text=True,
                cwd=SCRAPER_DIR,
                timeout=90
            )
            
            if result.stderr:
                print(result.stderr.strip())
            if result.returncode != 0:
                print(f"   ⚠️ Exit code {result.returncode}")
                continue
        except subprocess.TimeoutExpired:
            print(f"   ⚠️ Timeout")
            continue
        
        if os.path.exists(result_file):
            try:
                with open(result_file, newline='', encoding='utf-8') as f:
                    reader = csv.DictReader(f)
                    appended = 0
                    skipped_dup = 0
                    skipped_has_website = 0
                    skipped_no_phone = 0
                    
                    for row in reader:
                        name = row.get('name', '').strip()
                        phone = row.get('phone_number', '').strip()
                        website = row.get('website', '').strip()
                        has_website_flag = row.get('has_website', 'False').strip()
                        
                        if not name:
                            continue
                        
                        nkey = normalize_name(name)
                        
                        # Skip leads we already hold (dedup by normalized name).
                        if nkey in existing:
                            skipped_dup += 1
                            continue
                        
                        # Businesses that already have a website are not prospects.
                        if has_website_flag.lower() == 'true' or website:
                            skipped_has_website += 1
                            continue
                        
                        # A lead without a phone number cannot be contacted.
                        if not phone:
                            skipped_no_phone += 1
                            continue
                        
                        whatsapp = name_to_whatsapp(phone)
                        
                        # Column order must match the header row written at the
                        # end of the script: name, niche, area, phone, whatsapp,
                        # status, last_contact_date, next_followup_date, notes.
                        all_appended.append([
                            name,
                            niche_normalized,
                            area,
                            phone,
                            whatsapp,
                            "cold",
                            "",
                            "",
                            f"auto-scraped from Google Maps ({niche_raw})"
                        ])
                        existing.add(nkey)
                        appended += 1
                    
                    print(f"   → {appended} new | {skipped_has_website} has site | {skipped_no_phone} no phone | {skipped_dup} dups")
                    
            except Exception as e:
                print(f"   ⚠️ Parse error: {e}")
            
            try:
                os.remove(result_file)
            except OSError:
                # Temp-file cleanup is best-effort only
                # (original bare except also hid non-I/O errors).
                pass
        
        # Brief pause between searches to avoid hammering Google Maps.
        time.sleep(2)

# Append this batch to the master CSV in a single pass; write the header
# only when the file is new/empty (the >10-byte check tolerates a stray
# blank or whitespace-only file).
if all_appended:
    file_exists = os.path.isfile(LEADS_FILE) and os.path.getsize(LEADS_FILE) > 10
    with open(LEADS_FILE, 'a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        if not file_exists:
            writer.writerow(["name","niche","area","phone","whatsapp","status","last_contact_date","next_followup_date","notes"])
        writer.writerows(all_appended)
    
    print(f"\n✅ Appended {len(all_appended)} leads to {LEADS_FILE}")
    # Re-count lines with a context manager so the handle is closed
    # deterministically (the original left an open() dangling inside the
    # f-string expression). The -1 discounts the header row.
    with open(LEADS_FILE, encoding='utf-8') as f:
        total = sum(1 for _ in f) - 1
    print(f"📊 Total: {total} leads in database")
else:
    print(f"\n😕 No new leads this batch")
