#!/usr/bin/env python3
"""
Google Maps Scraper - Fixed for current Google Maps (May 2026)
Extracts data from the search results panel directly.
"""

import logging, csv, os, re, sys, time
from typing import List, Optional
from playwright.sync_api import sync_playwright, Page
from dataclasses import dataclass, asdict
import pandas as pd

@dataclass
class Place:
    """One business listing collected from a Google Maps search.

    Every field has a default, so ``Place()`` yields an empty record that
    can be filled in attribute by attribute. Field order is also the column
    order produced when instances are converted with ``asdict``.
    """

    # Business name as shown on the listing card.
    name: str = ""
    # Phone number as scraped; save_places_to_csv normalizes the exported copy.
    phone_number: str = ""
    # External website URL, or "" when none was detected.
    website: str = ""
    # Street address, or "" when unknown.
    address: str = ""
    # Average review score; None by default (no rating recorded).
    reviews_average: Optional[float] = None
    # Category label of the business, or "" when unknown.
    place_type: str = ""

def setup_logging():
    """Configure the root logger: INFO level, timestamp-prefixed messages.

    Delegates to ``logging.basicConfig``, so the call has no effect when the
    root logger already has handlers installed.
    """
    log_options = {
        "level": logging.INFO,
        "format": '%(asctime)s - %(message)s',
    }
    logging.basicConfig(**log_options)

# JavaScript evaluated inside the page to read every listing card in the
# results feed. Kept as a raw string so regex escapes such as ``\s`` reach the
# browser untouched instead of being parsed as Python escape sequences.
_EXTRACT_LISTINGS_JS = r"""
    () => {
        const results = [];
        const seen = new Set();

        const feed = document.querySelector('[role="feed"]');
        if (!feed) return [];

        // Every result card inside the feed.
        const items = feed.querySelectorAll('div[role="article"]');

        items.forEach(item => {
            try {
                const nameEl = item.querySelector('.qBF1Pd, .fontHeadlineSmall');
                const name = nameEl ? nameEl.textContent.trim() : '';
                // Cards are de-duplicated by their displayed name.
                if (!name || seen.has(name)) return;
                seen.add(name);

                // Full visible text of the listing card.
                const text = item.innerText;

                // Either a 0-prefixed 11-digit number ("098765 43210") or a
                // bare 10-digit mobile number ("98765 43210").
                let phone = '';
                const phoneMatch = text.match(/0[0-9]{5}[\s-]?[0-9]{5}|[6-9][0-9]{4}[\s-]?[0-9]{5}/);
                if (phoneMatch) {
                    phone = phoneMatch[0].replace(/[\s-]/g, '');
                }

                let rating = '';
                const ratingEl = item.querySelector('[aria-label*="star"], .MW4etd');
                if (ratingEl) {
                    const rt = ratingEl.getAttribute('aria-label') || ratingEl.textContent;
                    const rm = rt.match(/[0-9.]+/);
                    if (rm) rating = rm[0];
                }

                // Any absolute link that is not a Google Maps / Google Search
                // URL is treated as the business website (last one wins).
                let website = '';
                let hasWebsite = false;
                item.querySelectorAll('a[href*="://"]').forEach(a => {
                    const href = a.href;
                    if (href && !href.includes('google.com/maps') && !href.includes('google.com/search')) {
                        website = href;
                        hasWebsite = true;
                    }
                });

                results.push({
                    name: name,
                    phone: phone,
                    rating: rating,
                    website: website,
                    has_website: hasWebsite,
                });
            } catch (e) {}
        });

        return results;
    }
"""


def _place_from_record(item: dict) -> Place:
    """Convert one listing dict returned by the page script into a Place."""
    raw_rating = item.get('rating')
    try:
        rating = float(raw_rating) if raw_rating else None
    except (TypeError, ValueError):
        # Unparseable rating text (e.g. a lone "."): treat it as missing.
        rating = None

    # The page script does not emit 'address' or 'category' today, so those
    # two fields fall back to "" until the script is extended.
    return Place(
        name=item.get('name', ''),
        phone_number=item.get('phone', ''),
        website=item.get('website', ''),
        address=item.get('address', ''),
        reviews_average=rating,
        place_type=item.get('category', ''),
    )


def scrape_places(search_for: str, total: int) -> List[Place]:
    """Search Google Maps and return the listings found in the results feed.

    A headless Chromium instance opens the search URL, scrolls the results
    feed a fixed number of times so more cards load, then reads name, phone,
    rating and website from each card without opening it.

    Args:
        search_for: Free-text search query; it is URL-encoded before use.
        total: Maximum number of places to return. A non-positive value
            yields an empty list without launching a browser.

    Returns:
        Up to ``total`` Place objects in feed order. Errors raised while
        loading or reading the page are logged and swallowed, so the list may
        be empty.
    """
    from urllib.parse import quote_plus

    setup_logging()
    places: List[Place] = []

    if total <= 0:
        logging.warning("Requested %d results; nothing to scrape", total)
        return places

    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True,
            args=["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu"]
        )
        page = browser.new_page(
            viewport={"width": 1920, "height": 1080},
            user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
            locale="en-IN",
            timezone_id="Asia/Kolkata",
        )

        try:
            # quote_plus keeps spaces as '+' and escapes characters such as
            # '&', '#', '?' and '/' that would otherwise corrupt the URL path.
            url = f"https://www.google.com/maps/search/{quote_plus(search_for)}/"
            page.goto(url, timeout=60000)
            page.wait_for_timeout(5000)

            # Scroll the feed to load more results.
            for _ in range(10):
                try:
                    page.evaluate("document.querySelector('[role=\"feed\"]')?.scrollBy(0, 1200)")
                except Exception as exc:
                    # Best effort: a failed scroll only means fewer cards load.
                    logging.debug("Feed scroll failed: %s", exc)
                page.wait_for_timeout(1500)

            page.wait_for_timeout(3000)

            # Read the cards straight from the results panel; no clicking.
            data = page.evaluate(_EXTRACT_LISTINGS_JS)
            logging.info(f"Found {len(data)} businesses")

            places.extend(_place_from_record(item) for item in data[:total])

            # NOTE: a card's "Website" button may point at a Google search
            # redirect rather than the external site, so an empty `website`
            # does not prove that the business has no website.
        except Exception as e:
            logging.error(f"Error: {e}")
        finally:
            browser.close()

    return places


def _normalize_phone(phone: Optional[str]) -> str:
    """Strip *phone* to digits and add a +91 / + prefix when the length fits.

    10 digits -> "+91" + digits; 11 digits with a leading 0 -> "+91" + the
    remaining 10; 12 digits -> "+" + digits; anything else -> bare digits.
    """
    digits = re.sub(r'[^\d]', '', phone or '')
    if len(digits) == 10:
        return f"+91{digits}"
    if len(digits) == 11 and digits.startswith('0'):
        return f"+91{digits[1:]}"
    if len(digits) == 12:
        return f"+{digits}"
    return digits


def save_places_to_csv(places: "List[Place]", output_path: str = "result.csv", append: bool = False) -> None:
    """Write *places* to a CSV file, normalizing phone numbers on the way.

    Args:
        places: Dataclass instances to export (converted with ``asdict``).
        output_path: Destination CSV path.
        append: When true, rows are appended instead of overwriting the file.
            The header row is written unless the file already has content.

    Returns:
        None. When *places* is empty a warning is logged and no file is
        touched.
    """
    records = [asdict(p) for p in places]

    if not records:
        logging.warning("No data to save")
        return

    for record in records:
        record['phone_number'] = _normalize_phone(record.get('phone_number', ''))

    df = pd.DataFrame(records)

    # Only skip the header when appending to a file that already holds data;
    # an existing but empty file still needs its header row.
    has_content = os.path.isfile(output_path) and os.path.getsize(output_path) > 0
    write_header = not (append and has_content)
    df.to_csv(output_path, index=False, mode="a" if append else "w", header=write_header)
    logging.info("Saved %d places to %s", len(df), output_path)


def main():
    """Command-line entry point: parse options, scrape, then write the CSV.

    Options: -s/--search (query), -t/--total (max results, default 20),
    -o/--output (CSV path, default "result.csv") and --append. A missing or
    empty query falls back to a built-in sample search, and a total of 0
    falls back to 20.
    """
    import argparse

    # (flags, keyword options) for each argument, registered in this order.
    argument_specs = (
        (("-s", "--search"), {"type": str, "help": "Search query"}),
        (("-t", "--total"), {"type": int, "default": 20, "help": "Max results"}),
        (("-o", "--output"), {"type": str, "default": "result.csv", "help": "Output CSV"}),
        (("--append",), {"action": "store_true", "help": "Append to existing"}),
    )
    cli = argparse.ArgumentParser()
    for flags, options in argument_specs:
        cli.add_argument(*flags, **options)
    opts = cli.parse_args()

    query = opts.search if opts.search else "interior designers in Hayathnagar"
    limit = opts.total if opts.total else 20

    found = scrape_places(query, limit)
    save_places_to_csv(found, opts.output, append=opts.append)

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()