#!/usr/bin/env python3
"""
Price Scraper - Monitor competitor API pricing pages and alert on changes.

This script scrapes pricing information from competitor websites, stores historical
data in SQLite, and alerts when pricing changes are detected.
"""

import argparse
import logging
import sqlite3
import json
import datetime
import os
import time
import re
import difflib
import hashlib
from typing import Dict, List, Optional, Tuple, Any
import httpx
from httpx import RequestError, Timeout

# Constants
DEFAULT_DB_PATH = "pricing_data.db"      # SQLite file holding sources, snapshots, and change log
DEFAULT_CONFIG_PATH = "config.json"      # JSON config listing the sources to monitor
DEFAULT_LOG_PATH = "price_scraper.log"   # file sink for the logging handler below
# Browser-like UA string; some pricing pages block obvious bot user agents
DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
DEFAULT_REQUEST_TIMEOUT = 10  # per-request timeout in seconds
DEFAULT_RETRY_COUNT = 3       # attempts per source before giving up
DEFAULT_RETRY_DELAY = 2       # seconds to wait between retry attempts

# Configure logging: everything at INFO and above goes both to a log file
# and to the console, with timestamps for correlating scrape runs.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler(DEFAULT_LOG_PATH),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)


class PriceScraper:
    """Scrape competitor pricing pages, persist snapshots in SQLite, and
    report content changes between successive scrapes.

    Usable as a context manager::

        with PriceScraper() as scraper:
            ...
    """

    def __init__(self, db_path: str = DEFAULT_DB_PATH):
        """
        Initialize the PriceScraper with a database path.

        Args:
            db_path: Path to the SQLite database file.
        """
        self.db_path = db_path
        self.conn: Optional[sqlite3.Connection] = None
        self.setup_database()

    def __enter__(self) -> "PriceScraper":
        """Enter a context-manager block; the DB is already open."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Close the database connection on context-manager exit."""
        self.close()

    def setup_database(self) -> None:
        """Set up the SQLite database and create tables if they don't exist.

        Raises:
            sqlite3.Error: If the database cannot be opened or initialized.
        """
        try:
            self.conn = sqlite3.connect(self.db_path)
            cursor = self.conn.cursor()

            # Create tables (idempotent: IF NOT EXISTS)
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS pricing_sources (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    name TEXT NOT NULL,
                    url TEXT NOT NULL UNIQUE,
                    selector TEXT,
                    parser_type TEXT DEFAULT 'html',
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            """)

            cursor.execute("""
                CREATE TABLE IF NOT EXISTS pricing_data (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    source_id INTEGER NOT NULL,
                    content_hash TEXT NOT NULL,
                    content TEXT NOT NULL,
                    scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    FOREIGN KEY (source_id) REFERENCES pricing_sources (id)
                )
            """)

            cursor.execute("""
                CREATE TABLE IF NOT EXISTS pricing_changes (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    source_id INTEGER NOT NULL,
                    old_content_hash TEXT,
                    new_content_hash TEXT NOT NULL,
                    change_description TEXT,
                    detected_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    FOREIGN KEY (source_id) REFERENCES pricing_sources (id)
                )
            """)

            self.conn.commit()
            logger.info("Database setup completed successfully")
        except sqlite3.Error as e:
            logger.error(f"Database setup error: {e}")
            raise

    def add_source(self, name: str, url: str, selector: Optional[str] = None, parser_type: str = 'html') -> int:
        """
        Add a new pricing source to monitor, or return the existing source ID
        if the URL is already registered.

        `url` has a UNIQUE constraint; the original implementation raised
        sqlite3.IntegrityError on every run after the first. Looking up the
        existing row first makes repeated runs idempotent.

        Args:
            name: Name of the pricing source.
            url: URL to scrape.
            selector: CSS selector for HTML parsing (optional).
            parser_type: Type of parser ('html' or 'json').

        Returns:
            ID of the newly added (or pre-existing) source.

        Raises:
            sqlite3.IntegrityError: For integrity violations other than a
                duplicate URL.
        """
        cursor = self.conn.cursor()
        cursor.execute("SELECT id FROM pricing_sources WHERE url = ?", (url,))
        existing = cursor.fetchone()
        if existing:
            logger.info(f"Source already registered for {url} (id={existing[0]})")
            return existing[0]

        try:
            cursor.execute(
                "INSERT INTO pricing_sources (name, url, selector, parser_type) VALUES (?, ?, ?, ?)",
                (name, url, selector, parser_type)
            )
            self.conn.commit()
            return cursor.lastrowid
        except sqlite3.IntegrityError as e:
            logger.error(f"Failed to add source: {e}")
            raise

    def scrape_source(self, source_id: int) -> Optional[str]:
        """
        Scrape pricing data from a source, retrying on transient failures.

        Args:
            source_id: ID of the pricing source.

        Returns:
            Scraped content as string (JSON sources are re-serialized with
            indent=2 so hashing/diffing is stable), or None if scraping failed.
        """
        try:
            cursor = self.conn.cursor()
            cursor.execute("SELECT url, selector, parser_type FROM pricing_sources WHERE id = ?", (source_id,))
            source = cursor.fetchone()
            if not source:
                logger.error(f"Source ID {source_id} not found")
                return None

            url, selector, parser_type = source
            headers = {"User-Agent": DEFAULT_USER_AGENT}

            for attempt in range(DEFAULT_RETRY_COUNT):
                try:
                    response = httpx.get(url, headers=headers, timeout=DEFAULT_REQUEST_TIMEOUT)
                    response.raise_for_status()
                    content = response.text

                    if parser_type == 'json':
                        content = json.dumps(response.json(), indent=2)

                    return content
                # httpx.HTTPError is the common base of RequestError (network
                # errors, incl. TimeoutException) AND HTTPStatusError raised by
                # raise_for_status(). The previous `except (RequestError, Timeout)`
                # was broken: httpx.Timeout is a timeout *config* class, not an
                # exception, so entering the handler raised TypeError, and HTTP
                # 4xx/5xx responses were never retried. json.JSONDecodeError
                # covers a malformed body from response.json().
                except (httpx.HTTPError, json.JSONDecodeError) as e:
                    logger.warning(f"Attempt {attempt + 1} failed: {e}")
                    # Don't sleep after the final attempt — nothing follows it.
                    if attempt < DEFAULT_RETRY_COUNT - 1:
                        time.sleep(DEFAULT_RETRY_DELAY)

            logger.error(f"Failed to scrape source {source_id} after {DEFAULT_RETRY_COUNT} attempts")
            return None
        except sqlite3.Error as e:
            logger.error(f"Database error during scraping: {e}")
            return None

    def store_pricing_data(self, source_id: int, content: str) -> bool:
        """
        Store scraped pricing data in the database.

        Args:
            source_id: ID of the pricing source.
            content: Scraped content.

        Returns:
            True if data was stored successfully, False otherwise.
        """
        try:
            # Hash is stored alongside the content so change detection can
            # compare cheaply without re-hashing historical rows.
            content_hash = hashlib.sha256(content.encode()).hexdigest()
            cursor = self.conn.cursor()
            cursor.execute(
                "INSERT INTO pricing_data (source_id, content_hash, content) VALUES (?, ?, ?)",
                (source_id, content_hash, content)
            )
            self.conn.commit()
            return True
        except sqlite3.Error as e:
            logger.error(f"Failed to store pricing data: {e}")
            return False

    def detect_changes(self, source_id: int, new_content: str) -> Optional[str]:
        """
        Detect changes in pricing data against the most recent stored snapshot.

        NOTE: callers must invoke this BEFORE storing `new_content`, otherwise
        the comparison is against the new content itself and always matches.

        Args:
            source_id: ID of the pricing source.
            new_content: New scraped content.

        Returns:
            Change description if changes detected (also recorded in the
            pricing_changes table), a first-run notice if there is no previous
            snapshot, or None if nothing changed (or on DB error).
        """
        try:
            new_hash = hashlib.sha256(new_content.encode()).hexdigest()
            cursor = self.conn.cursor()

            cursor.execute(
                "SELECT content, content_hash FROM pricing_data WHERE source_id = ? ORDER BY scraped_at DESC LIMIT 1",
                (source_id,)
            )
            previous = cursor.fetchone()

            if previous:
                previous_content, previous_hash = previous
                if previous_hash == new_hash:
                    return None

                change_description = self.generate_change_description(previous_content, new_content)
                cursor.execute(
                    "INSERT INTO pricing_changes (source_id, old_content_hash, new_content_hash, change_description) VALUES (?, ?, ?, ?)",
                    (source_id, previous_hash, new_hash, change_description)
                )
                self.conn.commit()
                return change_description

            return "New pricing data detected (no previous data)"
        except sqlite3.Error as e:
            logger.error(f"Error detecting changes: {e}")
            return None

    def generate_change_description(self, old_content: str, new_content: str) -> str:
        """Generate a unified-diff description of changes between old and new content."""
        diff = difflib.unified_diff(
            old_content.splitlines(),
            new_content.splitlines(),
            lineterm=''
        )
        return '\n'.join(diff)

    def close(self) -> None:
        """Close the database connection (safe to call more than once)."""
        if self.conn:
            self.conn.close()
            self.conn = None


def main():
    """Load the source config, register each source, scrape, and record changes.

    Change detection runs BEFORE the new snapshot is stored: detect_changes()
    compares against the most recent stored row, so the original order
    (store, then detect) compared the fresh content against itself and could
    never report a change.
    """
    parser = argparse.ArgumentParser(description="Price Scraper - Monitor competitor pricing")
    parser.add_argument("--config", default=DEFAULT_CONFIG_PATH, help="Path to config file")
    args = parser.parse_args()

    scraper = PriceScraper()

    try:
        # Fail gracefully (with a log entry) on a missing or malformed config
        # instead of dumping a traceback.
        try:
            with open(args.config) as f:
                config = json.load(f)
        except (OSError, json.JSONDecodeError) as e:
            logger.error(f"Could not load config {args.config}: {e}")
            return

        for source in config.get("sources", []):
            source_id = scraper.add_source(
                name=source["name"],
                url=source["url"],
                selector=source.get("selector"),
                parser_type=source.get("parser_type", "html")
            )

            content = scraper.scrape_source(source_id)
            if content:
                # Detect first, then store, so the comparison baseline is the
                # previous run's snapshot rather than this run's.
                change = scraper.detect_changes(source_id, content)
                scraper.store_pricing_data(source_id, content)
                if change:
                    logger.info(f"Change detected for source {source_id}: {change}")
    finally:
        scraper.close()


# Run the scraper only when executed as a script, not on import.
if __name__ == "__main__":
    main()