import argparse
import sqlite3
import threading
import time
from html.parser import HTMLParser
from typing import List, Optional
from urllib.parse import urljoin, urlparse

import httpx
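
# A single connection is shared by all checker threads, and sqlite3
# connections are not thread-safe by default, so this lock serializes
# every write (see insert_result below).
db_lock = threading.Lock()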

def create_db(db_name: str) -> sqlite3.Connection:
    """
    Create or connect to the SQLite database that stores link check results.
    """
    # check_same_thread=False lets the worker threads reuse this connection;
    # insert_result() takes db_lock before every write.
    conn = sqlite3.connect(db_name, check_same_thread=False)
    cursor = conn.cursor()
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS link_report (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            url TEXT NOT NULL,
            status_code INTEGER,
            redirect_url TEXT,
            response_time REAL,
            error TEXT
        )
    ''')
    conn.commit()
    return conn

def insert_result(conn: sqlite3.Connection, url: str, status_code: Optional[int], redirect_url: Optional[str], response_time: float, error: str):
    """
    Insert a link check result into the database. Guarded by db_lock so
    multiple checker threads can safely share one connection.
    """
    with db_lock:
        cursor = conn.cursor()
        cursor.execute('''
            INSERT INTO link_report (url, status_code, redirect_url, response_time, error)
            VALUES (?, ?, ?, ?, ?)
        ''', (url, status_code, redirect_url, response_time, error))
        conn.commit()

def fetch_links(session: httpx.Client, base_url: str) -> List[str]:
    """
    Fetch all <a href> links from the given webpage, skipping hrefs that do
    not resolve to http(s) URLs (e.g. mailto:, javascript:, tel:).
    """
    links = set()
    try:
        response = session.get(base_url, timeout=10)
        response.raise_for_status()

        # The stdlib HTMLParser avoids a third-party dependency; it only
        # needs to capture href attributes from anchor tags.
        class LinkParser(HTMLParser):
            def handle_starttag(self, tag, attrs):
                if tag == 'a':
                    for attr_name, attr_value in attrs:
                        if attr_name == 'href' and attr_value:
                            links.add(attr_value)

        parser = LinkParser()
        parser.feed(response.text)
    except httpx.HTTPError as e:
        print(f"Error fetching {base_url}: {e}")
    # Keep only links that resolve to http(s) URLs relative to the page.
    return [link for link in links
            if urlparse(urljoin(base_url, link)).scheme in ('http', 'https')]

def check_link(session: httpx.Client, url: str, base_url: str, conn: sqlite3.Connection, slow_threshold: float = 2.0):
    """
    Check a single link for status, redirects, and response time.
    """
    full_url = urljoin(base_url, url)
    start_time = time.monotonic()
    error = ''
    status_code: Optional[int] = None
    redirect_url: Optional[str] = None
    response_time = 0.0
    try:
        # httpx does not follow redirects by default; its keyword is
        # follow_redirects (not requests' allow_redirects).
        response = session.get(full_url, follow_redirects=True, timeout=10)
        response_time = time.monotonic() - start_time
        status_code = response.status_code
        if response.history:
            # A non-empty history means we were redirected; response.url
            # is the final URL after all redirects.
            redirect_url = str(response.url)
        if status_code == 404:
            print(f"404 Not Found: {full_url}")
        if response_time > slow_threshold:
            print(f"Slow response ({response_time:.2f}s): {full_url}")
        insert_result(conn, full_url, status_code, redirect_url, response_time, error)
    except httpx.HTTPError as e:
        response_time = time.monotonic() - start_time
        error = str(e)
        print(f"Error checking {full_url}: {error}")
        insert_result(conn, full_url, None, None, response_time, error)

def check_links_on_page(session: httpx.Client, base_url: str, conn: sqlite3.Connection, slow_threshold: float = 2.0):
    """
    Fetch all links from a page and check each one.
    """
    links = fetch_links(session, base_url)
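    # One thread per link keeps this simple; for pages with many links, a
    # bounded pool (e.g. concurrent.futures.ThreadPoolExecutor) would be
    # kinder to both the target server and local resources.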
    threads = []
    for link in links:
        t = threading.Thread(target=check_link, args=(session, link, base_url, conn, slow_threshold))
        t.start()
        threads.append(t)
    for t in threads:
        t.join()

def generate_report(conn: sqlite3.Connection, report_file: str):
    """
    Generate a report of the link check results.
    """
    cursor = conn.cursor()
    cursor.execute('SELECT url, status_code, redirect_url, response_time, error FROM link_report')
    rows = cursor.fetchall()
    with open(report_file, 'w', encoding='utf-8') as f:
        f.write("Link Checker Report\n")
        f.write("===================\n\n")
        for url, status_code, redirect_url, response_time, error in rows:
            f.write(f"URL: {url}\n")
            if error:
                f.write(f"Error: {error}\n")
            else:
                f.write(f"Status Code: {status_code}\n")
                if redirect_url:
                    f.write(f"Redirected To: {redirect_url}\n")
                f.write(f"Response Time: {response_time:.2f}s\n")
            f.write("\n")
    print(f"Report generated at {report_file}")

def main():
    parser = argparse.ArgumentParser(description='Website Link Checker')
    parser.add_argument('url', type=str, help='The URL of the website to check')
    parser.add_argument('--slow-threshold', type=float, default=2.0, help='Threshold in seconds to consider a response slow')
    parser.add_argument('--report', type=str, default='link_report.txt', help='Output report file')
    args = parser.parse_args()

    base_url = args.url
    print(f"Starting link check for {base_url}")
    conn = create_db('link_checker.db')
    # Clear results from previous runs so the report covers only this run.
    conn.execute('DELETE FROM link_report')
    conn.commit()
    session = httpx.Client()

    try:
        check_links_on_page(session, base_url, conn, args.slow_threshold)
        generate_report(conn, args.report)
    finally:
        conn.close()
        session.close()
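
# Example invocation (URL and filenames are illustrative):
#   python link_checker.py https://example.com --slow-threshold 1.5 --report link_report.txt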

if __name__ == '__main__':
    main()