"""Link finder plugin — findet relevante URLs für beliebige Themen und Kaufanfragen."""
from __future__ import annotations

import logging
import re
from urllib.parse import unquote

import httpx
from bs4 import BeautifulSoup

from hivemind.plugins.base import Plugin, capability

# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)

# DuckDuckGo endpoint that the search query is POSTed to; the response body
# is parsed as HTML by the plugin.
DDG_URL = "https://html.duckduckgo.com/html/"

# Detects requests for links / websites in German and English.
# The alternatives cover, in order: bare keywords (link, url, website, ...),
# German "wo kann/finde/... ich" questions, "online kaufen/bestellen/shoppen",
# German imperatives asking for a link or page ("gib/zeig/schick mir ..."),
# German requests for a recommendation, and the English equivalents
# ("where can I", "recommend a website", "buy online").
# Compiled with VERBOSE (whitespace inside the pattern is ignored) and
# IGNORECASE.
_LINK_RE = re.compile(
    r"""
    \b(link|links|url|website|webseite|homepage|preisvergleich|onlineshop|online\s*shop)\b
    | \bwo\s+(kann|könnte|finde|bekomme|bestelle|kaufe)\s+ich\b
    | \bonline\s+(kaufen|bestellen|shoppen)\b
    | \b(gib|zeig|schick)\s+(mir\s+)?(einen?\s+)?(link|seite|website|url|adresse)\b
    | \b(empfiehlst|empfehle)\s+(du\s+)?(mir\s+)?(eine?\s+)?(seite|website|plattform|shop)\b
    | \bkannst\s+du\s+.{0,30}(empfehlen|vorschlagen)\b
    | \b(where\s+can\s+i|where\s+do\s+i)\b
    | \b(recommend|suggest)\s+(a\s+)?(website|site|link|shop|store)\b
    | \b(buy|purchase|order)\s+online\b
    """,
    re.VERBOSE | re.IGNORECASE,
)

# Conversational phrases that are removed from the query so that DuckDuckGo
# receives a plain search term.
#
# Every optional noun / preposition group is closed with a word boundary
# (\b). Without it the pattern consumed the *beginning* of the next word:
# "Link zum Shop" lost "Link zu" and left "m Shop", and
# "ich suche shopping tipps" lost "shop" and left "ping tipps".
# The link-noun alternative is also anchored on its left side so that it can
# no longer start in the middle of a word ("meinen Link" -> "m ...").
_STRIP_PHRASES = re.compile(
    r"""
    ^(gib\s+mir\s+(einen?\s+)?|zeig\s+mir\s+(einen?\s+)?|schick\s+mir\s+(einen?\s+)?)
    | \b(einen?\s+)?(link|url|adresse|webseite|website)\s+((zu|für|über|dazu|wo|that)\b)?\s*
    | \bkannst\s+du\s+mir\s+
    | \bich\s+(suche|brauche|möchte|will)\s+(einen?\s+)?((link|website|seite|shop|url)\b)?\s*((für|zu|über|zum|zur)\b)?\s*
    | \bwo\s+kann\s+ich\s+
    | \bwo\s+finde\s+ich\s+
    | \bempfiehlst\s+du\s+(mir\s+)?(eine?\s+)?((website|seite|plattform|shop)\b)?\s*((für|zum|zur|zu)\b)?\s*
    """,
    re.VERBOSE | re.IGNORECASE,
)


class LinkFinderPlugin(Plugin):
    """Plugin that searches DuckDuckGo and returns clean URL lists for any topic.

    ``find_links`` runs the search and renders the hits as a numbered text
    list; ``is_link_query`` tells callers whether a query asks for links at
    all. ``find_links`` reports failures through its return value instead of
    raising.
    """

    name = "link_finder"
    version = "0.1.0"
    description = "Finds relevant URLs and links for any topic, product or service query"

    @capability("Find relevant links and URLs for a topic, product or service")
    async def find_links(self, query: str, max_results: int = 5) -> str:
        """Search DuckDuckGo and return a clean numbered list of URLs with titles.

        Args:
            query:       The user's original question or a search query.
            max_results: Maximum number of links to return.

        Returns:
            A multi-line string with one numbered entry per result (title,
            URL and an optional shortened snippet), a "no links found"
            message when nothing usable was returned, or an error message
            when the request or the parsing failed.
        """
        search_query = self._clean_query(query)
        if max_results <= 0 or not search_query.strip():
            # Nothing could be returned anyway, so skip the network round trip.
            return f'Keine Links gefunden für: {search_query}'

        log.debug("LinkFinderPlugin: searching DDG for %r", search_query)

        try:
            async with httpx.AsyncClient(timeout=15, follow_redirects=True) as client:
                resp = await client.post(
                    DDG_URL,
                    data={"q": search_query},
                    headers={"User-Agent": "HiveMind/0.1 link-finder"},
                )
                resp.raise_for_status()
            results = self._parse_results(resp.text, max_results)
        except httpx.HTTPError as e:
            # Expected failure mode (timeout, connection error, HTTP status):
            # the message is enough, a traceback would only add noise.
            log.error("LinkFinderPlugin.find_links(%r): %s", query[:60], e)
            return f"Links konnten nicht abgerufen werden: {e}"
        except Exception as e:
            # Deliberate catch-all: this method answers with a message rather
            # than raising. Keep the traceback so real bugs stay diagnosable.
            log.error(
                "LinkFinderPlugin.find_links(%r): %s", query[:60], e, exc_info=True
            )
            return f"Links konnten nicht abgerufen werden: {e}"

        if not results:
            return f'Keine Links gefunden für: {search_query}'
        return self._format_results(search_query, results)

    @capability("Check if a query is asking for links, websites or online shopping recommendations")
    async def is_link_query(self, query: str) -> bool:
        """Return True when the query is asking for links or website recommendations."""
        return bool(_LINK_RE.search(query))

    # ──────────────────────────────────────────────────────────────────────────
    # Internal helpers
    # ──────────────────────────────────────────────────────────────────────────

    @classmethod
    def _parse_results(cls, html: str, max_results: int) -> list[dict[str, str]]:
        """Collect up to ``max_results`` unique results from a DDG result page.

        Each entry is a dict with the keys ``title``, ``url`` and ``snippet``
        (``snippet`` is an empty string when the page has none). Entries
        without a title link, without a usable URL, or with a URL that was
        already seen are skipped.
        """
        results: list[dict[str, str]] = []
        seen_urls: set[str] = set()

        for body in BeautifulSoup(html, "html.parser").select(".result__body"):
            if len(results) >= max_results:
                break

            title_el = body.select_one(".result__a")
            if title_el is None:
                continue

            url = cls._extract_real_url(title_el.get("href", ""))
            if not url or url in seen_urls:
                continue
            seen_urls.add(url)

            snippet_el = body.select_one(".result__snippet")
            results.append(
                {
                    "title": title_el.get_text(strip=True),
                    "url": url,
                    "snippet": snippet_el.get_text(strip=True) if snippet_el else "",
                }
            )

        return results

    @staticmethod
    def _format_results(
        search_query: str,
        results: list[dict[str, str]],
        snippet_limit: int = 130,
    ) -> str:
        """Render parsed results as the numbered text list shown to the user.

        Snippets longer than ``snippet_limit`` characters are cut at that
        length and suffixed with an ellipsis.
        """
        lines = [f'Empfohlene Links für "{search_query}":']
        for i, item in enumerate(results, 1):
            lines.append(f"\n{i}. **{item['title']}**")
            lines.append(f"   -> {item['url']}")
            snippet = item["snippet"]
            if snippet:
                shown = snippet[:snippet_limit]
                if len(snippet) > snippet_limit:
                    shown += "…"
                lines.append(f"   {shown}")
        return "\n".join(lines)

    @staticmethod
    def _extract_real_url(href: str) -> str:
        """Extract the real destination URL from a DuckDuckGo redirect link.

        DDG wraps every result URL in: /l/?uddg=<percent-encoded-url>&rut=...

        Returns the decoded ``uddg`` target when it is an http(s) URL,
        otherwise ``href`` itself when that is already an http(s) URL,
        otherwise an empty string.
        """
        if not href:
            return ""

        web_schemes = ("http://", "https://")

        def is_web_url(candidate: str) -> bool:
            # Require a full scheme separator so strings that merely begin
            # with the letters "http" are rejected; URL schemes are
            # case-insensitive, hence the lower-casing of the prefix.
            return candidate[:8].lower().startswith(web_schemes)

        m = re.search(r"[?&]uddg=([^&]+)", href)
        if m:
            url = unquote(m.group(1))
            if is_web_url(url):
                return url
        # Already a direct URL (shouldn't happen with DDG but be safe)
        if is_web_url(href):
            return href
        return ""

    @staticmethod
    def _clean_query(query: str) -> str:
        """Strip conversational wrappers so DDG gets a clean search term.

        "Gib mir einen Link zu Waschmaschinen"
          → "Waschmaschinen"

        Falls back to the unmodified ``query`` when stripping would leave
        nothing.
        """
        q = _STRIP_PHRASES.sub(" ", query).strip(" ,.")
        # Collapse extra whitespace
        q = re.sub(r"\s{2,}", " ", q)
        return q if q else query


# Module-level alias for the plugin class defined in this file.
# NOTE(review): presumably the name a plugin loader looks up — confirm against the loader.
PLUGIN_CLASS = LinkFinderPlugin
