"""News feed plugin — liest RSS-Feeds und liefert aktuelle Schlagzeilen."""
from __future__ import annotations

import logging
import re
import time
import xml.etree.ElementTree as ET
from typing import NamedTuple

import httpx

from hivemind.plugins.base import Plugin, capability

log = logging.getLogger(__name__)

# Lowercase substrings whose presence in a (lowercased) query marks it as
# news-related.
NEWS_KEYWORDS = {
    # English
    "breaking", "current events", "headline", "headlines",
    "latest", "news feed", "top stories",
    # German: general news vocabulary
    "aktuell", "aktuelles", "bericht", "berichte",
    "medien", "meldung", "meldungen", "nachrichten",
    "neuigkeit", "neuigkeiten", "news", "presse", "schlagzeilen",
    # German: sections and regions
    "deutschland", "europa", "kultur", "politik",
    "sport", "welt", "wirtschaft", "wissenschaft",
}

# Default feeds as (display name, URL) pairs; none of them needs an API key.
# Order is significant: a headline request only reads the first three
# configured feeds.
DEFAULT_FEEDS: list[tuple[str, str]] = [
    ("Tagesschau", "https://www.tagesschau.de/xml/rss2/"),
    ("Deutsche Welle", "https://rss.dw.com/rdf/rss-de-all"),
    ("Heise Online", "https://www.heise.de/rss/heise-atom.xml"),
    ("Spiegel Online", "https://www.spiegel.de/schlagzeilen/index.rss"),
]

# Lifetime of a cached feed in seconds (15 minutes); compared against
# time.monotonic() differences.
CACHE_TTL = 900


class _CacheEntry(NamedTuple):
    """Cached parse result for a single feed URL."""

    # Parsed feed items (dicts with title / summary / url / source).
    items: list[dict]
    # time.monotonic() reading taken when the fetch that produced `items` began.
    ts: float


class NewsFeedPlugin(Plugin):
    """Plugin that fetches RSS/Atom news feeds and provides current headlines.

    Feeds are downloaded with ``httpx``, parsed with
    ``xml.etree.ElementTree`` and cached in memory per URL for
    ``CACHE_TTL`` seconds.
    """

    name = "news_feed"
    version = "0.1.0"
    description = "Reads RSS news feeds and provides current headlines"

    # Upper bound on how many configured feeds one headline request polls.
    MAX_FEEDS_PER_REQUEST = 3
    # Summaries longer than this many characters are cut and get an ellipsis.
    SUMMARY_MAX_CHARS = 140

    def __init__(self, node=None):
        """Create the plugin with an empty cache and the default feed list.

        Args:
            node: Owning node object, handed to ``Plugin.__init__``; may be
                ``None``.
        """
        super().__init__(node)
        # Feed URL -> (parsed items, monotonic timestamp of the fetch).
        self._cache: dict[str, _CacheEntry] = {}
        # Copy, so the module-level default list is never mutated.
        self._feeds: list[tuple[str, str]] = list(DEFAULT_FEEDS)

    async def initialize(self) -> None:
        """Load custom feed URLs from the node config, if any are configured.

        Reads ``news_feeds`` from ``self.node.config``. Each entry is expected
        to be a mapping with a ``url`` and an optional ``name`` (defaults to
        ``"Feed"``); entries without a URL are skipped. This is best effort
        and never raises: when the config is missing, malformed, or contains
        no usable entry, the currently active feeds stay in place.
        """
        node = getattr(self, "node", None)
        cfg = getattr(node, "config", None) if node else None
        if not cfg:
            return

        try:
            custom = getattr(cfg, "news_feeds", None) or []
            # Built completely before assignment, so a bad entry half-way
            # through cannot leave a partial feed list behind.
            feeds = [
                (f.get("name", "Feed"), f["url"])
                for f in custom
                if f.get("url")
            ]
        except Exception as e:  # deliberate: plugin start-up must not fail
            log.warning(
                "NewsFeedPlugin: ignoring invalid news_feeds config: %s", e
            )
            return

        if not feeds:
            if custom:
                # Replacing the feeds with an empty list would silently
                # disable the plugin, so keep what we have instead.
                log.warning(
                    "NewsFeedPlugin: news_feeds config has no entry with a "
                    "'url'; keeping current feeds"
                )
            return

        self._feeds = feeds
        log.info(
            "NewsFeedPlugin: %d custom feed(s) loaded from config",
            len(self._feeds),
        )

    @capability("Get the latest news headlines, optionally filtered by keyword/topic")
    async def get_headlines(self, topic: str = "", max_items: int = 6) -> str:
        """Fetch current headlines from the configured feeds.

        Args:
            topic:     Optional keyword to filter headlines (e.g. 'KI',
                       'Ukraine'). Matched case-insensitively as a substring
                       of title or summary. When nothing matches, the
                       unfiltered headlines are returned instead.
            max_items: Maximum number of headlines to return; negative values
                       are treated as 0.

        Returns:
            A multi-line, human-readable (German) text block, or a fixed
            "nothing found" message when no headline is available.
        """
        all_items: list[dict] = []

        # Feeds are polled one after another (not concurrently) and only the
        # first few are used, to avoid hammering many servers per request.
        for feed_name, feed_url in self._feeds[: self.MAX_FEEDS_PER_REQUEST]:
            all_items.extend(await self._fetch_feed(feed_name, feed_url))

        needle = (topic or "").strip().lower()
        if needle:
            matching = [
                entry
                for entry in all_items
                if needle in entry["title"].lower()
                or needle in entry.get("summary", "").lower()
            ]
            # An empty filter result falls back to the unfiltered list.
            if matching:
                all_items = matching

        # A negative limit would slice items off the END of the list rather
        # than limit the count. None keeps the historical "no limit" slice.
        limit = None if max_items is None else max(max_items, 0)
        all_items = all_items[:limit]

        if not all_items:
            return "Keine aktuellen Nachrichten gefunden."

        cut = self.SUMMARY_MAX_CHARS
        lines = [f"Aktuelle Nachrichten ({len(all_items)} Meldungen):"]
        for item in all_items:
            src = f" [{item['source']}]" if item.get("source") else ""
            lines.append(f"• {item['title']}{src}")
            full_summary = item.get("summary")
            if full_summary:
                summary = full_summary[:cut].rstrip()
                if len(full_summary) > cut:
                    summary += "…"
                lines.append(f"  {summary}")
            if item.get("url"):
                lines.append(f"  -> {item['url']}")

        return "\n".join(lines)

    @capability("Check if a query is asking about news or current events")
    async def is_news_query(self, query: str) -> bool:
        """Return True when the query is news- or current-events-related.

        The check is a case-insensitive substring match against
        ``NEWS_KEYWORDS``; an empty or ``None`` query is never a news query.
        """
        q = (query or "").lower()
        return any(kw in q for kw in NEWS_KEYWORDS)

    # ──────────────────────────────────────────────────────────────────────────
    # Internal helpers
    # ──────────────────────────────────────────────────────────────────────────

    async def _fetch_feed(self, source_name: str, url: str) -> list[dict]:
        """Fetch and parse a single feed, with in-memory caching.

        Args:
            source_name: Display name attached to every parsed item.
            url:         Feed URL; also the cache key.

        Returns:
            The parsed items. A cache entry younger than ``CACHE_TTL`` seconds
            is returned without any network access. If the download fails,
            the stale cache entry (if any) is returned, otherwise ``[]``.
        """
        now = time.monotonic()
        cached = self._cache.get(url)
        if cached is not None and (now - cached.ts) < CACHE_TTL:
            return cached.items

        try:
            async with httpx.AsyncClient(timeout=8, follow_redirects=True) as client:
                resp = await client.get(
                    url, headers={"User-Agent": "HiveMind/0.1 news-plugin"}
                )
                resp.raise_for_status()

            items = self._parse_feed(resp.text, source_name)
            self._cache[url] = _CacheEntry(items, now)
            log.debug(
                "NewsFeedPlugin: fetched %d items from %s", len(items), source_name
            )
            return items

        except Exception as e:  # deliberate: one broken feed must not break the rest
            log.warning(
                "NewsFeedPlugin: could not fetch '%s' (%s): %s", source_name, url, e
            )
            # Serving stale headlines beats serving none.
            return cached.items if cached is not None else []

    @staticmethod
    def _parse_feed(xml_text: str, source: str) -> list[dict]:
        """Parse RSS 2.0, RSS 1.0 (RDF) or Atom XML into a list of item dicts.

        Args:
            xml_text: The raw feed document.
            source:   Feed name stored under each item's ``source`` key.

        Returns:
            One dict per entry with the keys ``title``, ``summary`` (markup
            removed, whitespace collapsed), ``url`` and ``source``. Entries
            without a title are dropped. Malformed XML is logged and yields
            an empty list.
        """
        atom_ns = "http://www.w3.org/2005/Atom"
        # RSS 1.0 (RDF) puts item/title/link/description into this namespace,
        # so un-namespaced lookups alone would find nothing in such feeds.
        rss1_ns = "http://purl.org/rss/1.0/"

        def first(parent, *paths):
            """Return the first child matching any of *paths*, else None."""
            # Must compare with ``is not None``: an Element without child
            # elements is falsy, so chaining ``find(a) or find(b)`` throws
            # away a perfectly good match such as <title>text</title>.
            for path in paths:
                found = parent.find(path)
                if found is not None:
                    return found
            return None

        def text_of(element) -> str:
            """Return the stripped text of *element*, or '' if absent/empty."""
            if element is None or not element.text:
                return ""
            return element.text.strip()

        def plain(markup: str) -> str:
            """Drop HTML tags from *markup* and collapse runs of whitespace."""
            without_tags = re.sub(r"<[^>]+>", " ", markup).strip()
            return re.sub(r"\s{2,}", " ", without_tags)

        # NOTE(security): xml.etree.ElementTree is not hardened against
        # maliciously constructed XML (e.g. entity-expansion attacks). Feed
        # URLs can come from configuration, so only trusted feeds should be
        # configured unless a hardened parser is introduced.
        try:
            root = ET.fromstring(xml_text)
        except ET.ParseError as e:
            log.warning("NewsFeedPlugin RSS parse error: %s", e)
            return []

        items: list[dict] = []
        root_tag = root.tag.lower()

        if "feed" in root_tag or "atom" in root_tag:
            # ── Atom ─────────────────────────────────────────────────────────
            entries = root.findall(f"{{{atom_ns}}}entry") or root.findall("entry")
            for entry in entries:
                title = text_of(first(entry, f"{{{atom_ns}}}title", "title"))
                if not title:
                    continue
                summary = plain(
                    text_of(
                        first(
                            entry,
                            f"{{{atom_ns}}}summary",
                            f"{{{atom_ns}}}content",
                            "summary",
                        )
                    )
                )
                # An entry may carry several <link> elements (self, enclosure,
                # replies, ...). The article itself is rel="alternate", which
                # is also the meaning of a missing rel attribute; fall back to
                # the first link when no alternate link exists.
                links = entry.findall(f"{{{atom_ns}}}link") or entry.findall("link")
                link_el = next(
                    (lnk for lnk in links if lnk.get("rel", "alternate") == "alternate"),
                    links[0] if links else None,
                )
                url = ""
                if link_el is not None:
                    # <link href="..."/> is the Atom form; <link>...</link> is
                    # tolerated as a fallback.
                    url = link_el.get("href", "") or (link_el.text or "").strip()
                items.append(
                    {"title": title, "summary": summary, "url": url, "source": source}
                )
        else:
            # ── RSS 2.0 / RSS 1.0 (RDF) ──────────────────────────────────────
            rss_items = list(root.iter("item")) or list(
                root.iter(f"{{{rss1_ns}}}item")
            )
            for item in rss_items:
                title = text_of(first(item, "title", f"{{{rss1_ns}}}title"))
                if not title:
                    continue
                summary = plain(
                    text_of(
                        first(item, "description", f"{{{rss1_ns}}}description")
                    )
                )
                # Take the first <link> child that actually contains text.
                link_candidates = item.findall("link") + item.findall(
                    f"{{{rss1_ns}}}link"
                )
                url = next(
                    (
                        (lnk.text or "").strip()
                        for lnk in link_candidates
                        if (lnk.text or "").strip()
                    ),
                    "",
                )
                if not url:
                    # Last resort: a <guid> counts as a permalink unless it is
                    # explicitly marked isPermaLink="false".
                    guid_el = item.find("guid")
                    if (
                        guid_el is not None
                        and guid_el.get("isPermaLink", "true") != "false"
                    ):
                        url = (guid_el.text or "").strip()
                items.append(
                    {"title": title, "summary": summary, "url": url, "source": source}
                )

        return items


# Module-level handle on the plugin class defined in this file.
# NOTE(review): presumably the name the plugin loader looks up — confirm
# against the loader.
PLUGIN_CLASS = NewsFeedPlugin
