"""HiveMind Node — the core unit that ties everything together."""
from __future__ import annotations

import asyncio
import logging
import uuid
from pathlib import Path

from hivemind import __version__
from hivemind.config import Config
from hivemind.model import Model, FastModel
from hivemind.cache import SemanticCache
from hivemind.plugins import PluginManager
from hivemind.confidence import ConfidenceScorer, should_ask_network
from hivemind.rag import RAGStore
from hivemind.training import TrainingManager
from hivemind.sessions import SessionManager
from hivemind.memory import GlobalMemory
from hivemind.topic import extract_fast, extract_with_model, score_peer

# Module-level logger used by all Node methods.
log = logging.getLogger(__name__)

# Maximum time Node._ask_network waits for peer responses before falling back
# to the best response collected so far.
NETWORK_QUERY_TIMEOUT = 15  # seconds


class Node:
    """A single HiveMind node — local AI + cache + plugins + P2P."""

    def __init__(self, config: Config, base_dir: Path | None = None):
        self.config = config
        self.base_dir = base_dir or Path(".")
        self.id = str(uuid.uuid4())[:8]
        self.name = config.node.name or f"node-{self.id}"
        self.version = __version__

        # Core components
        self.model = Model(config.model)
        self.fast_model = FastModel(config.fast_model)
        self.cache = SemanticCache(config.cache, cache_dir=self.base_dir / "cache")
        self.plugins = PluginManager(node=self)
        self.network = None

        # Confidence scorer
        self.scorer = ConfidenceScorer(
            expertise_tags=config.node.expertise_tags,
            specialization=config.node.specialization,
        )

        # RAG store
        self.rag = RAGStore(
            data_dir=self.base_dir / config.rag.data_dir,
            chunk_size=config.rag.chunk_size,
        )

        # Training manager
        self.training = TrainingManager(
            data_dir=self.base_dir / config.training.data_dir,
            lora_path=config.training.lora_path,
        )

        # Session manager
        self.sessions = SessionManager(
            data_dir=self.base_dir / "data" / "sessions",
        )

        # Global memory
        self.memory = GlobalMemory(
            data_dir=self.base_dir / "data",
        )

        # Conversation history (points to active session)
        self.max_history = 50

        # Relay & Updates
        self.relay = None
        self.updater = None

        self._running = False
        # query_id -> {"future": Future, "responses": list[tuple[str, float]]}
        self._pending_queries: dict[str, dict] = {}

    async def start(self) -> None:
        """Initialize the node — load model, plugins, and network."""
        log.info("Starting node: %s (%s) v%s", self.name, self.id, self.version)

        # Load model if configured
        if self.config.model.path:
            try:
                self.model.load()
                # Load LoRA adapter if configured
                if self.config.training.lora_path:
                    try:
                        self.model.load_lora(self.config.training.lora_path)
                    except Exception as e:
                        log.warning("LoRA load failed: %s", e)
            except FileNotFoundError as e:
                log.error("Model not found: %s", e)
                log.info("Node will run without local model (cache + plugins only)")
        else:
            log.info("No model configured — running in plugin-only mode")

        # Load fast model if configured
        if self.config.fast_model.path:
            try:
                self.fast_model.load()
                log.info("Fast model ready: %s", self.config.fast_model.path)
            except FileNotFoundError as e:
                log.warning("Fast model not found — two-phase chat disabled: %s", e)
            except Exception as e:
                log.warning("Fast model load error — two-phase chat disabled: %s", e)

        # Load plugins
        await self.plugins.load(
            self.config.plugins.get("enabled", []),
            self.config.plugins.get("directory", "./plugins"),
        )

        # Start P2P network if enabled
        if self.config.network.enabled:
            from hivemind.network.peerlist import PeerList
            from hivemind.network.peer import P2PNetwork
            from hivemind.network.updater import AutoUpdater

            peer_list = PeerList(
                path=self.base_dir / "peers.json",
                own_id=self.id,
            )

            for addr in self.config.network.bootstrap_nodes:
                parts = addr.split(":")
                if len(parts) == 2:
                    peer_list.add_manual(parts[0], int(parts[1]))

            self.network = P2PNetwork(
                node=self,
                peer_list=peer_list,
                listen_port=self.config.network.listen_port,
            )

            # Setup auto-updater
            self.updater = AutoUpdater(self, self.base_dir)
            self.updater.register_handlers(self.network)

            # Register query handler
            from hivemind.network.protocol import MsgType
            self.network.on_message(MsgType.QUERY, self._handle_network_query)
            self.network.on_message(MsgType.RESPONSE, self._handle_network_response)

            await self.network.start()
            log.info("P2P network started on port %d", self.config.network.listen_port)

        # Start relay client if relay servers configured
        relay_urls = getattr(self.config.network, 'relay_servers', []) or []
        if relay_urls:
            from hivemind.network.relay_client import RelayConnection
            self.relay = RelayConnection(
                relay_url=relay_urls[0],
                node=self,
                on_message=self._handle_relay_message,
                on_peers=self._handle_relay_peers,
            )
            await self.relay.start()
            log.info("Relay client started: %s", relay_urls[0])

        self._running = True
        spec = f" | Spec: {self.config.node.specialization}" if self.config.node.specialization else ""
        rag_info = f" | RAG: {self.rag.stats['documents']} docs" if self.rag.stats['documents'] else ""
        log.info(
            "Node ready: %s | Model: %s | Plugins: %s | Cache: %d%s%s",
            self.name,
            "loaded" if self.model.loaded else "none",
            ", ".join(self.plugins.loaded) or "none",
            self.cache.size,
            spec,
            rag_info,
        )

    async def chat(self, user_message: str, session_id: str = "") -> tuple[str, dict]:
        """Process a user message with confidence-based routing.

        Args:
            user_message: The user's input text.
            session_id:   Optional external session id (e.g. Telegram per-user
                          session).  When provided the active Dashboard session
                          is left untouched — messages are read/written only to
                          *session_id*.  When omitted the active session is used
                          as before.

        Flow:
        1. Check cache
        2. Build RAG context if available
        3. Generate local response
        4. Score confidence
        5. If low confidence + network available → ask peers
        6. Pick best response
        7. Save to cache + training data
        """
        # 1. Cache lookup
        cached = self.cache.lookup(user_message)
        if cached:
            log.info("Cache hit for: %s", user_message[:50])
            return cached, {
                "source": "cache", "source_name": "Cache", "source_id": "",
                "source_specialization": "", "confidence": 1.0,
                "routing": "cache", "topics": extract_fast(user_message, max_keywords=5),
            }

        # 2. Add to session history + extract memory facts
        # If a specific session_id is given (e.g. from Telegram) we write only
        # to that session without disturbing the active Dashboard session.
        if session_id:
            self.sessions.get_or_create(session_id, session_id)  # ensure exists
            self.sessions.add_message_to(session_id, "user", user_message)
        else:
            self.sessions.add_message("user", user_message)
        self.memory.process_message("user", user_message)
        history = self.sessions.get_history(session_id)
        if len(history) > self.max_history:
            history = history[-self.max_history:]

        # 3. Build messages with RAG + plugin-awareness context
        awareness_ctx = await self._plugin_awareness_context(user_message)
        messages = self._build_messages(user_message, extra_context=awareness_ctx, session_id=session_id)

        # 4. Generate local response
        local_response = await self._generate_local(messages)

        # 5. Score confidence
        confidence = self.scorer.score(user_message, local_response)
        routing = should_ask_network(confidence)
        log.info("Confidence: %.2f → routing: %s", confidence, routing)

        # 5b. Routing meta
        topics = extract_fast(user_message, max_keywords=5)
        meta = {
            "source": "local",
            "source_name": self.name,
            "source_id": self.id,
            "source_specialization": self.config.node.specialization,
            "confidence": round(confidence, 2),
            "routing": routing,
            "topics": topics,
        }

        # 6. Network routing
        best_response = local_response
        has_peers = (self.network and self.network.connected_count > 0) or (self.relay and self.relay.connected)
        if routing in ("both", "network") and has_peers:
            network_response = await self._ask_network(user_message)
            if network_response:
                net_text, net_confidence, net_name, net_spec = network_response
                if routing == "network" or net_confidence > confidence:
                    best_response = net_text
                    meta.update({
                        "source": "network",
                        "source_name": net_name or "Unbekannter Node",
                        "source_id": "",
                        "source_specialization": net_spec,
                        "confidence": round(net_confidence, 2),
                    })
                    log.info("Using network response (confidence: %.2f vs local %.2f)",
                             net_confidence, confidence)

        # 6b. Check for PDF export request
        pdf_plugin = self.plugins.get("pdf_export")
        if pdf_plugin:
            try:
                history_for_pdf = self.sessions.get_history() + [
                    {"role": "user", "content": user_message},
                    {"role": "assistant", "content": best_response},
                ]
                pdf_result = await pdf_plugin.capabilities[0].handler(messages=history_for_pdf)
                if pdf_result:
                    best_response = best_response + "\n\n" + pdf_result
            except Exception as e:
                log.debug("PDF plugin error: %s", e)

        # 6c. Link finder — direkt an Antwort anhängen wenn Link-Anfrage erkannt
        # (nicht im System-Prompt, da kleine Modelle URLs nicht zuverlässig reproduzieren)
        link_plugin = self.plugins.get("link_finder")
        if link_plugin:
            try:
                if await link_plugin.is_link_query(user_message):
                    link_result = await link_plugin.find_links(user_message)
                    if link_result:
                        best_response = best_response + "\n\n---\n" + link_result
            except Exception as e:
                log.debug("link_finder error: %s", e)

        # 7. Store results
        if session_id:
            self.sessions.add_message_to(session_id, "assistant", best_response)
        else:
            self.sessions.add_message("assistant", best_response)
        self.memory.process_message("assistant", best_response)
        self.cache.store(user_message, best_response)

        # Save conversation for training (every 5 exchanges)
        hist = self.sessions.get_history(session_id)
        if len(hist) >= 10 and len(hist) % 10 == 0:
            self.training.save_conversation(list(hist[-10:]))

        return best_response, meta

    async def chat_streaming(self, user_message: str, session_id: str = ""):
        """Two-phase chat: fast preliminary response, then full response.

        Yields SSE-formatted strings:
          data: {"type": "fast",  "text": "...", "meta": {...}}
          data: {"type": "full",  "text": "...", "meta": {...}}
          data: {"type": "done"}

        If no fast model is configured, only "full" + "done" are yielded.
        The large model always runs — either on the original or on a query
        that was reformulated by the fast model to be more specific.
        """
        import json as _json

        # ── Phase 0: Cache ───────────────────────────────────────────────────
        cached = self.cache.lookup(user_message)
        if cached:
            yield f"data: {_json.dumps({'type': 'full', 'text': cached, 'meta': {'source': 'cache', 'source_name': 'Cache', 'confidence': 1.0}})}\n\n"
            yield "data: {\"type\": \"done\"}\n\n"
            return

        # ── Phase 1: Fast model ──────────────────────────────────────────────
        enhanced_query = user_message  # default: unchanged
        if self.fast_model.loaded:
            try:
                # Run fast model and rephrase in parallel
                loop = asyncio.get_event_loop()
                fast_messages = [
                    {
                        "role": "system",
                        "content": (
                            "Du bist ein kompakter KI-Assistent. "
                            "Gib eine kurze, sofortige Erstantwort (2-4 Sätze). "
                            "Weise darauf hin, dass du gleich eine ausführlichere Antwort lieferst."
                        ),
                    },
                    {"role": "user", "content": user_message},
                ]
                fast_text, enhanced_query = await asyncio.gather(
                    loop.run_in_executor(None, self.fast_model.generate_quick, fast_messages),
                    loop.run_in_executor(None, self.fast_model.rephrase_for_large_model, user_message),
                )
                fast_meta = {
                    "source": "fast_model",
                    "source_name": f"{self.name} (schnell)",
                    "confidence": 0.5,
                    "routing": "local_fast",
                    "topics": extract_fast(user_message, max_keywords=5),
                }
                yield f"data: {_json.dumps({'type': 'fast', 'text': fast_text, 'meta': fast_meta})}\n\n"
                log.debug("Fast response sent; enhanced query: %r", enhanced_query[:80])
            except Exception as e:
                log.warning("Fast model error: %s", e)

        # ── Phase 2: Full response via normal chat() pipeline ────────────────
        try:
            full_text, full_meta = await self.chat(enhanced_query, session_id=session_id)

            # If the query was reformulated, store the original question in history
            # (chat() already stored enhanced_query — fix the last user message)
            if enhanced_query != user_message and session_id:
                history = self.sessions.get_history(session_id)
                for i in range(len(history) - 1, -1, -1):
                    if history[i]["role"] == "user" and history[i]["content"] == enhanced_query:
                        history[i]["content"] = user_message
                        break

            yield f"data: {_json.dumps({'type': 'full', 'text': full_text, 'meta': full_meta})}\n\n"
        except Exception as e:
            log.error("chat_streaming Phase 2 error: %s", e, exc_info=True)
            yield f"data: {_json.dumps({'type': 'full', 'text': 'Fehler bei der Antwortgenerierung. Bitte erneut versuchen.', 'meta': {'source': 'error', 'source_name': 'Fehler', 'confidence': 0.0}})}\n\n"
        finally:
            yield "data: {\"type\": \"done\"}\n\n"

    def _build_messages(
        self, user_message: str, extra_context: str = "", session_id: str = ""
    ) -> list[dict]:
        """Build message list with system prompt, memory, RAG context, and history.

        Args:
            user_message:  The current user input (used for RAG retrieval).
            extra_context: Optional additional context injected at the end of the
                           system prompt (e.g. from datetime/weather/news plugins).
            session_id:    Session whose history to include (empty = active session).
        """
        messages = []

        # System prompt with specialization
        system_parts = ["Du bist ein hilfreicher KI-Assistent."]
        if self.config.node.specialization:
            system_parts.append(
                f"Deine Spezialisierung: {self.config.node.specialization}"
            )
        if self.config.node.expertise_tags:
            system_parts.append(
                f"Deine Expertise: {', '.join(self.config.node.expertise_tags)}"
            )

        # Global memory
        memory_ctx = self.memory.build_context()
        if memory_ctx:
            system_parts.append(f"\n{memory_ctx}")

        # RAG context
        rag_context = self.rag.build_context(
            user_message,
            top_k=self.config.rag.top_k,
        )
        if rag_context:
            system_parts.append(
                f"\nRelevantes Wissen aus deinen Dokumenten:\n{rag_context}"
            )

        # Plugin-awareness context (datetime / weather / news)
        if extra_context:
            system_parts.append(f"\n{extra_context}")

        messages.append({"role": "system", "content": "\n".join(system_parts)})

        # Add conversation history from the correct session
        history = self.sessions.get_history(session_id)
        if len(history) > self.max_history:
            history = history[-self.max_history:]
        messages.extend(history)

        return messages

    async def _generate_local(self, messages: list[dict]) -> str:
        """Generate response using local model or plugins."""
        chat_plugin = self.plugins.get("chat")
        if chat_plugin:
            return await chat_plugin.capabilities[0].handler(messages=messages)
        elif self.model.loaded:
            loop = asyncio.get_event_loop()
            return await loop.run_in_executor(None, self.model.generate, messages)
        else:
            return (
                "⚠️ Kein Modell geladen und kein Chat-Plugin verfügbar.\n"
                "Konfiguriere model.path in config.yaml oder installiere ein Plugin."
            )

    async def _datetime_context_string(self) -> str:
        """Return the current date/time as a single line for the system prompt.
        Used for network/relay query handlers where speed matters (no HTTP calls)."""
        dt_plugin = self.plugins.get("datetime_info")
        if dt_plugin:
            try:
                return await dt_plugin.context_string()
            except Exception as e:
                log.debug("datetime_info context error: %s", e)
        return ""

    async def _plugin_awareness_context(self, user_message: str) -> str:
        """Collect live-awareness context from datetime/weather/news plugins.

        Always injects current date & time.  Weather and news are only fetched
        when the user's message appears to ask about them — keeping latency low
        for unrelated queries.
        """
        import re as _re

        parts: list[str] = []

        # 1. Date/time — always, instant (stdlib only)
        dt_str = await self._datetime_context_string()
        if dt_str:
            parts.append(dt_str)

        # 2. Weather — only if query is weather-related
        wx_plugin = self.plugins.get("weather")
        if wx_plugin:
            try:
                if await wx_plugin.is_weather_query(user_message):
                    # Try to extract a location ("in Berlin", "für München", …)
                    location = "auto"
                    m = _re.search(
                        r"\b(?:in|für|for|at|near)\s+([A-ZÄÖÜ][a-zäöüA-ZÄÖÜ\-]+(?:\s+[A-ZÄÖÜ][a-zäöüA-ZÄÖÜ\-]+)?)",
                        user_message,
                    )
                    if m:
                        location = m.group(1)
                    parts.append(await wx_plugin.get_weather(location=location))
            except Exception as e:
                log.debug("weather context error: %s", e)

        # 3. News — only if query is news-related
        news_plugin = self.plugins.get("news_feed")
        if news_plugin:
            try:
                if await news_plugin.is_news_query(user_message):
                    # Try to extract a topic keyword ("über KI", "about Ukraine", …)
                    topic = ""
                    m = _re.search(
                        r"\b(?:über|about|zu|von)\s+(\w+)",
                        user_message.lower(),
                    )
                    if m:
                        topic = m.group(1)
                    parts.append(await news_plugin.get_headlines(topic=topic))
            except Exception as e:
                log.debug("news_feed context error: %s", e)

        return "\n\n".join(parts)

    async def _ask_network(self, query: str) -> tuple[str, float] | None:
        """Ask the P2P network for a response, routing to topic-matching peers.

        Routing strategy:
        1. Extract topics: fast regex first, then model refinement (async, 5s budget)
        2. Score all online peers against topics via their specialization/expertise_tags
        3. Send QUERY only to top-3 matching peers (targeted) — or broadcast if no
           specialised peers are known
        4. Collect all responses within NETWORK_QUERY_TIMEOUT seconds
        5. Return the response with the highest confidence score
        """
        if not self.network:
            return None

        from hivemind.network.protocol import Message, MsgType
        import uuid as _uuid

        # ── 1. Topic extraction ──────────────────────────────────────
        topics = extract_fast(query)  # immediate, always works
        if self.model.loaded:
            try:
                # Model refinement — tight 5s budget so routing stays snappy
                topics = await extract_with_model(query, self.model, max_keywords=6)
            except Exception:
                pass  # already have fast topics as fallback
        log.info("Query topics: %s", topics)

        # ── 2. Peer selection ────────────────────────────────────────
        MAX_TARGETS = 3
        matched = self.network.peers.find_by_topics(topics)  # [(PeerInfo, score), ...]
        top_peers = matched[:MAX_TARGETS]

        if top_peers:
            best_score = top_peers[0][1]
            log.info(
                "Targeting %d peer(s) — best match score: %.2f  (%s)",
                len(top_peers), best_score,
                ", ".join(p.name or p.node_id for p, _ in top_peers),
            )
        else:
            log.info("No specialised peers found — falling back to broadcast")

        # ── 3. Send query ────────────────────────────────────────────
        query_id = str(_uuid.uuid4())[:8]
        entry: dict = {"future": asyncio.get_event_loop().create_future(),
                       "responses": []}
        self._pending_queries[query_id] = entry

        msg = Message(
            type=MsgType.QUERY,
            sender_id=self.id,
            payload={
                "query_id": query_id,
                "query": query,
                "topics": topics,                      # NEW: topics for receiver-side filtering
                "expertise_wanted": self.config.node.expertise_tags,
            },
        )

        sent = 0
        if top_peers and self.network:
            # Targeted: only send to matching peers
            for peer_info, _score in top_peers:
                ok = await self.network.send_to(peer_info.address, msg)
                if ok:
                    sent += 1
            # If targeted sends all failed (peers went offline), fall through to broadcast
            if sent == 0:
                log.info("Targeted sends failed — broadcasting")
                sent = await self.network.broadcast(msg)
        else:
            # No topic-matched peers — broadcast to everyone
            sent = await self.network.broadcast(msg) if self.network else 0

        # Also send via relay (always broadcast there — relay peers are pre-filtered online)
        if self.relay and self.relay.connected:
            relay_ok = await self.relay.broadcast({
                "type": "QUERY",
                "query_id": query_id,
                "query": query,
                "topics": topics,
                "expertise_wanted": self.config.node.expertise_tags,
            })
            if relay_ok:
                sent += 1

        log.info("Query sent to %d channel(s) (id: %s)", sent, query_id)

        if sent == 0:
            del self._pending_queries[query_id]
            return None

        # ── 4. Collect responses ─────────────────────────────────────
        try:
            # Wait for a high-confidence answer OR until timeout
            result = await asyncio.wait_for(
                asyncio.shield(entry["future"]),
                timeout=NETWORK_QUERY_TIMEOUT,
            )
            return result
        except asyncio.TimeoutError:
            # Timeout: pick the best from whatever arrived
            responses: list[tuple[str, float]] = entry["responses"]
            if responses:
                best = max(responses, key=lambda r: r[1])
                log.info(
                    "Network timeout — picked best of %d response(s), confidence %.2f",
                    len(responses), best[1],
                )
                return best
            log.info("Network query timeout with no responses (id: %s)", query_id)
            return None
        finally:
            self._pending_queries.pop(query_id, None)

    async def _handle_network_query(self, conn, msg):
        """Handle incoming query from a peer."""
        from hivemind.network.protocol import Message, MsgType

        query = msg.payload.get("query", "")
        query_id = msg.payload.get("query_id", "")
        incoming_topics: list[str] = msg.payload.get("topics", [])

        if not query or not self.model.loaded:
            return

        # ── Receiver-side topic filter ───────────────────────────────
        # If the sender included topics AND we have a declared specialization,
        # skip answering if the match score is very low (below 0.1).
        # This avoids generating irrelevant answers and saves resources.
        my_spec = self.config.node.specialization
        my_tags = self.config.node.expertise_tags
        if incoming_topics and (my_spec or my_tags):
            my_score = score_peer(incoming_topics, my_spec, my_tags)
            if my_score < 0.1:
                log.debug(
                    "Skipping query (topic mismatch, score=%.2f): %s",
                    my_score, query[:60],
                )
                return

        # Generate local response (inject datetime so the node knows "now")
        dt_ctx = await self._datetime_context_string()
        messages = self._build_messages(query, extra_context=dt_ctx)
        response = await self._generate_local(messages)
        confidence = self.scorer.score(query, response)

        # Send response back
        await conn.send(Message(
            type=MsgType.RESPONSE,
            sender_id=self.id,
            payload={
                "query_id": query_id,
                "response": response,
                "confidence": confidence,
                "specialization": self.config.node.specialization,
                "node_name": self.name,
            },
        ))

    async def _handle_network_response(self, conn, msg):
        """Handle response from a peer to our query."""
        query_id = msg.payload.get("query_id", "")
        response = msg.payload.get("response", "")
        confidence = msg.payload.get("confidence", 0.0)
        # Sender identity — prefer explicit payload fields, fallback to connection info
        sender_name = msg.payload.get("node_name", "")
        sender_spec = msg.payload.get("specialization", "")
        if not sender_name and conn and conn.peer_info:
            sender_name = conn.peer_info.name or conn.peer_info.node_id
        if not sender_spec and conn and conn.peer_info:
            sender_spec = conn.peer_info.specialization

        entry = self._pending_queries.get(query_id)
        if not entry:
            return

        # Accumulate all responses as 4-tuples: (text, confidence, name, specialization)
        entry["responses"].append((response, confidence, sender_name, sender_spec))
        future: asyncio.Future = entry["future"]

        if not future.done():
            if confidence >= 0.85:
                log.info("Network: high-confidence response (%.2f) from '%s' — resolving immediately",
                         confidence, sender_name or '?')
                future.set_result((response, confidence, sender_name, sender_spec))
            elif len(entry["responses"]) >= 3:
                best = max(entry["responses"], key=lambda r: r[1])
                log.info(
                    "Network: collected 3 responses — picking best (confidence %.2f, source '%s')",
                    best[1], best[2] or '?',
                )
                future.set_result(best)

    async def _handle_relay_peers(self, nodes: list[dict]):
        """Try direct P2P connections to nodes discovered via relay."""
        if not self.network:
            return
        for n in nodes:
            info = n.get("info", n)
            host = info.get("host", "")
            port = info.get("port", 0)
            node_id = n.get("node_id", info.get("node_id", ""))
            if not host or host == "0.0.0.0" or not port or node_id == self.id:
                continue
            # Don't retry if already connected
            from hivemind.network.protocol import PeerInfo
            addr = PeerInfo.format_address(host, port)
            if addr in self.network._connections and self.network._connections[addr].alive:
                continue
            log.info("Relay: trying direct P2P to %s (%s)", n.get("name", "?"), PeerInfo.format_address(host, port))
            asyncio.create_task(self.network.connect_to(host, port))

    async def _handle_relay_message(self, sender_id: str, sender_name: str, payload: dict):
        """Handle a message received via relay."""
        msg_type = payload.get("type", "")

        if msg_type == "QUERY":
            # Someone asks us a question via relay
            query = payload.get("query", "")
            query_id = payload.get("query_id", "")
            if not query or not self.model.loaded:
                return
            dt_ctx = await self._datetime_context_string()
            messages = self._build_messages(query, extra_context=dt_ctx)
            response = await self._generate_local(messages)
            confidence = self.scorer.score(query, response)
            if self.relay:
                await self.relay.send_to(sender_id, {
                    "type": "RESPONSE",
                    "query_id": query_id,
                    "response": response,
                    "confidence": confidence,
                    "specialization": self.config.node.specialization,
                    "node_name": self.name,
                })

        elif msg_type == "RESPONSE":
            # Response to our query via relay
            query_id = payload.get("query_id", "")
            response = payload.get("response", "")
            confidence = payload.get("confidence", 0.0)
            relay_node_name = payload.get("node_name", sender_name)  # sender_name from relay envelope
            relay_node_spec = payload.get("specialization", "")
            entry = self._pending_queries.get(query_id)
            if entry:
                entry["responses"].append((response, confidence, relay_node_name, relay_node_spec))
                future: asyncio.Future = entry["future"]
                if not future.done():
                    if confidence >= 0.85:
                        future.set_result((response, confidence, relay_node_name, relay_node_spec))
                    elif len(entry["responses"]) >= 3:
                        best = max(entry["responses"], key=lambda r: r[1])
                        future.set_result(best)

        elif msg_type == "UPDATE_DATA":
            # Update received via relay — delegate to updater
            if self.updater:
                from hivemind.network.protocol import Message, MsgType
                fake_msg = Message(
                    type=MsgType.UPDATE_DATA,
                    sender_id=sender_id,
                    payload={
                        "manifest": payload.get("manifest", {}),
                        "data_b64": payload.get("data_b64", ""),
                    },
                )
                await self.updater._handle_update_data(None, fake_msg)

    async def stop(self) -> None:
        """Shutdown the node gracefully."""
        log.info("Stopping node: %s", self.name)

        # Save remaining conversation data
        hist = self.sessions.get_history()
        if len(hist) >= 4:
            self.training.save_conversation(list(hist))
        self.sessions.save_active()

        if self.relay:
            await self.relay.stop()
        if self.network:
            await self.network.stop()
        await self.plugins.shutdown_all()
        self._running = False

    @property
    def status(self) -> dict:
        s = {
            "id": self.id,
            "name": self.name,
            "version": self.version,
            "model_loaded": self.model.loaded,
            "plugins": self.plugins.loaded,
            "cache_size": self.cache.size,
            "history_length": len(self.sessions.get_history()),
            "session": self.sessions.active.summary_info,
            "running": self._running,
            "specialization": self.config.node.specialization,
            "expertise_tags": self.config.node.expertise_tags,
            "rag": self.rag.stats,
            "training": self.training.stats,
            "memory": self.memory.stats,
        }
        if self.network:
            s["network"] = self.network.status
        if self.relay:
            s["relay"] = self.relay.status
        return s
