"""Training data management and LoRA adapter support.

Handles:
- Collecting conversation data for future fine-tuning
- Exporting training data as JSONL
- Loading LoRA adapters into the model
- LoRA metadata for P2P sharing
"""
from __future__ import annotations

import json
import logging
import time
from collections import Counter
from pathlib import Path
from typing import Any

log = logging.getLogger(__name__)


class TrainingManager:
    """Manage training-data collection and LoRA adapter files.

    The following layout is created below ``data_dir`` on construction:

    - ``conversations/``: one JSONL file per saved conversation
    - ``lora/``: uploaded LoRA adapters (``*.gguf``)
    - ``exports/``: merged JSONL files produced by :meth:`export_jsonl`
    """

    def __init__(self, data_dir: str | Path = "data/training",
                 lora_path: str = ""):
        """Create the directory layout and count already stored data.

        Args:
            data_dir: Root directory for conversations, adapters and exports.
            lora_path: Path of the currently active LoRA adapter, or ``""``
                when no adapter is active.
        """
        self.data_dir = Path(data_dir)
        self.conversations_dir = self.data_dir / "conversations"
        self.lora_dir = self.data_dir / "lora"
        self.export_dir = self.data_dir / "exports"
        self.lora_path = lora_path

        for d in (self.data_dir, self.conversations_dir,
                  self.lora_dir, self.export_dir):
            d.mkdir(parents=True, exist_ok=True)

        self._conversation_count = 0
        self._sample_count = 0
        self._refresh_counts()

    def _refresh_counts(self) -> None:
        """Recount conversation files and their non-blank lines from disk."""
        files = list(self.conversations_dir.glob("*.jsonl"))
        samples = 0
        for path in files:
            try:
                # Binary mode: counting lines must not fail on odd encodings,
                # and the context manager closes the handle deterministically.
                with path.open("rb") as fh:
                    samples += sum(1 for line in fh if line.strip())
            except OSError as e:
                log.warning("Could not count samples in %s: %s", path, e)
        self._conversation_count = len(files)
        self._sample_count = samples

    def save_conversation(self, messages: list[dict],
                          metadata: dict | None = None) -> str:
        """Save a conversation as training data.

        Args:
            messages: List of {role, content} dicts
            metadata: Optional metadata (topic, quality, etc.)

        Returns:
            Filename of saved conversation, or "" when fewer than two
            messages were given (nothing is written in that case).
        """
        if len(messages) < 2:
            return ""

        timestamp = int(time.time())
        entry = {
            "messages": messages,
            "timestamp": timestamp,
            "metadata": metadata or {},
        }
        # Serialise before touching the disk so a non-serialisable message
        # cannot leave an empty file behind.
        payload = json.dumps(entry, ensure_ascii=False) + "\n"

        filename = f"conv_{timestamp}.jsonl"
        collision = 0
        while True:
            filepath = self.conversations_dir / filename
            try:
                # "x" fails if the file exists: two conversations saved within
                # the same second must not overwrite each other.
                with open(filepath, "x", encoding="utf-8") as f:
                    f.write(payload)
                break
            except FileExistsError:
                collision += 1
                filename = f"conv_{timestamp}_{collision}.jsonl"

        self._conversation_count += 1
        self._sample_count += 1
        log.info("Saved conversation: %s (%d messages)", filename, len(messages))
        return filename

    @staticmethod
    def _contents_for_role(messages: Any, role: str) -> list:
        """Return the ``content`` of every well-formed message with *role*."""
        if not isinstance(messages, list):
            return []
        return [m["content"] for m in messages
                if isinstance(m, dict) and m.get("role") == role
                and "content" in m]

    @classmethod
    def _export_entry(cls, data: Any, format_type: str) -> dict | None:
        """Convert one stored record to the export format, or None to skip."""
        if not isinstance(data, dict):
            return None
        messages = data.get("messages", [])

        if format_type == "chatml":
            # OpenAI ChatML format
            return {"messages": messages}

        # Alpaca format: first user message -> instruction,
        # first assistant message -> output.
        user_msgs = cls._contents_for_role(messages, "user")
        asst_msgs = cls._contents_for_role(messages, "assistant")
        if not (user_msgs and asst_msgs):
            return None
        return {
            "instruction": user_msgs[0],
            "input": "",
            "output": asst_msgs[0],
        }

    def export_jsonl(self, format_type: str = "chatml") -> Path:
        """Export all conversations as a single JSONL file for training.

        Blank or malformed lines are skipped individually, so one bad line
        does not drop the rest of its file.

        Args:
            format_type: 'chatml' (OpenAI format) or 'alpaca'. Any value
                other than 'chatml' is treated as 'alpaca'.

        Returns:
            Path to exported file
        """
        timestamp = int(time.time())
        export_path = self.export_dir / f"training_{timestamp}.jsonl"

        count = 0
        with open(export_path, "w", encoding="utf-8") as out:
            for conv_file in sorted(self.conversations_dir.glob("*.jsonl")):
                try:
                    with conv_file.open(encoding="utf-8") as src:
                        for lineno, line in enumerate(src, 1):
                            line = line.strip()
                            if not line:
                                continue
                            try:
                                data = json.loads(line)
                            except ValueError as e:
                                log.warning("Skipping %s line %d: %s",
                                            conv_file, lineno, e)
                                continue
                            entry = self._export_entry(data, format_type)
                            if entry is None:
                                continue
                            out.write(
                                json.dumps(entry, ensure_ascii=False) + "\n")
                            count += 1
                except (OSError, UnicodeDecodeError) as e:
                    log.warning("Error processing %s: %s", conv_file, e)

        log.info("Exported %d training samples to %s", count, export_path)
        return export_path

    @staticmethod
    def _describe_adapter(path: Path) -> dict | None:
        """Build the listing entry for *path*, or None if it cannot be stat'ed."""
        try:
            st = path.stat()
        except OSError:
            return None
        return {
            "name": path.stem,
            "path": str(path),
            "size_mb": round(st.st_size / (1024 * 1024), 1),
            "modified": st.st_mtime,
        }

    def get_lora_adapters(self) -> list[dict]:
        """List available LoRA adapters.

        Returns:
            One dict per adapter with ``name``, ``path``, ``size_mb`` and
            ``modified``. The adapter configured via ``lora_path`` carries
            an additional ``"active": True``, whether it lives inside the
            adapter directory or elsewhere.
        """
        adapters = []
        for f in sorted(self.lora_dir.glob("*.gguf")):
            info = self._describe_adapter(f)
            if info is not None:
                adapters.append(info)

        # Also check configured lora_path
        if self.lora_path:
            lp = Path(self.lora_path)
            info = self._describe_adapter(lp)
            if info is not None:
                # Compare resolved paths so a relative and an absolute
                # spelling of the same file are not listed twice.
                target = lp.resolve()
                for adapter in adapters:
                    if Path(adapter["path"]).resolve() == target:
                        adapter["active"] = True
                        break
                else:
                    info["active"] = True
                    adapters.append(info)
        return adapters

    def save_lora(self, name: str, data: bytes) -> Path:
        """Save an uploaded LoRA adapter.

        Args:
            name: Adapter file name; ``.gguf`` is appended when missing.
                Directory components are discarded.
            data: Raw adapter file content.

        Returns:
            Path of the written file inside the adapter directory.

        Raises:
            ValueError: If *name* contains no usable file name.
        """
        # The name comes from an upload and is untrusted: keep only the last
        # path component so "../x" or an absolute path cannot escape lora_dir.
        safe_name = Path(name.replace("\\", "/")).name
        if safe_name in ("", ".", ".."):
            raise ValueError(f"Invalid LoRA adapter name: {name!r}")
        if not safe_name.endswith(".gguf"):
            safe_name += ".gguf"
        path = self.lora_dir / safe_name
        path.write_bytes(data)
        log.info("Saved LoRA adapter: %s (%.1f MB)", safe_name,
                 len(data) / (1024 * 1024))
        return path

    def topic_analysis(self) -> dict:
        """Analyze topics in training data.

        A topic is counted once per record that has a ``topic`` metadata
        entry, and once per user message containing a known keyword (the
        first matching keyword wins).

        Returns:
            ``{"total_samples": <number of user messages>,
            "topics": {topic: count}}`` limited to the 20 most common topics.
        """
        topics: Counter = Counter()
        total = 0

        for conv_file in self.conversations_dir.glob("*.jsonl"):
            try:
                with conv_file.open(encoding="utf-8") as src:
                    for line in src:
                        line = line.strip()
                        if not line:
                            continue
                        try:
                            data = json.loads(line)
                        except ValueError:
                            log.debug("Skipping malformed line in %s",
                                      conv_file)
                            continue
                        if not isinstance(data, dict):
                            continue

                        meta = data.get("metadata")
                        if isinstance(meta, dict) and "topic" in meta:
                            try:
                                topics[meta["topic"]] += 1
                            except TypeError:
                                # Unhashable topic value (e.g. a list).
                                log.debug("Ignoring unhashable topic in %s",
                                          conv_file)

                        messages = data.get("messages")
                        if not isinstance(messages, list):
                            continue

                        # Simple topic extraction from user messages
                        for m in messages:
                            if not isinstance(m, dict) or m.get("role") != "user":
                                continue
                            total += 1
                            content = m.get("content")
                            if not isinstance(content, str):
                                continue
                            content = content.lower()
                            # Simple keyword-based topic detection
                            for keyword, topic in _TOPIC_KEYWORDS.items():
                                if keyword in content:
                                    topics[topic] += 1
                                    break
            except (OSError, UnicodeDecodeError) as e:
                log.warning("Error processing %s: %s", conv_file, e)

        return {
            "total_samples": total,
            "topics": dict(topics.most_common(20)),
        }

    @property
    def stats(self) -> dict:
        """Summary counters; conversation data is recounted on every access."""
        self._refresh_counts()
        lora_adapters = self.get_lora_adapters()
        return {
            "conversations": self._conversation_count,
            "samples": self._sample_count,
            "lora_adapters": len(lora_adapters),
            "active_lora": self.lora_path or None,
            "export_count": len(list(self.export_dir.glob("*.jsonl"))),
        }

    def lora_metadata_for_sharing(self) -> dict | None:
        """Get metadata about active LoRA for P2P sharing.

        Returns:
            ``{"name", "size", "modified"}`` for the configured adapter, or
            None when no adapter is configured or it cannot be read.
        """
        if not self.lora_path:
            return None
        lp = Path(self.lora_path)
        try:
            # A single stat() avoids the exists()/stat() race and gives a
            # consistent size/mtime pair.
            st = lp.stat()
        except OSError:
            return None
        return {
            "name": lp.stem,
            "size": st.st_size,
            "modified": st.st_mtime,
        }


# Lowercase keyword -> topic label used by the keyword-based topic analysis.
# Insertion order is significant: lookups walk the mapping front to back and
# stop at the first keyword found in a message.
_TOPIC_KEYWORDS = dict((
    ("code", "Programmierung"),
    ("python", "Programmierung"),
    ("javascript", "Programmierung"),
    ("html", "Webentwicklung"),
    ("css", "Webentwicklung"),
    ("api", "Programmierung"),
    ("datenbank", "Datenbanken"),
    ("sql", "Datenbanken"),
    ("linux", "System/DevOps"),
    ("docker", "System/DevOps"),
    ("rezept", "Kochen"),
    ("kochen", "Kochen"),
    ("mathe", "Mathematik"),
    ("rechne", "Mathematik"),
    ("übersetze", "Sprache"),
    ("translate", "Sprache"),
    ("email", "Kommunikation"),
    ("brief", "Kommunikation"),
    ("zusammenfassung", "Text/Analyse"),
    ("erkläre", "Bildung"),
    ("explain", "Bildung"),
))
