"""Semantic cache — stores and retrieves responses for similar queries."""
from __future__ import annotations

import json
import hashlib
import logging
import time
from pathlib import Path
from dataclasses import dataclass, asdict

from hivemind.config import CacheConfig

log = logging.getLogger(__name__)


@dataclass
class CacheEntry:
    """One cached query/response pair together with its bookkeeping data."""

    # Query text exactly as it was passed to the cache.
    query: str
    # Response stored for that query.
    response: str
    # Lookup key derived from the normalized query.
    query_hash: str
    # Creation time in seconds since the epoch; the oldest entry is evicted first.
    timestamp: float
    # Number of times this entry has been returned by a lookup.
    hits: int = 0


class SemanticCache:
    """Simple semantic cache with exact-match and hash-based lookup.

    Phase 1: Exact hash matching (fast, no embeddings needed).
    Phase 3+: Will add embedding-based similarity matching.

    Entries live in memory, keyed by the hash of the normalized query, and
    are persisted as JSON to ``<cache_dir>/cache.json`` after every store.
    """

    # Name of the JSON file inside ``cache_dir``.
    _CACHE_FILENAME = "cache.json"

    # Encodings tried, in order, when reading the cache file. Legacy caches
    # were written with cp1252, so it has to be tried before latin-1:
    # latin-1 accepts every byte and would silently turn cp1252 punctuation
    # (bytes 0x80-0x9F, e.g. the euro sign or typographic dashes and quotes)
    # into C1 control characters. latin-1 stays as the last resort for the
    # few bytes cp1252 leaves undefined.
    _READ_ENCODINGS = ("utf-8", "cp1252", "latin-1")

    def __init__(self, config: CacheConfig, cache_dir: Path | None = None):
        """Create the cache and load any previously persisted entries.

        Args:
            config: Cache settings; ``enabled`` and ``max_entries`` are read.
            cache_dir: Directory holding ``cache.json``. Defaults to the
                relative path ``cache``.
        """
        self.config = config
        self.cache_dir = cache_dir or Path("cache")
        self._entries: dict[str, CacheEntry] = {}
        self._load()

    def _hash(self, text: str) -> str:
        """Normalize and hash a query for lookup.

        Surrounding whitespace and letter case are ignored. Returns the
        first 16 hex characters of the SHA-256 digest.
        """
        normalized = text.strip().lower()
        return hashlib.sha256(normalized.encode()).hexdigest()[:16]

    def lookup(self, query: str) -> str | None:
        """Check cache for a matching query. Returns response or None.

        A hit increments the entry's in-memory ``hits`` counter. ``None`` is
        always returned while the cache is disabled.
        """
        if not self.config.enabled:
            return None

        h = self._hash(query)
        entry = self._entries.get(h)
        if entry is None:
            return None

        entry.hits += 1
        log.debug("Cache hit: %s (hits=%d)", h, entry.hits)
        return entry.response

    def store(self, query: str, response: str) -> None:
        """Store a query-response pair in cache and persist the cache.

        An existing entry for the same normalized query is replaced. Does
        nothing while the cache is disabled.

        Raises:
            OSError: If the cache file cannot be written.
        """
        if not self.config.enabled:
            return

        h = self._hash(query)
        self._entries[h] = CacheEntry(
            query=query,
            response=response,
            query_hash=h,
            timestamp=time.time(),
        )

        self._evict_excess()
        self._save()

    def _evict_excess(self) -> None:
        """Drop the oldest entries until the cache fits ``config.max_entries``.

        All surplus entries are removed, not just one, so a cache that was
        loaded from disk above the current limit shrinks back to it.
        """
        excess = len(self._entries) - max(self.config.max_entries, 0)
        if excess <= 0:
            return

        oldest_first = sorted(
            self._entries, key=lambda k: self._entries[k].timestamp
        )
        for key in oldest_first[:excess]:
            del self._entries[key]

    def _load(self) -> None:
        """Load cache from disk.

        Best effort: every failure is logged as a warning and never raised.
        A file that is not valid JSON is deleted so the cache starts fresh.
        A file that had to be read with a legacy encoding is rewritten as
        UTF-8 right away.
        """
        cache_file = self.cache_dir / self._CACHE_FILENAME
        if not cache_file.exists():
            return

        try:
            # Try utf-8 first; fall back to the legacy encodings for caches
            # that were saved with the old cp1252 encoding.
            data = None
            last_enc = "utf-8"
            for enc in self._READ_ENCODINGS:
                try:
                    with open(cache_file, encoding=enc) as f:
                        data = json.load(f)
                except UnicodeError:
                    # Wrong encoding guess; try the next candidate.
                    continue
                except json.JSONDecodeError as je:
                    log.warning(
                        "Cache-Datei korrumpiert (%s) — wird zurückgesetzt: %s", enc, je
                    )
                    try:
                        cache_file.unlink()
                    except OSError:
                        # Not silent: the existence check below reports it.
                        pass
                    data = None
                    break
                else:
                    last_enc = enc
                    break

            # Nothing usable was read although the file is still present.
            if data is None and cache_file.exists():
                raise ValueError("Cache-Datei konnte nicht gelesen werden")

            if data:
                for h, entry_data in data.items():
                    self._entries[h] = CacheEntry(**entry_data)
                log.info("Cache loaded: %d entries", len(self._entries))
                # Rewrite as utf-8 immediately (migration).
                if last_enc != "utf-8":
                    log.info("Cache-Datei war %s-kodiert — wird auf utf-8 migriert", last_enc)
                    self._save()
        except Exception as e:
            log.warning("Failed to load cache: %s", e)

    def _save(self) -> None:
        """Persist cache to disk as UTF-8 JSON.

        The data is written to a temporary sibling file that is then moved
        over ``cache.json``, so an interrupted write does not truncate the
        existing cache file (which the loader would discard as corrupt).

        Raises:
            OSError: If the directory or the file cannot be written.
        """
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        cache_file = self.cache_dir / self._CACHE_FILENAME
        tmp_file = cache_file.with_name(cache_file.name + ".tmp")
        data = {h: asdict(e) for h, e in self._entries.items()}
        try:
            with open(tmp_file, "w", encoding="utf-8") as f:
                json.dump(data, f, ensure_ascii=False)
            tmp_file.replace(cache_file)
        except Exception:
            # Do not leave a half-written temp file behind; the original
            # error is what the caller needs to see.
            try:
                tmp_file.unlink()
            except OSError:
                pass
            raise

    @property
    def size(self) -> int:
        """Number of entries currently held in memory."""
        return len(self._entries)
