"""Semantik retriever — query embed + cosine top-k. Query embedding Redis cache'li."""
import hashlib
import json

from fastapi import Depends

from app.cache import cache
from app.repositories.knowledge_repo import KnowledgeRepository
from app.services.rag.embedding_client import EmbeddingClient, get_embedding_client
from app.utils.cosine import cosine_similarity

_QUERY_EMBED_TTL = 86400  # lifetime of a cached query embedding: 24 hours, in seconds


class KnowledgeRetriever:
    """Semantic retriever: embed a query and rank a tenant's chunks by cosine similarity.

    Query embeddings are cached per tenant (see ``_query_embedding``) so a
    repeated query does not trigger another embedding call.
    """

    def __init__(
        self,
        repo: KnowledgeRepository = Depends(),
        embedding: EmbeddingClient = Depends(get_embedding_client),
    ):
        """Store the collaborators; the defaults are FastAPI dependency markers.

        Args:
            repo: Repository used to list the candidate chunks.
            embedding: Client used to embed the query text.
        """
        self.repo = repo
        self.embedding = embedding

    async def _query_embedding(self, tenant_id: str, query: str) -> list[float]:
        """Return the embedding vector for ``query``, served from cache when present.

        The cache key is scoped by ``tenant_id`` and derived from the
        lower-cased, stripped query text. On a miss the embedding client is
        called with ``task="RETRIEVAL_QUERY"`` and the JSON-encoded vector is
        stored for ``_QUERY_EMBED_TTL``.

        NOTE(review): the key is built from the normalized text, but the raw
        ``query`` is what gets embedded. Queries that differ only in case or
        surrounding whitespace therefore share whichever vector was cached
        first -- confirm this is intended.

        Args:
            tenant_id: Tenant the query belongs to; part of the cache key.
            query: Raw query text.

        Returns:
            The embedding vector (decoded from JSON on a cache hit).
        """
        norm = query.lower().strip()
        # MD5 is only a compact cache-key fingerprint here, not a security
        # measure. Declaring that keeps the call usable on FIPS-restricted
        # builds and does not change the digest, so existing keys stay valid.
        digest = hashlib.md5(norm.encode(), usedforsecurity=False).hexdigest()
        cache_key = f"flovy:qemb:{tenant_id}:{digest}"

        cached = await cache.get(cache_key)
        if cached:
            return json.loads(cached)

        vec = await self.embedding.embed(query, task="RETRIEVAL_QUERY")
        await cache.setex(cache_key, _QUERY_EMBED_TTL, json.dumps(vec))
        return vec

    async def top_k(
        self,
        tenant_id: str,
        query: str,
        k: int = 5,
        source_type: str | None = None,
    ) -> list[dict]:
        """Return the ``k`` chunks most similar to ``query``, best match first.

        Args:
            tenant_id: Tenant whose chunks are searched.
            query: Query text to embed and compare against the chunks.
            k: Maximum number of results. Values of zero or below yield an
                empty list.
            source_type: Passed through to the repository's
                ``list_chunks_for_search`` (presumably an optional filter on
                the chunk source type -- verify against the repository).

        Returns:
            A list of dicts with the keys ``chunk_id``, ``source_id``,
            ``source_type``, ``content``, ``meta`` and ``score``, sorted by
            ``score`` in descending order. Chunks whose ``embedding`` is
            falsy are skipped.
        """
        # A negative k would make ``scored[:k]`` drop items from the END of the
        # ranking instead of returning nothing; bail out before doing any
        # embedding or repository work.
        if k <= 0:
            return []

        query_vec = await self._query_embedding(tenant_id, query)
        chunks = await self.repo.list_chunks_for_search(tenant_id, source_type)

        scored = [
            {
                "chunk_id": c.id,
                "source_id": c.source_id,
                "source_type": c.source_type,
                "content": c.content,
                "meta": c.meta,
                "score": cosine_similarity(query_vec, c.embedding),
            }
            for c in chunks
            if c.embedding
        ]
        scored.sort(key=lambda item: item["score"], reverse=True)
        return scored[:k]
