"""RAG testleri — chunker/cosine (saf), kaynak CRUD + izolasyon, respx ile index+search."""
import json

import httpx
import pytest
import respx

from app.config import settings
from app.services.rag.knowledge_indexer import chunk_text
from app.utils.cosine import cosine_similarity, top_k_by_cosine
from tests.conftest import unique_email


async def _auth_headers(client) -> dict:
    """Register a new account and return its Bearer ``Authorization`` header.

    Posts to ``/api/auth/register`` with an address from ``unique_email()``
    and builds the header from the access token found in the response body.
    """
    registration = {"name": "Shop", "email": unique_email(), "password": "secret123"}
    response = await client.post("/api/auth/register", json=registration)
    access_token = response.json()["data"]["token"]["access_token"]
    return {"Authorization": f"Bearer {access_token}"}


# ============================================================
# SAF BİRİM TESTLER
# ============================================================


def test_chunk_text_empty():
    """Empty and whitespace-only input must yield an empty chunk list."""
    for blank in ("", "   "):
        assert chunk_text(blank) == []


def test_chunk_text_paragraphs():
    """Two paragraphs separated by a blank line give at least two chunks."""
    paragraphs = ["Birinci paragraf burada.", "İkinci paragraf burada."]
    pieces = chunk_text("\n\n".join(paragraphs), max_words=5)
    assert len(pieces) >= 2


def test_chunk_text_long_paragraph_split():
    """A single 1000-word paragraph is split into chunks within the word limit."""
    word_limit = 100
    long_text = " ".join("kelime" for _ in range(1000))
    pieces = chunk_text(long_text, max_words=word_limit, overlap=10)
    assert len(pieces) > 1
    # Every chunk must respect the requested word limit.
    for piece in pieces:
        assert len(piece.split()) <= word_limit


def test_cosine_similarity():
    """Identical vectors score 1.0 and orthogonal vectors score 0.0."""
    cases = [
        ([1, 0], [1, 0], 1.0),
        ([1, 0], [0, 1], 0.0),
    ]
    for left, right, expected in cases:
        assert cosine_similarity(left, right) == pytest.approx(expected, abs=1e-5)


def test_top_k_by_cosine_ranking():
    """With ``k=2`` only candidates "b" and "c" are returned, best first.

    Either order of the two labels is accepted; the ordering itself is
    checked through the second tuple element of each result (presumably
    the similarity score).
    """
    query = [1.0, 0.0]
    candidates = [("a", [0.0, 1.0]), ("b", [1.0, 0.1]), ("c", [0.9, 0.0])]
    top_two = top_k_by_cosine(query, candidates, k=2)

    labels = [entry[0] for entry in top_two]
    assert labels in (["b", "c"], ["c", "b"])

    first, second = top_two[0], top_two[1]
    assert first[1] >= second[1]


# ============================================================
# KAYNAK CRUD + İZOLASYON
# ============================================================


@pytest.mark.asyncio
async def test_faq_source_crud(client):
    """Create an FAQ source, find it in the listing, then delete it."""
    headers = await _auth_headers(client)

    # Create: expect 201 and a freshly created FAQ source in "pending" state.
    faq_payload = {"title": "Kargo", "content": "Kargo 2 günde teslim edilir."}
    created = await client.post(
        "/api/knowledge/sources/faq", headers=headers, json=faq_payload
    )
    assert created.status_code == 201
    assert created.json()["data"]["source_type"] == "faq"
    assert created.json()["data"]["status"] == "pending"
    source_id = created.json()["data"]["id"]

    # List: the new source must appear among the caller's sources.
    listing = await client.get("/api/knowledge/sources", headers=headers)
    assert any(item["id"] == source_id for item in listing.json()["data"])

    # Delete: expect 204.
    deleted = await client.delete(
        f"/api/knowledge/sources/{source_id}", headers=headers
    )
    assert deleted.status_code == 204


@pytest.mark.asyncio
async def test_knowledge_tenant_isolation(client):
    """A second account can neither delete nor list the first account's source."""
    owner_headers = await _auth_headers(client)
    other_headers = await _auth_headers(client)

    created = await client.post(
        "/api/knowledge/sources/manual",
        headers=owner_headers,
        json={"title": "Gizli", "content": "İç bilgi"},
    )
    source_id = created.json()["data"]["id"]

    # The other account cannot delete it: expect 404.
    foreign_delete = await client.delete(
        f"/api/knowledge/sources/{source_id}", headers=other_headers
    )
    assert foreign_delete.status_code == 404

    # The other account's own listing stays empty.
    foreign_listing = await client.get("/api/knowledge/sources", headers=other_headers)
    assert foreign_listing.json()["data"] == []


# ============================================================
# INDEX + SEARCH (respx ile Gemini mock)
# ============================================================

# Vocabulary used by the fake embedder: one vector dimension per word.
_VOCAB = [
    "kargo",
    "teslimat",
    "gün",
    "iade",
    "ücret",
    "para",
]


def _embed_side_effect(request: httpx.Request) -> httpx.Response:
    """Answer a mocked embedding request with a word-count vector.

    Reads ``content.parts[0].text`` from the JSON request body, lowercases
    it, and counts the occurrences of each ``_VOCAB`` word. When no word
    occurs, a constant vector of 0.01 is returned instead of all zeros.
    """
    payload = json.loads(request.content)
    lowered = payload["content"]["parts"][0]["text"].lower()
    counts = [float(lowered.count(word)) for word in _VOCAB]
    # Counts are never negative, so "no non-zero entry" means the sum is zero.
    values = counts if any(counts) else [0.01] * len(_VOCAB)
    return httpx.Response(200, json={"embedding": {"values": values}})


@pytest.mark.asyncio
@respx.mock
async def test_index_and_search_ranking(client, monkeypatch):
    """Index two FAQ sources and check that a shipping query ranks the shipping FAQ first.

    POST requests to URLs matching ``.*:embedContent`` are answered by
    ``_embed_side_effect`` instead of reaching the network.
    """
    monkeypatch.setattr(settings, "gemini_api_key", "test-key")
    embed_route = respx.post(url__regex=r".*:embedContent")
    embed_route.mock(side_effect=_embed_side_effect)

    from app.tasks.index_knowledge import _index_source

    headers = await _auth_headers(client)

    async def create_faq(title, content):
        # Create one FAQ source and return the "data" part of the response.
        response = await client.post(
            "/api/knowledge/sources/faq",
            headers=headers,
            json={"title": title, "content": content},
        )
        return response.json()["data"]

    shipping_faq = await create_faq("Kargo", "Kargo teslimat 2 gün içinde yapılır.")
    refund_faq = await create_faq("İade", "İade ücret para iadesi 14 gün içinde.")

    # Worker simulation: the broker is off, so run the indexer directly.
    me = await client.get("/api/auth/me", headers=headers)
    tenant_id = me.json()["data"]["id"]
    n_shipping = await _index_source(shipping_faq["id"], tenant_id)
    n_refund = await _index_source(refund_faq["id"], tenant_id)
    assert n_shipping >= 1 and n_refund >= 1

    # A shipping query must put the shipping FAQ on top.
    search = await client.get(
        "/api/knowledge/search?q=kargo kaç günde gelir", headers=headers
    )
    assert search.status_code == 200
    hits = search.json()["data"]
    assert len(hits) >= 1
    top_hit, last_hit = hits[0], hits[-1]
    assert top_hit["source_id"] == shipping_faq["id"]
    assert top_hit["score"] >= last_hit["score"]


@pytest.mark.asyncio
async def test_search_without_api_key_503(client, monkeypatch):
    """Search must answer 503 with an AI error code when no Gemini key is set.

    The key is cleared explicitly rather than relying on the configured
    default, so the outcome does not depend on the environment the test
    runs in (for example a real key loaded from the developer's settings,
    which could otherwise lead to an unmocked outbound request).
    """
    # NOTE(review): assumes an empty string is the "not configured" value,
    # matching the original remark that the default key is empty — confirm
    # against app.config.
    monkeypatch.setattr(settings, "gemini_api_key", "")

    headers = await _auth_headers(client)
    # Empty gemini_api_key: the embedding step is expected to fail with 503.
    res = await client.get("/api/knowledge/search?q=test", headers=headers)
    assert res.status_code == 503
    assert res.json()["error"]["code"] in ("AI_NOT_CONFIGURED", "AI_ERROR")
