import uuid
from datetime import datetime
from typing import Any, Literal

from pgvector.sqlalchemy import Vector
from pydantic import model_validator
from sqlalchemy import Column, Text
from sqlalchemy.dialects.postgresql import JSONB
from sqlmodel import Field, SQLModel

from app.db.base import TenantModel


class KnowledgeDocumentBase(SQLModel):
    # Fields shared by the table model and the create/read schemas that
    # subclass this base further down in the file.

    # Document body, stored in a non-nullable TEXT column.
    content: str = Field(sa_column=Column(Text, nullable=False))
    # Required origin label. The read schema compares it against "faq" and
    # "common-question" to classify the document.
    source: str = Field(nullable=False)


class KnowledgeDocument(KnowledgeDocumentBase, TenantModel, table=True):
    # Table-backed model mapped to `knowledge_document`. Inherits the shared
    # content/source fields plus whatever TenantModel contributes (defined in
    # app.db.base, not visible in this file).
    __tablename__ = "knowledge_document"

    # String primary key; defaults to a freshly generated UUID4.
    id: str = Field(
        default_factory=lambda: str(uuid.uuid4()),
        primary_key=True,
    )
    # pgvector column of dimension 1536; defaults to None.
    # NOTE(review): 1536 presumably matches the embedding model's output
    # size — confirm before changing either side.
    embedding: Any | None = Field(
        default=None,
        sa_column=Column(Vector(1536)),
    )
    # Free-form JSONB payload. The attribute is `metadata_` while the column
    # itself is named "metadata" — presumably because SQLAlchemy's declarative
    # API reserves the `metadata` attribute name; verify before renaming.
    metadata_: dict[str, Any] | None = Field(
        default=None,
        sa_column=Column("metadata", JSONB),
    )


class KnowledgeDocumentCreate(KnowledgeDocumentBase):
    # Creation payload (per the class name): the shared content/source fields
    # plus two optional extras, both defaulting to None.

    # Optional embedding supplied as a plain list of floats.
    embedding: list[float] | None = None
    # Optional free-form metadata dictionary.
    metadata_: dict[str, Any] | None = None


class KnowledgeDocumentRead(KnowledgeDocumentBase):
    # Read schema: the shared content/source fields plus identifiers and a set
    # of display fields that `_derive_display_fields` fills in after
    # validation from `source`, `metadata_` and `content`.
    id: str
    restaurant_id: str
    created_at: datetime | None = None
    metadata_: dict[str, Any] | None = None
    menu_item_count: int = 0
    menu_item_ids: list[str] = []
    source_type: str = "document"
    # Derived display fields. The router fills `status` based on `embedding`;
    # everything else is computed from `metadata_` + `content` so the frontend
    # never has to parse content strings or guess at intent.
    title: str = ""
    status: Literal["ready", "processing", "error"] = "ready"
    question_text: str | None = None
    answer: str | None = None
    common_question_id: str | None = None
    question_key: str | None = None
    # Original-file fields. `file_url` resolves to the public S3 URL the UI
    # uses for inline preview; `content_type` decides whether to iframe-embed
    # (PDF), render as an image, or fall back to the parsed markdown body.
    file_url: str | None = None
    content_type: str | None = None
    file_size: int | None = None

    @model_validator(mode="after")
    def _derive_display_fields(self) -> "KnowledgeDocumentRead":
        """Populate the derived display fields once the model is validated.

        Reads `metadata_`, `source` and `content` and overwrites
        `menu_item_ids`, `menu_item_count`, `source_type`, `title` and either
        the question/answer fields (common-question rows) or the
        original-file fields (every other row).

        Returns:
            The same instance, as required of an "after" model validator.
        """
        meta = self.metadata_ or {}

        # `dict.get(key, default)` only falls back when the key is absent, so
        # an explicit null (or any non-dict / non-list value) has to be
        # rejected by type. Otherwise `.get` / `len` would raise
        # AttributeError / TypeError out of the validator.
        links = meta.get("menu_item_links")
        item_ids = links.get("item_ids") if isinstance(links, dict) else None
        self.menu_item_ids = item_ids if isinstance(item_ids, list) else []
        self.menu_item_count = len(self.menu_item_ids)

        # Either the `source` column or the metadata hint can mark the row;
        # "faq" takes precedence over "common-question" when both appear.
        meta_source_type = meta.get("source_type")
        if self.source == "faq" or meta_source_type == "faq":
            self.source_type = "faq"
        elif self.source == "common-question" or meta_source_type == "common-question":
            self.source_type = "common-question"

        if self.source_type == "common-question":
            # Stored as `Q: <question>\nA: <answer>` by the common-question sync.
            question_text, answer = _split_qa(self.content)
            self.question_text = question_text
            self.answer = answer
            self.common_question_id = meta.get("common_question_id")
            self.question_key = meta.get("question_key")
            self.title = question_text or ""
        else:
            filename = meta.get("filename")
            if isinstance(filename, str) and filename:
                self.title = filename
            else:
                # Fall back to the first non-empty content line so the row is
                # never blank in the UI.
                first_line = next(
                    (line.strip() for line in self.content.splitlines() if line.strip()),
                    "",
                )
                self.title = first_line[:80]

            self.file_url = meta.get("file_url")
            self.content_type = meta.get("content_type")
            # Only accept an integer size; anything else leaves the default.
            file_size = meta.get("file_size")
            if isinstance(file_size, int):
                self.file_size = file_size

        return self


def _split_qa(content: str) -> tuple[str | None, str | None]:
    """Parse the `Q: ...\\nA: ...` shape persisted by the common-question sync.

    Returns `(question, answer)`. Either may be `None` if the content
    doesn't follow the expected shape (defensive — old rows, manual edits).
    """
    question: str | None = None
    answer: str | None = None
    lines = content.splitlines()
    answer_lines: list[str] = []
    in_answer = False
    for line in lines:
        if not in_answer and line.startswith("Q: "):
            question = line[3:].strip() or None
        elif line.startswith("A: "):
            in_answer = True
            answer_lines.append(line[3:])
        elif in_answer:
            answer_lines.append(line)
    if answer_lines:
        answer = "\n".join(answer_lines).strip() or None
    return question, answer
