"""Dineo v3 — FastAPI application entrypoint.

Startup order:
1. Strip OTEL env vars — must happen before any import touches OpenTelemetry
2. logfire.configure() — must be first to instrument everything
3. FastAPI app creation
4. Middleware (CORS, SlowAPI)
5. Lifespan (httpx client, DB warmup)
6. Router registration
"""

import os

# ---------------------------------------------------------------------------
# Defense: strip the standard OTel env vars BEFORE importing logfire / otel.
#
# If OTEL_EXPORTER_OTLP_ENDPOINT is in the environment when the OTel SDK is
# first imported, the SDK auto-configures a global exporter to that endpoint,
# sending ALL spans (not just LLM ones) to Langfuse.  We use custom
# LANGFUSE_OTLP_ENDPOINT / LANGFUSE_OTLP_HEADERS env vars and configure the
# exporter programmatically via _LLMFilterExporter.
# ---------------------------------------------------------------------------
for _otel_var in ("OTEL_EXPORTER_OTLP_ENDPOINT", "OTEL_EXPORTER_OTLP_HEADERS"):
    os.environ.pop(_otel_var, None)

import asyncio  # noqa: E402
from collections.abc import AsyncGenerator, Sequence  # noqa: E402
from contextlib import asynccontextmanager, suppress  # noqa: E402
from typing import Any, override  # noqa: E402
from urllib.parse import urlparse, urlunparse  # noqa: E402

import httpx  # noqa: E402
import logfire  # noqa: E402
from fastapi import FastAPI, HTTPException, Request, status
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.middleware import SlowAPIMiddleware
from slowapi.util import get_remote_address

from app.config import get_settings
from app.routers import (  # noqa: E501
    billing,
    chat,
    common_questions,
    customers,
    faq_entries,
    internal_email,
    knowledge,
    menu_items,
    orders,
    public,
    reservations,
    restaurants,
    service_block_overrides,
    service_blocks,
    sse,
    table_combinations,
    tables,
    webhooks,
    whatsapp,
    whatsapp_management,
    zones,
)
from app.routers.notifications import router as notifications_router

settings = get_settings()

from app.dependencies import restate_auth_headers  # noqa: E402

_RESTATE_AUTH_HEADERS: dict[str, str] = restate_auth_headers(settings)

# ---------------------------------------------------------------------------
# Observability: Logfire (full-stack APM) + Langfuse (LLM-only, filtered)
#
# Logfire captures everything: HTTP requests, DB queries, app spans.
# Langfuse receives ONLY LLM/agent spans via a filtered OTel exporter.
# ---------------------------------------------------------------------------

_logfire_token = settings.LOGFIRE_TOKEN or None
_additional_processors: list[Any] = []

# Langfuse as secondary exporter — LLM/agent spans only.
if settings.LANGFUSE_OTLP_ENDPOINT and settings.LANGFUSE_OTLP_HEADERS:
    from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
    from opentelemetry.sdk.trace import ReadableSpan
    from opentelemetry.sdk.trace.export import BatchSpanProcessor, SpanExporter, SpanExportResult

    # Attribute prefixes and span-name keywords that identify AI/LLM work.
    # PydanticAI (via logfire.instrument_pydantic_ai) emits gen_ai.* attributes.
    _LLM_ATTR_PREFIXES = ("gen_ai.", "llm.", "ai.", "openai.", "anthropic.")
    _LLM_SPAN_KEYWORDS = (
        "completion",
        "embedding",
        "tool_call",
        "ai_agent",
        "chat",
        "agent",
        "agent_conversation",
    )

    class _LLMFilterExporter(SpanExporter):
        """Forwards only AI/LLM spans to Langfuse; drops all HTTP/DB/app spans."""

        def __init__(self, exporter: SpanExporter) -> None:
            self._inner = exporter

        def _is_llm_span(self, span: ReadableSpan) -> bool:
            attrs = dict(span.attributes or {})
            if any(k.startswith(p) for k in attrs for p in _LLM_ATTR_PREFIXES):
                return True
            name = (span.name or "").lower()
            return any(kw in name for kw in _LLM_SPAN_KEYWORDS)

        @override
        def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
            llm_spans = [s for s in spans if self._is_llm_span(s)]
            if not llm_spans:
                return SpanExportResult.SUCCESS
            return self._inner.export(llm_spans)

        @override
        def shutdown(self) -> None:
            self._inner.shutdown()

        @override
        def force_flush(self, timeout_millis: int = 30000) -> bool:
            return self._inner.force_flush(timeout_millis)

    _headers: dict[str, str] = {}
    for pair in settings.LANGFUSE_OTLP_HEADERS.split(","):
        if "=" in pair:
            k, v = pair.split("=", 1)
            _headers[k.strip()] = v.strip()

    _langfuse_exporter = _LLMFilterExporter(
        OTLPSpanExporter(
            endpoint=f"{settings.LANGFUSE_OTLP_ENDPOINT}/v1/traces",
            headers=_headers,
        )
    )
    _additional_processors.append(BatchSpanProcessor(_langfuse_exporter))

logfire.configure(
    token=_logfire_token,
    service_name="dineo-api",
    additional_span_processors=_additional_processors,
)
logfire.instrument_pydantic_ai()  # gen_ai.* spans → Logfire + Langfuse
# logfire.instrument_asyncpg()  — leave OFF; instrument at the SQLAlchemy layer instead.
logfire.instrument_httpx()

# Trace every query through the SQLAlchemy engine (asyncpg auto-instrumentation stays off).
from app.db.session import engine as _sa_engine

logfire.instrument_sqlalchemy(engine=_sa_engine)

limiter = Limiter(key_func=get_remote_address, storage_uri=settings.REDIS_URL)

_CRITICAL_READINESS_SERVICE_ALIASES: dict[str, tuple[str, ...]] = {
    "ReservationSaga": ("ReservationWorkflow",),
    "PaymentService": ("PaymentService",),
    "CleanupService": ("CleanupService",),
    "CronService": ("CronJob", "CronJobInitiator"),
    "ReminderService": ("ReminderService",),
}

_STARTUP_DEPENDENCY_GROUP = "critical_readiness_services"
_STARTUP_DIAGNOSTIC_DEPENDENCY = "restate_discover"


def _build_restate_discovery_urls() -> list[str]:
    """Return discovery endpoints compatible with old/new Restate versions."""
    ingress_url = settings.RESTATE_INGRESS_URL.rstrip("/")
    candidates = [f"{ingress_url}/discover"]

    parsed = urlparse(ingress_url)
    if parsed.port == 8080:
        admin_netloc = parsed.netloc.rsplit(":", 1)[0] + ":9070"
        admin_url = urlunparse(parsed._replace(netloc=admin_netloc, path="", params="", query=""))
        candidates.append(f"{admin_url.rstrip('/')}/services")

    return candidates


def _extract_discovered_service_names(discover_payload: Any) -> set[str]:
    services: list[Any] = []
    if isinstance(discover_payload, dict):
        direct_services = discover_payload.get("services")
        if isinstance(direct_services, list):
            services.extend(direct_services)

        deployments = discover_payload.get("deployments")
        if isinstance(deployments, list):
            for deployment in deployments:
                if not isinstance(deployment, dict):
                    continue
                deployment_services = deployment.get("services")
                if isinstance(deployment_services, list):
                    services.extend(deployment_services)

    discovered_names: set[str] = set()
    for service in services:
        if not isinstance(service, dict):
            continue
        service_name = service.get("name") or service.get("service") or service.get("serviceName")
        if isinstance(service_name, str) and service_name:
            discovered_names.add(service_name)
    return discovered_names


def _missing_critical_readiness_services(discovered_names: set[str]) -> list[str]:
    missing_services: list[str] = []
    for required_name, discover_names in _CRITICAL_READINESS_SERVICE_ALIASES.items():
        if required_name == "CronService":
            is_present = all(name in discovered_names for name in discover_names)
        else:
            is_present = any(name in discovered_names for name in discover_names)
        if not is_present:
            missing_services.append(required_name)
    return missing_services


async def _discover_restate_service_names(
    client: httpx.AsyncClient,
    *,
    max_attempts: int = 6,
    backoff_seconds: float = 1.0,
) -> tuple[set[str] | None, str, str | None]:
    """Attempt discovery through known endpoints and return names + diagnostics.

    Retries each endpoint up to ``max_attempts`` times with linear backoff to
    absorb Restate's cold-start window. Without this, the first discover call
    fires within milliseconds of FastAPI boot and reliably fails before
    restate-server has finished accepting connections — flooding Logfire with
    transient httpx errors that mask real outages.
    """
    last_error: str | None = None
    candidates = _build_restate_discovery_urls()
    last_url = candidates[0]

    for attempt in range(max_attempts):
        for url in candidates:
            last_url = url
            try:
                response = await client.get(
                    url,
                    headers=_RESTATE_AUTH_HEADERS,
                    timeout=5.0,
                )
                response.raise_for_status()
                return _extract_discovered_service_names(response.json()), url, None
            except (httpx.HTTPError, ValueError) as exc:
                last_error = str(exc)
                continue
        if attempt < max_attempts - 1:
            await asyncio.sleep(backoff_seconds * (attempt + 1))

    return None, last_url, last_error


async def _emit_startup_dependency_diagnostics(client: httpx.AsyncClient) -> None:
    phase = "startup"
    context = "post_cron_registration"
    discovered_names, discover_url, discover_error = await _discover_restate_service_names(client)
    if discovered_names is None:
        logfire.warning(
            "critical_dependency_diagnostics_failed",
            phase=phase,
            context=context,
            dependency=_STARTUP_DIAGNOSTIC_DEPENDENCY,
            affected_service_group=_STARTUP_DEPENDENCY_GROUP,
            url=discover_url,
            error=discover_error,
        )
        return

    missing_services = _missing_critical_readiness_services(discovered_names)
    if missing_services:
        logfire.warning(
            "critical_startup_dependencies_missing",
            phase=phase,
            context=context,
            dependency=_STARTUP_DIAGNOSTIC_DEPENDENCY,
            affected_service_group=_STARTUP_DEPENDENCY_GROUP,
            missing_services=missing_services,
            discovered_services=sorted(discovered_names),
        )


@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
    """Manage shared resources across app lifetime."""
    # Shared httpx client for all Restate proxy calls
    app.state.http_client = httpx.AsyncClient(timeout=30.0)
    logfire.info(
        "dineo_startup",
        environment=settings.ENVIRONMENT,
        logfire_enabled=bool(settings.LOGFIRE_TOKEN),
        langfuse_enabled=bool(settings.LANGFUSE_OTLP_ENDPOINT),
    )
    # Register recurring cron jobs and run discovery diagnostics in the
    # background — these block on Restate Cloud, which may itself be
    # cold-starting. Letting the lifespan return immediately means granian's
    # worker accepts traffic right away, so Scaleway's startup probe sees a
    # responsive container instead of timing out the deploy.
    from app.cron_registration import register_cron_jobs

    cron_task = asyncio.create_task(register_cron_jobs(app.state.http_client))
    diag_task = asyncio.create_task(_emit_startup_dependency_diagnostics(app.state.http_client))
    from app.realtime.pubsub import start_subscriber

    subscriber_task = asyncio.create_task(start_subscriber())

    yield
    for task in (cron_task, diag_task, subscriber_task):
        task.cancel()
    for task in (cron_task, diag_task, subscriber_task):
        with suppress(asyncio.CancelledError, Exception):
            await task
    await app.state.http_client.aclose()
    logfire.info("dineo_shutdown")


app = FastAPI(
    title="Dineo API",
    version="3.0.0",
    description="Dineo restaurant management platform — v3",
    lifespan=lifespan,
)

# Instrument FastAPI after app creation
logfire.instrument_fastapi(app)


# CORS — accept a comma-separated list in FRONTEND_URL so production
# subdomain splits work (e.g. "https://app.dineo.io,https://api.dineo.io" in prod
# and "https://app.dineo.dev,https://api.dineo.dev" on the dev environment).
# Localhost is always permitted for dev round-tripping.
def _resolve_cors_origins(frontend_url: str) -> list[str]:
    """Parse FRONTEND_URL into the CORS allow-list.

    Splits on commas, trims whitespace, drops empty entries, deduplicates
    while preserving first-occurrence order, and appends localhost:3000 when
    missing. Pure function so it can be unit-tested without spinning up the
    FastAPI app (which would pull in the full lifespan + Restate discovery).
    """
    seen: list[str] = []
    for raw in frontend_url.split(","):
        candidate = raw.strip()
        if candidate and candidate not in seen:
            seen.append(candidate)
    if "http://localhost:3000" not in seen:
        seen.append("http://localhost:3000")
    return seen


_cors_origins = _resolve_cors_origins(settings.FRONTEND_URL)
app.add_middleware(
    CORSMiddleware,  # ty: ignore[invalid-argument-type]
    allow_origins=_cors_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Rate limiting via SlowAPI + Redis
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)  # type: ignore[arg-type]
app.add_middleware(SlowAPIMiddleware)  # ty: ignore[invalid-argument-type]


# --- Exception handlers ---


@app.exception_handler(httpx.ConnectError)
async def httpx_connect_error_handler(request: Request, exc: httpx.ConnectError) -> Any:
    logfire.error("restate_connect_error", url=str(request.url), error=str(exc))
    raise HTTPException(
        status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
        detail="Workflow service unavailable. Please try again.",
    )


# --- Health check (no auth) ---


@app.get("/health", tags=["health"])
async def health_check(request: Request) -> Any:
    client = getattr(request.app.state, "http_client", None)
    if not isinstance(client, httpx.AsyncClient):
        return JSONResponse(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            content={
                "status": "unhealthy",
                "version": "3.0.0",
                "reason": "startup_incomplete",
                "missing_services": list(_CRITICAL_READINESS_SERVICE_ALIASES.keys()),
            },
        )

    discovered_names, discover_url, discover_error = await _discover_restate_service_names(client)
    if discovered_names is None:
        logfire.warning("restate_discover_unavailable", error=discover_error, url=discover_url)
        return JSONResponse(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            content={
                "status": "unhealthy",
                "version": "3.0.0",
                "reason": "discover_unavailable",
                "missing_services": list(_CRITICAL_READINESS_SERVICE_ALIASES.keys()),
            },
        )

    missing_services = _missing_critical_readiness_services(discovered_names)
    if missing_services:
        return JSONResponse(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            content={
                "status": "unhealthy",
                "version": "3.0.0",
                "reason": "critical_services_missing",
                "missing_services": missing_services,
            },
        )

    return {"status": "healthy", "version": "3.0.0"}


# --- API v1 routers ---

API_PREFIX = "/api/v1"

app.include_router(restaurants.router, prefix=API_PREFIX)
app.include_router(reservations.router, prefix=API_PREFIX)
app.include_router(orders.router, prefix=API_PREFIX)
app.include_router(customers.router, prefix=API_PREFIX)
app.include_router(faq_entries.router, prefix=API_PREFIX)
app.include_router(chat.router, prefix=API_PREFIX)
app.include_router(chat.public_router, prefix=API_PREFIX)
app.include_router(common_questions.router, prefix=API_PREFIX)
app.include_router(billing.router, prefix=API_PREFIX)
app.include_router(webhooks.router, prefix=API_PREFIX)
app.include_router(knowledge.router, prefix=API_PREFIX)
app.include_router(service_blocks.router, prefix=API_PREFIX)
app.include_router(service_block_overrides.router, prefix=API_PREFIX)
app.include_router(public.router, prefix=API_PREFIX)
app.include_router(sse.router, prefix=API_PREFIX)
app.include_router(zones.router, prefix=API_PREFIX)
app.include_router(tables.router, prefix=API_PREFIX)
app.include_router(table_combinations.router, prefix=API_PREFIX)
app.include_router(menu_items.router, prefix=API_PREFIX)
app.include_router(notifications_router, prefix=API_PREFIX)
app.include_router(whatsapp.router, prefix=API_PREFIX)
app.include_router(whatsapp_management.router, prefix=API_PREFIX)
app.include_router(internal_email.router)
# bump: force module reload to pick up TableCombinationUpdate merged_x/y
