"""Surveillance santé et métriques performance du service vision."""

import logging
import statistics
import time
from collections import deque
from dataclasses import dataclass
from typing import Any, Callable, Dict, List, Optional

# Module-level logger; its name is this module's ``__name__``.
logger = logging.getLogger(__name__)


@dataclass
class HealthStatus:
    """Snapshot of the service health produced by one health check.

    The field descriptions below reflect how ``HealthMonitor.check_health``
    populates this record.
    """

    # True only when every individual health criterion passed.
    ok: bool
    # Frames per second derived from the intervals between captures.
    fps_capture: float
    # Pipeline runs per second derived from the intervals between completions.
    fps_pipeline: float
    # Median pipeline latency, in milliseconds.
    latency_p50: float
    # 95th-percentile pipeline latency, in milliseconds.
    latency_p95: float
    # ``time.time()`` value taken when the check was performed.
    timestamp: float
    # Diagnostic payload: counters, component states, thresholds and the
    # outcome of each individual check.
    details: Dict[str, Any]


class PerformanceTracker:
    """Real-time performance tracker over a sliding window of samples.

    Keeps the most recent ``window_size`` capture intervals, pipeline
    completion intervals and pipeline latencies (all in seconds), together
    with lifetime counters for captured frames, pipeline runs and errors.
    """

    def __init__(self, window_size: int = 100):
        """Create an empty tracker.

        Args:
            window_size: Maximum number of samples kept per metric; older
                samples are discarded automatically.
        """
        self.window_size = window_size

        # Timing samples, in seconds
        self.capture_times: deque = deque(maxlen=window_size)
        self.pipeline_times: deque = deque(maxlen=window_size)
        self.latencies: deque = deque(maxlen=window_size)

        # Lifetime counters
        self.frame_count = 0
        self.pipeline_count = 0
        self.error_count = 0

        # Timestamps; 0.0 means "no event recorded yet"
        self.start_time = time.time()
        self.last_capture_time = 0.0
        self.last_pipeline_time = 0.0

    def record_frame_capture(self, timestamp: Optional[float] = None) -> None:
        """Record that a frame was captured.

        Args:
            timestamp: Capture time in seconds; defaults to ``time.time()``.
        """
        if timestamp is None:
            timestamp = time.time()

        if self.last_capture_time > 0:
            delta = timestamp - self.last_capture_time
            # Non-positive intervals (duplicate or out-of-order timestamps)
            # would break the FPS average, so they are dropped.
            if delta > 0:
                self.capture_times.append(delta)

        self.last_capture_time = timestamp
        self.frame_count += 1

    def record_pipeline_processing(
        self, start_time: float, end_time: Optional[float] = None
    ) -> None:
        """Record one complete pipeline run.

        Args:
            start_time: Time at which processing started, in seconds.
            end_time: Time at which processing finished, in seconds;
                defaults to ``time.time()``.
        """
        if end_time is None:
            end_time = time.time()

        # Latency is defined here as the processing duration of the run.
        latency = end_time - start_time

        if self.last_pipeline_time > 0:
            delta = end_time - self.last_pipeline_time
            if delta > 0:
                self.pipeline_times.append(delta)

        self.latencies.append(latency)
        self.last_pipeline_time = end_time
        self.pipeline_count += 1

    def record_error(self) -> None:
        """Record one error."""
        self.error_count += 1

    def get_fps_capture(self) -> float:
        """Return the capture rate in frames per second.

        Returns 0.0 until at least two capture intervals are available.
        """
        if len(self.capture_times) < 2:
            return 0.0

        avg_delta = statistics.mean(self.capture_times)
        return 1.0 / avg_delta if avg_delta > 0 else 0.0

    def get_fps_pipeline(self) -> float:
        """Return the pipeline throughput in runs per second.

        Returns 0.0 until at least two completion intervals are available.
        """
        if len(self.pipeline_times) < 2:
            return 0.0

        avg_delta = statistics.mean(self.pipeline_times)
        return 1.0 / avg_delta if avg_delta > 0 else 0.0

    @staticmethod
    def _nearest_rank_index(count: int, percent: int) -> int:
        """Return the 0-based index of a percentile in ``count`` sorted samples.

        Uses the nearest-rank definition: rank = ceil(percent / 100 * count).
        """
        # Negated floor division is an exact integer ceil (no float rounding).
        rank = -(-count * percent // 100)
        return max(rank - 1, 0)

    def get_latency_percentiles(self) -> tuple[float, float]:
        """Return the (P50, P95) pipeline latencies in milliseconds.

        Percentiles follow the nearest-rank method. Returns ``(0.0, 0.0)``
        until at least two latency samples are available.
        """
        if len(self.latencies) < 2:
            return 0.0, 0.0

        sorted_latencies = sorted(self.latencies)
        n = len(sorted_latencies)

        # Samples are stored in seconds; report milliseconds.
        p50 = sorted_latencies[self._nearest_rank_index(n, 50)] * 1000
        p95 = sorted_latencies[self._nearest_rank_index(n, 95)] * 1000

        return p50, p95

    def get_uptime(self) -> float:
        """Return the seconds elapsed since creation or the last reset."""
        return time.time() - self.start_time

    def reset_stats(self) -> None:
        """Clear every sample and counter and restart the uptime clock."""
        self.capture_times.clear()
        self.pipeline_times.clear()
        self.latencies.clear()

        self.frame_count = 0
        self.pipeline_count = 0
        self.error_count = 0

        self.start_time = time.time()
        self.last_capture_time = 0.0
        self.last_pipeline_time = 0.0


class HealthMonitor:
    """Health monitor for the vision service.

    Combines the rolling metrics of an internal ``PerformanceTracker`` with
    the reported state of the camera, MediaPipe and MQTT components,
    compares them against configurable thresholds and keeps the 20 most
    recent results in ``health_history``.
    """

    def __init__(
        self,
        fps_threshold: float = 10.0,
        latency_threshold_ms: float = 150.0,
        error_rate_threshold: float = 0.1,
    ):
        """Create a monitor; every component starts out marked as not OK.

        Args:
            fps_threshold: Minimum pipeline FPS considered healthy.
            latency_threshold_ms: Maximum healthy P50 latency, in ms.
            error_rate_threshold: Maximum healthy ratio of recorded errors
                to completed pipeline runs.
        """
        self.fps_threshold = fps_threshold
        self.latency_threshold_ms = latency_threshold_ms
        self.error_rate_threshold = error_rate_threshold

        self.performance_tracker = PerformanceTracker()

        # Component states, updated through update_component_status()
        self.camera_ok = False
        self.mediapipe_ok = False
        self.mqtt_ok = False

        # Most recent health check results
        self.health_history: deque = deque(maxlen=20)

    def update_component_status(
        self,
        camera: Optional[bool] = None,
        mediapipe: Optional[bool] = None,
        mqtt: Optional[bool] = None,
    ) -> None:
        """Update the state of one or more components.

        Arguments left as ``None`` keep their current state.
        """
        if camera is not None:
            self.camera_ok = camera
        if mediapipe is not None:
            self.mediapipe_ok = mediapipe
        if mqtt is not None:
            self.mqtt_ok = mqtt

    def check_health(self) -> HealthStatus:
        """Run a full health check, store it in the history and return it.

        A warning listing every failed criterion is logged when the
        service is not healthy.
        """
        tracker = self.performance_tracker
        current_time = time.time()

        # Performance metrics
        fps_capture = tracker.get_fps_capture()
        fps_pipeline = tracker.get_fps_pipeline()
        latency_p50, latency_p95 = tracker.get_latency_percentiles()

        # Error rate relative to completed pipeline runs.
        # NOTE(review): errors recorded while pipeline_count is still 0
        # yield a rate of 0.0 — confirm this is the intended semantics.
        uptime = tracker.get_uptime()
        error_rate = 0.0
        if tracker.pipeline_count > 0:
            error_rate = tracker.error_count / tracker.pipeline_count

        # Individual health criteria
        checks = {
            "fps_ok": fps_pipeline >= self.fps_threshold,
            "latency_ok": latency_p50 <= self.latency_threshold_ms,
            "error_rate_ok": error_rate <= self.error_rate_threshold,
            "components_ok": self.camera_ok and self.mediapipe_ok and self.mqtt_ok,
        }

        overall_ok = (
            checks["fps_ok"]
            and checks["latency_ok"]
            and checks["error_rate_ok"]
            and checks["components_ok"]
        )

        # Diagnostic details
        details = {
            "uptime_s": uptime,
            "frame_count": tracker.frame_count,
            "pipeline_count": tracker.pipeline_count,
            "error_count": tracker.error_count,
            "error_rate": error_rate,
            "components": {
                "camera": self.camera_ok,
                "mediapipe": self.mediapipe_ok,
                "mqtt": self.mqtt_ok,
            },
            "thresholds": {
                "fps_min": self.fps_threshold,
                "latency_max_ms": self.latency_threshold_ms,
                "error_rate_max": self.error_rate_threshold,
            },
            "checks": checks,
        }

        health_status = HealthStatus(
            ok=overall_ok,
            fps_capture=fps_capture,
            fps_pipeline=fps_pipeline,
            latency_p50=latency_p50,
            latency_p95=latency_p95,
            timestamp=current_time,
            details=details,
        )

        self.health_history.append(health_status)

        if not overall_ok:
            issues = self._describe_issues(
                fps_pipeline, latency_p50, error_rate, checks
            )
            logger.warning("Problèmes santé détectés: %s", "; ".join(issues))

        return health_status

    def _describe_issues(
        self,
        fps_pipeline: float,
        latency_p50: float,
        error_rate: float,
        checks: Dict[str, Any],
    ) -> List[str]:
        """Build one human-readable message per failed health criterion."""
        issues: List[str] = []

        if not checks["fps_ok"]:
            issues.append(f"FPS faible ({fps_pipeline:.1f} < {self.fps_threshold})")
        if not checks["latency_ok"]:
            issues.append(
                f"Latence élevée ({latency_p50:.1f}ms > {self.latency_threshold_ms}ms)"
            )
        if not checks["error_rate_ok"]:
            issues.append(
                f"Taux erreur élevé ({error_rate:.2%} > {self.error_rate_threshold:.2%})"
            )
        if not checks["components_ok"]:
            component_states = {
                "camera": self.camera_ok,
                "mediapipe": self.mediapipe_ok,
                "mqtt": self.mqtt_ok,
            }
            failed_components = [
                name for name, healthy in component_states.items() if not healthy
            ]
            issues.append(f"Composants défaillants: {', '.join(failed_components)}")

        return issues

    def get_health_summary(self) -> Dict[str, Any]:
        """Return a compact summary of the latest check for MQTT publication.

        When no check has been run yet, the summary reports ``ok: False``
        together with an error message.
        """
        if not self.health_history:
            return {"ok": False, "error": "Aucun check effectué"}

        latest = self.health_history[-1]

        return {
            "ok": latest.ok,
            "fps": round(latest.fps_pipeline, 1),
            "latency_p50": round(latest.latency_p50, 1),
            "ts_ms": int(latest.timestamp * 1000),
            "uptime_s": round(latest.details["uptime_s"], 1),
            # Copy so callers cannot mutate the stored history entry.
            "components": dict(latest.details["components"]),
        }

    def get_detailed_metrics(self) -> Dict[str, Any]:
        """Return detailed metrics of the latest check, for debugging.

        Returns an empty dict when no check has been run yet.
        """
        if not self.health_history:
            return {}

        latest = self.health_history[-1]
        details = latest.details

        return {
            "performance": {
                "fps_capture": round(latest.fps_capture, 1),
                "fps_pipeline": round(latest.fps_pipeline, 1),
                "latency_p50_ms": round(latest.latency_p50, 1),
                "latency_p95_ms": round(latest.latency_p95, 1),
            },
            "counters": {
                "frames_total": details["frame_count"],
                "pipeline_total": details["pipeline_count"],
                "errors_total": details["error_count"],
                "error_rate": round(details["error_rate"], 3),
            },
            # Copies so callers cannot mutate the stored history entry.
            "status": dict(details["components"]),
            "health_checks": dict(details["checks"]),
            "timestamp": latest.timestamp,
        }


class Watchdog:
    """Watchdog that triggers restart callbacks after repeated health failures.

    Health is checked at most once per ``check_interval`` seconds. Once
    ``failure_threshold`` consecutive checks have failed, every registered
    restart callback is invoked and the failure counter starts over.
    """

    def __init__(
        self,
        health_monitor: HealthMonitor,
        check_interval: float = 5.0,
        failure_threshold: int = 3,
    ):
        """Create an enabled watchdog.

        Args:
            health_monitor: Monitor whose ``check_health()`` is polled.
            check_interval: Minimum delay between two checks, in seconds.
            failure_threshold: Number of consecutive failed checks that
                triggers a restart.
        """
        self.health_monitor = health_monitor
        self.check_interval = check_interval
        self.failure_threshold = failure_threshold

        self.consecutive_failures = 0
        # Starting the clock now delays the first real check by one interval.
        self.last_check_time = time.time()
        self.restart_callbacks: List[Callable[[], None]] = []

        self.enabled = True
        self.total_restarts = 0

    def add_restart_callback(self, callback: Callable[[], None]) -> None:
        """Register a zero-argument callable invoked when a restart fires."""
        self.restart_callbacks.append(callback)

    def check_and_restart_if_needed(self) -> bool:
        """Check health and trigger a restart when the threshold is reached.

        Returns:
            True if the restart callbacks were invoked during this call,
            False otherwise (watchdog disabled, interval not elapsed,
            service healthy, or threshold not reached yet).
        """
        if not self.enabled:
            return False

        current_time = time.time()
        if current_time - self.last_check_time < self.check_interval:
            return False

        self.last_check_time = current_time

        health_status = self.health_monitor.check_health()

        if health_status.ok:
            # A healthy check ends any ongoing failure streak.
            if self.consecutive_failures > 0:
                logger.info(
                    "Service récupéré après %s échecs", self.consecutive_failures
                )
                self.consecutive_failures = 0
            return False

        self.consecutive_failures += 1
        logger.warning(
            "Échec santé #%s/%s", self.consecutive_failures, self.failure_threshold
        )

        if self.consecutive_failures < self.failure_threshold:
            return False

        logger.error(
            "Seuil d'échecs atteint (%s), redémarrage...", self.failure_threshold
        )

        # Iterate over a snapshot so a callback may register further
        # callbacks without disturbing this loop.
        for callback in list(self.restart_callbacks):
            try:
                callback()
            except Exception as e:
                # One failing callback must not prevent the others from
                # running; keep the traceback for diagnosis.
                logger.error("Erreur callback redémarrage: %s", e, exc_info=True)

        self.consecutive_failures = 0
        self.total_restarts += 1

        logger.info("Redémarrage #%s effectué", self.total_restarts)
        return True

    def enable(self) -> None:
        """Enable the watchdog."""
        self.enabled = True
        logger.info("Watchdog activé")

    def disable(self) -> None:
        """Disable the watchdog; checks become no-ops until re-enabled."""
        self.enabled = False
        logger.info("Watchdog désactivé")

    @property
    def stats(self) -> Dict[str, Any]:
        """Current watchdog configuration and counters."""
        return {
            "enabled": self.enabled,
            "consecutive_failures": self.consecutive_failures,
            "total_restarts": self.total_restarts,
            "failure_threshold": self.failure_threshold,
            "check_interval": self.check_interval,
        }


class SystemResourceMonitor:
    """System resource monitor (CPU, memory) backed by optional ``psutil``.

    When ``psutil`` cannot be imported, only the uptime is reported.
    """

    def __init__(self):
        """Record the start time and set up ``psutil`` when it is installed."""
        self.process_start_time = time.time()

        # psutil is an optional dependency, hence the function-scope import.
        try:
            import psutil
        except ImportError:
            self.psutil = None
            self.process = None
            self.psutil_available = False
            logger.info("psutil non disponible, monitoring ressources limité")
            return

        self.psutil = psutil
        self.process = psutil.Process()
        self.psutil_available = True
        self._prime_cpu_counters()

    def _prime_cpu_counters(self) -> None:
        """Take baseline CPU samples so the first report is meaningful.

        psutil computes non-blocking ``cpu_percent()`` values relative to
        the previous call; the very first call only sets the baseline and
        returns a meaningless 0.0.
        """
        try:
            self.process.cpu_percent()
            self.psutil.cpu_percent()
        except Exception as e:
            # Best effort: a failed baseline only affects the first reading.
            logger.debug("Amorçage compteurs CPU impossible: %s", e)

    def get_resource_usage(self) -> Dict[str, Any]:
        """Return current process and system resource usage.

        Returns:
            With psutil available: ``available`` True plus ``process``
            (CPU %, resident memory in MB, uptime in seconds) and
            ``system`` (CPU %, memory %, available memory in MB) sections.
            Otherwise, or when sampling fails: ``available`` False, the
            uptime, and an ``error`` message in the failure case.
        """
        if not self.psutil_available:
            return {
                "available": False,
                "uptime_s": time.time() - self.process_start_time,
            }

        try:
            # Process statistics
            cpu_percent = self.process.cpu_percent()
            memory_info = self.process.memory_info()
            memory_mb = memory_info.rss / 1024 / 1024

            # System-wide statistics
            system_cpu = self.psutil.cpu_percent()
            system_memory = self.psutil.virtual_memory()

            return {
                "available": True,
                "process": {
                    "cpu_percent": round(cpu_percent, 1),
                    "memory_mb": round(memory_mb, 1),
                    "uptime_s": time.time() - self.process_start_time,
                },
                "system": {
                    "cpu_percent": round(system_cpu, 1),
                    "memory_percent": round(system_memory.percent, 1),
                    "memory_available_mb": round(
                        system_memory.available / 1024 / 1024, 1
                    ),
                },
            }
        except Exception as e:
            # Monitoring is best effort: report the failure, never raise.
            logger.error("Erreur monitoring ressources: %s", e)
            return {
                "available": False,
                "error": str(e),
                "uptime_s": time.time() - self.process_start_time,
            }


class HealthManager:
    """Central facade over health monitoring, the watchdog and resources.

    Owns a ``HealthMonitor``, a ``Watchdog`` bound to it and a
    ``SystemResourceMonitor``, and forwards calls to them.
    """

    def __init__(self):
        """Create the monitor, its watchdog and the resource monitor."""
        self.health_monitor = HealthMonitor()
        self.watchdog = Watchdog(self.health_monitor)
        self.resource_monitor = SystemResourceMonitor()

        # Shortcut to the tracker owned by the health monitor
        self.perf_tracker = self.health_monitor.performance_tracker

    def record_frame_capture(self, timestamp: Optional[float] = None) -> None:
        """Record a frame capture.

        Args:
            timestamp: Capture time in seconds; when omitted the tracker
                uses the current time.
        """
        self.perf_tracker.record_frame_capture(timestamp)

    def record_pipeline_start(self) -> float:
        """Return the current time, to be passed to ``record_pipeline_end``."""
        return time.time()

    def record_pipeline_end(
        self, start_time: float, end_time: Optional[float] = None
    ) -> None:
        """Record the end of a pipeline run.

        Args:
            start_time: Value returned by ``record_pipeline_start``.
            end_time: Completion time in seconds; when omitted the tracker
                uses the current time.
        """
        self.perf_tracker.record_pipeline_processing(start_time, end_time)

    def record_error(self) -> None:
        """Record one error."""
        self.perf_tracker.record_error()

    def update_component_status(self, **kwargs) -> None:
        """Forward component states to ``HealthMonitor.update_component_status``."""
        self.health_monitor.update_component_status(**kwargs)

    def get_health_status(self) -> HealthStatus:
        """Run a health check now and return its full result."""
        return self.health_monitor.check_health()

    def get_health_summary(self) -> Dict[str, Any]:
        """Return the compact summary of the latest health check (for MQTT)."""
        return self.health_monitor.get_health_summary()

    def get_full_report(self) -> Dict[str, Any]:
        """Return a complete debug report: health, resources and watchdog."""
        return {
            "health": self.health_monitor.get_detailed_metrics(),
            "resources": self.resource_monitor.get_resource_usage(),
            "watchdog": self.watchdog.stats,
        }

    def check_watchdog(self) -> bool:
        """Run the watchdog check; return True if a restart was triggered."""
        return self.watchdog.check_and_restart_if_needed()

    def add_restart_callback(self, callback: Callable[[], None]) -> None:
        """Register a zero-argument callable run when the watchdog restarts."""
        self.watchdog.add_restart_callback(callback)