"""Synthèse vocale avec eSpeak-NG."""

import subprocess
import tempfile
import os
import time
import numpy as np
from typing import List, Optional, Tuple
from io import BytesIO
import wave

from .voice_types import TTSRequest, VisemeData
from .config import VoiceConfig
from .logger import setup_logger, log_with_metrics
from .jaw_sync import JawSyncProcessor, VisemeGenerator
from .visemes import VisemeMapper


class TTSEngine:
    """Moteur TTS basé sur eSpeak-NG."""

    def __init__(self, jaw_processor: JawSyncProcessor):
        """Set up the engine's collaborators and probe for eSpeak-NG.

        Args:
            jaw_processor: Jaw synchronisation processor; stored on the
                instance as ``jaw_processor``.
        """
        self.jaw_processor = jaw_processor
        self.viseme_generator = VisemeGenerator()
        self.viseme_mapper = VisemeMapper()
        self.logger = setup_logger(__name__)

        # Keep the probe result instead of discarding it, so code holding the
        # engine can find out whether the ``espeak-ng`` binary answered.
        self.espeak_available = self._check_espeak_available()

    def _check_espeak_available(self) -> bool:
        """Vérifie la disponibilité d'eSpeak-NG."""
        try:
            result = subprocess.run(
                ["espeak-ng", "--version"], capture_output=True, text=True, timeout=5
            )
            if result.returncode == 0:
                self.logger.info(f"eSpeak-NG disponible: {result.stdout.strip()}")
                return True
        except (subprocess.TimeoutExpired, FileNotFoundError) as e:
            self.logger.error(f"eSpeak-NG non disponible: {e}")

        return False

    def synthesize_with_phonemes(
        self, request: TTSRequest
    ) -> Tuple[bytes, List[str], int]:
        """Synthesize speech for a request and extract its phonemes.

        Args:
            request: TTS request; its ``text`` and ``spd`` attributes are read
                here, and the whole request is passed to ``_generate_wav``.

        Returns:
            A ``(wav_data, phonemes, duration_ms)`` tuple. ``duration_ms`` is
            an estimate computed from the text length and speed factor, not a
            measurement of the generated audio.
        """
        # perf_counter is monotonic, so the elapsed time reported below cannot
        # be skewed by wall-clock adjustments.
        started = time.perf_counter()

        wav_data = self._generate_wav(request)

        # Only the text is handed over; the request's voice is not.
        phonemes = self._extract_phonemes(request.text)

        duration_ms = self._estimate_duration(request.text, request.spd)

        synthesis_time = (time.perf_counter() - started) * 1000

        log_with_metrics(
            self.logger,
            "INFO",
            f"TTS synthèse terminée: {len(request.text)} chars",
            synthesis_time_ms=synthesis_time,
            phonemes_count=len(phonemes),
            duration_ms=duration_ms,
        )

        return wav_data, phonemes, duration_ms

    def _generate_wav(self, request: TTSRequest) -> bytes:
        """Render the request's text to WAV bytes with ``espeak-ng``.

        The speed and pitch passed to eSpeak are the ``VoiceConfig`` base
        values multiplied by the request's ``spd`` and ``pitch`` factors and
        truncated to integers.

        Args:
            request: TTS request; ``voice``, ``spd``, ``pitch`` and ``text``
                are read.

        Returns:
            The raw content of the WAV file written by eSpeak.

        Raises:
            RuntimeError: If ``espeak-ng`` exits with a non-zero status.
            subprocess.TimeoutExpired: If synthesis exceeds 10 seconds.
        """
        # Reserve a unique path, then close our own handle before eSpeak
        # writes to it: keeping the file open while another process writes
        # the same path is unnecessary and fails on some platforms.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
            wav_path = tmp_file.name

        try:
            cmd = [
                "espeak-ng",
                "-v",
                request.voice,
                "-s",
                str(int(VoiceConfig.ESPEAK_SPEED * request.spd)),
                "-p",
                str(int(VoiceConfig.ESPEAK_PITCH * request.pitch)),
                "-w",
                wav_path,
                # End of options: text starting with "-" must be spoken, not
                # parsed as a command-line switch.
                "--",
                request.text,
            ]

            result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)

            if result.returncode != 0:
                raise RuntimeError(f"eSpeak failed: {result.stderr}")

            with open(wav_path, "rb") as f:
                return f.read()

        finally:
            # Best-effort cleanup; a leftover temp file must not mask the
            # real outcome of the synthesis.
            try:
                os.unlink(wav_path)
            except OSError:
                pass

    def _extract_phonemes(self, text: str) -> List[str]:
        """Extrait les phonèmes via eSpeak."""
        try:
            cmd = [
                "espeak-ng",
                "-v",
                "fr",
                "-x",  # Output phonemes
                "-q",  # Quiet
                text,
            ]

            result = subprocess.run(cmd, capture_output=True, text=True, timeout=5)

            if result.returncode == 0:
                phonemes = self.viseme_mapper.parse_espeak_phonemes(result.stdout)
                return phonemes
            else:
                self.logger.warning(f"Extraction phonèmes échouée: {result.stderr}")

        except subprocess.TimeoutExpired:
            self.logger.warning("Timeout extraction phonèmes")

        # Fallback: analyse simple du texte
        return self._fallback_phonemes(text)

    def _fallback_phonemes(self, text: str) -> List[str]:
        """Fallback phonèmes depuis analyse basique."""
        # Très simple: 1 phonème par caractère alphabétique
        phonemes = []
        for char in text.lower():
            if char.isalpha():
                phonemes.append(char)
            elif char.isspace():
                phonemes.append("_")

        return phonemes

    def _estimate_duration(self, text: str, speed: float) -> int:
        """Estime la durée de synthèse."""
        # Approximation: ~5 caractères par seconde à vitesse normale
        base_duration = len(text) / 5.0  # secondes
        adjusted_duration = base_duration / speed

        return int(adjusted_duration * 1000)  # ms

    def wav_to_numpy(self, wav_data: bytes) -> Tuple[np.ndarray, int]:
        """Convertit WAV en array numpy."""
        try:
            with wave.open(BytesIO(wav_data), "rb") as wav_file:
                sample_rate = wav_file.getframerate()
                frames = wav_file.readframes(wav_file.getnframes())

                # Convertit en numpy array
                if wav_file.getsampwidth() == 2:
                    audio_data = np.frombuffer(frames, dtype=np.int16)
                else:
                    audio_data = np.frombuffer(frames, dtype=np.uint8)

                # Normalise en float [-1, 1]
                if audio_data.dtype == np.int16:
                    audio_data = audio_data.astype(np.float32) / 32768.0
                else:
                    audio_data = audio_data.astype(np.float32) / 128.0 - 1.0

                return audio_data, sample_rate

        except Exception as e:
            self.logger.error(f"Erreur conversion WAV: {e}")
            return np.array([]), VoiceConfig.SAMPLE_RATE

    def generate_visemes(
        self, phonemes: List[str], start_time_ms: int, duration_ms: int
    ) -> List[VisemeData]:
        """Génère les visèmes depuis les phonèmes."""
        return self.viseme_generator.generate_visemes(
            phonemes, start_time_ms, duration_ms
        )
