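// speech.jsx — text-to-speech for Leo. Active voice chain used by speak():
// Fish (premium, via the Worker) → browser speechSynthesis (robotic fallback).
// The ElevenLabs helper is still defined in this file, but speak() no longer calls it.
// ElevenLabs free tier: 10k chars/month, no credit card.
// Get a key at elevenlabs.io → paste it in the Tweaks panel.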

// ── ElevenLabs config ────────────────────────────────────────────────────────
// Voice "Adam" — deep, warm, authoritative male. Paired with the
// eleven_multilingual_v2 model, which works in all languages.
const ELEVEN_API = 'https://api.elevenlabs.io/v1/text-to-speech';
const ELEVEN_MODEL = 'eleven_multilingual_v2';
const ELEVEN_VOICE_ID = 'pNInz6obpgDQGcFmaJgB'; // Adam (premade, free)

// Playback state shared by the provider functions and the stop/volume controls.
let _speakSeq = 0;        // monotonic id — only the latest result is honored
let _currentAbort = null; // AbortController for the in-flight fetch
let _currentAudio = null; // audio element that is currently playing

/**
 * Synthesize `text` with ElevenLabs and play it.
 *
 * @param {string} text    Line to speak.
 * @param {string} key     ElevenLabs API key (sent as the xi-api-key header).
 * @param {number} volume  Playback volume, clamped to [0, 1].
 * @returns {Promise<boolean|null>} true when playback ended, false when playback
 *   failed, null when the request failed or a newer request replaced this one.
 */
async function speakElevenLabs(text, key, volume = 1) {
  // A new line always wins: abort the pending request and silence current audio.
  if (_currentAbort) {
    try { _currentAbort.abort(); } catch (ignored) {}
  }
  if (_currentAudio) {
    try {
      _currentAudio.pause();
      _currentAudio.src = '';
    } catch (ignored) {}
    _currentAudio = null;
  }

  _speakSeq += 1;
  const ticket = _speakSeq;
  // True once another call (or stopSpeaking) has moved the sequence forward.
  const isStale = () => ticket !== _speakSeq;
  const controller = new AbortController();
  _currentAbort = controller;

  try {
    const voiceSettings = {
      stability:        0.45,
      similarity_boost: 0.80,
      style:            0.25,
      use_speaker_boost: true,
    };
    const request = {
      method: 'POST',
      signal: controller.signal,
      headers: {
        'xi-api-key':   key,
        'Content-Type': 'application/json',
        'Accept':       'audio/mpeg',
      },
      body: JSON.stringify({ text, model_id: ELEVEN_MODEL, voice_settings: voiceSettings }),
    };
    const response = await fetch(`${ELEVEN_API}/${ELEVEN_VOICE_ID}`, request);

    // A newer speak() arrived while we were waiting → drop this result.
    if (isStale()) return null;

    if (!response.ok) {
      const detail = await response.text();
      console.warn('[ElevenLabs] error', response.status, detail);
      // Bad key or exhausted quota → flag ElevenLabs as off for the session.
      const exhausted = response.status === 401 || /quota/i.test(detail);
      if (exhausted) window.__elevenOff = true;
      return null; // caller will use the next provider / browser TTS
    }

    const clip = await response.blob();
    if (isStale()) return null; // re-check: the download also takes time

    const objectUrl = URL.createObjectURL(clip);
    const player = new Audio(objectUrl);
    player.volume = Math.max(0, Math.min(1, volume));
    _currentAudio = player;

    return new Promise((settle) => {
      const releaseUrl = () => URL.revokeObjectURL(objectUrl);
      player.onended = () => {
        releaseUrl();
        if (_currentAudio === player) _currentAudio = null;
        settle(true);
      };
      player.onerror = () => {
        releaseUrl();
        settle(false);
      };
      // Start only once the element can play, so the first word isn't cut off.
      player.oncanplay = () => {
        player.play().catch(() => settle(false));
      };
      player.load();
    });
  } catch (failure) {
    const wasAborted = failure.name === 'AbortError';
    if (!wasAborted) console.warn('[ElevenLabs] fetch failed:', failure);
    return null;
  }
}

// Fish voice (reference_id) to request for each language code.
// 'es' is a Spanish MALE narrator (Narrador v2) — Leo is male, so his voice must be too.
// Any language missing from this map falls back to the Worker default (env.FISH_VOICE).
// Add more languages here once the right male voice has been validated.
const FISH_VOICE_BY_LANG = {
  es: '35199d5438854f5d9157c500479ab684',
};

// ── Fish (premium) via the Worker, with graceful fallback to browser TTS ──────
// The Fish API key lives as a Worker secret (never exposed client-side). If Fish
// reports a definitive negative — bad key (401), no balance (402), forbidden (403)
// or no key (503) — Fish is disabled for the session and the caller falls back to
// the browser voice. Transient failures (other statuses, timeout, network) only
// affect the current line; they are counted via _fishFail().
// A client timeout (timeoutMs, default 5s) covers the WHOLE request, including the
// audio body download, so Leo never freezes waiting on Fish.
/**
 * Synthesize `text` through the Worker's /tts endpoint and play it.
 *
 * @param {string} text              Line to speak.
 * @param {string} lang              Language code; its first two characters select
 *                                   the voice from FISH_VOICE_BY_LANG.
 * @param {number} [volume=1]        Playback volume, clamped to [0, 1].
 * @param {number} [timeoutMs=5000]  Abort the request if headers + body take longer.
 * @returns {Promise<boolean|null>} true when playback ended, false when playback
 *   failed, null when no audio was obtained (no Worker URL, error status, timeout,
 *   tiny body) or when a newer request replaced this one.
 */
async function speakFish(text, lang, volume = 1, timeoutMs = 5000) {
  const base = (window.__KPI_URL || '').trim();
  if (!base) return null;

  // Cancel any in-flight request AND any playing audio before starting a new line.
  if (_currentAbort) { try { _currentAbort.abort(); } catch (e) {} }
  if (_currentAudio) { try { _currentAudio.pause(); _currentAudio.src = ''; } catch (e) {} _currentAudio = null; }

  const mySeq = ++_speakSeq;
  const ctrl  = new AbortController();
  _currentAbort = ctrl;
  // The timer stays armed until the body has been read (see `finally`): aborting the
  // signal also rejects a pending res.blob(), so a stalled download cannot hang.
  const killer = setTimeout(() => { try { ctrl.abort(); } catch (e) {} }, timeoutMs);

  try {
    const fv = FISH_VOICE_BY_LANG[(lang || 'en').slice(0, 2)];
    const res = await fetch(base + '/tts', {
      method: 'POST',
      signal: ctrl.signal,
      // No JSON content-type → "simple" CORS request (no preflight, lower latency).
      // The Worker parses the body via request.json() regardless of content-type.
      body: JSON.stringify(fv ? { text, lang, voice: fv } : { text, lang }),
    });
    if (mySeq !== _speakSeq) return null;

    const ct = res.headers.get('content-type') || '';
    if (!res.ok || !/audio/.test(ct)) {
      // 401 = bad key, 402 = no balance, 403 = forbidden, 503 = no key → disable Fish this session
      if ([401, 402, 403, 503].includes(res.status)) {
        window.__fishOff = true;
        console.warn('[Fish] disabled (status ' + res.status + ') → browser voice. Add balance at fish.audio to enable the premium voice.');
      } else { _fishFail(); }
      return null; // caller falls back to browser TTS
    }

    const blob = await res.blob();
    if (mySeq !== _speakSeq) return null;
    if (!blob || blob.size < 200) return null; // too small to be real audio
    window.__fishFails = 0; // success → reset the transient-failure counter

    const url = URL.createObjectURL(blob);
    const audio = new Audio(url);
    audio.volume = Math.max(0, Math.min(1, volume));
    _currentAudio = audio;

    return new Promise((resolve) => {
      // Single exit path: release the blob URL and the "current audio" slot.
      const finish = (ok) => {
        URL.revokeObjectURL(url);
        if (_currentAudio === audio) _currentAudio = null;
        resolve(ok);
      };
      audio.onended = () => finish(true);
      audio.onerror = () => finish(false);
      // Wait for canplay so the first word isn't cut off; start playback only once.
      audio.oncanplay = () => {
        audio.oncanplay = null;
        audio.play().catch(() => finish(false));
      };
      audio.load();
    });
  } catch (e) {
    // Aborted because a newer speak()/stopSpeaking() took over → not a Fish failure.
    if (mySeq !== _speakSeq) return null;
    if (e.name !== 'AbortError') console.warn('[Fish] fetch failed:', e);
    _fishFail(); // timeout / network → counted as a transient failure
    return null;
  } finally {
    clearTimeout(killer);
    if (_currentAbort === ctrl) _currentAbort = null; // request finished; nothing left to abort
  }
}

// Record one transient Fish error (502 rate-limit / timeout / network).
// Transient errors do NOT disable the premium voice: the browser covers THIS line
// only and Fish is tried again next time (cached lines are instant anyway). Only
// definitive negatives (401/402/403/503 = no key / no balance / forbidden) disable
// Fish for the session. This keeps Leo's voice male.
function _fishFail() {
  const soFar = window.__fishFails || 0;
  window.__fishFails = soFar + 1;
}

// ── Browser TTS fallback ──────────────────────────────────────────────────────
// Two-letter language code → BCP 47 locale requested from the browser voice engine.
const SPEECH_LOCALE = {
  es: 'es-ES',
  en: 'en-US',
  fr: 'fr-FR',
  de: 'de-DE',
  it: 'it-IT',
  pt: 'pt-PT',
  zh: 'zh-CN',
  ru: 'ru-RU',
  nl: 'nl-NL',
  ja: 'ja-JP',
  ar: 'ar-SA',
  pl: 'pl-PL',
  hi: 'hi-IN',
};

// Voice-name gender heuristics used by _pickVoice (both case-insensitive).
// IS_FEMALE: the generic gender words (female, mujer, femme, …) match anywhere in
// the name, while the first names in the group only match as whole words (\b…\b).
// IS_MALE: every alternative is word-bounded, so "female" does NOT match `male`.
const IS_FEMALE = /female|mujer|femme|frau|donna|mulher|woman|girl|\b(paloma|monica|amelie|emma|sofia|maria|alice|samantha|fiona|karen|anna|katja|laura|zira|hazel|eva|julia|sara|nora|luciana|isabella|francisca|camila|lupe|dalia|conchita|elsa)\b/i;
const IS_MALE   = /\b(male|hombre|mann|uomo|homem|jorge|carlos|diego|enrique|alvaro|pablo|antonio|alex|daniel|aaron|jack|tom|oliver|thomas|luca|marco|ryan|michael|david|james|mark|stefan|dmitry|ivan|andres|paulo|felix|guy|eric|arthur)\b/i;

// Per-language substrings of voice names known to be MALE. _pickVoice tries these
// FIRST, using a case-insensitive includes() — a \b regex breaks on multi-word names
// such as "Microsoft Pablo Online (Natural)". The lists cover desktop Chrome/Edge
// (Microsoft/Google voices) and macOS. Stock Android often exposes a single generic
// voice per locale with no gender token, so nothing matches there and _pickVoice
// falls back to the regex heuristics plus pitch 0.75.
const MALE_NAME_HINTS = {
  en: ['guy', 'andrew', 'christopher', 'eric', 'brian', 'david', 'mark', 'daniel', 'alex', 'arthur', 'aaron', 'us english male'],
  es: ['alvaro', 'álvaro', 'pablo', 'jorge', 'dario', 'darío', 'carlos', 'diego', 'enrique', 'juan'],
  fr: ['henri', 'claude', 'paul', 'thomas', 'nicolas'],
  de: ['conrad', 'stefan', 'killian', 'yannick'],
  it: ['diego', 'giuseppe', 'cosimo', 'luca'],
  pt: ['duarte', 'antonio', 'antónio', 'fabio', 'fábio', 'joaquim', 'paulo'],
  nl: ['maarten', 'xander'],
  ru: ['dmitry', 'dmitri', 'pavel', 'yuri'],
  zh: ['yunxi', 'yunyang', 'yunjian', 'kangkang'],
  ja: ['keita', 'ichiro', 'daichi', 'otoya'],
  ar: ['hamed', 'naayf', 'shakir', 'tarek', 'maged'],
  pl: ['marek', 'adam', 'krzysztof'],
};

// Cached list of browser voices; refreshed whenever the engine reports a change.
let _voices = [];

// Re-read the voice list from speechSynthesis (no-op where the API is missing).
function _loadVoices() {
  if (!('speechSynthesis' in window)) return;
  _voices = window.speechSynthesis.getVoices() || [];
}

// Load once now, and again each time the browser fires `voiceschanged`.
if ('speechSynthesis' in window) {
  _loadVoices();
  window.speechSynthesis.onvoiceschanged = _loadVoices;
}

/**
 * Choose the browser voice to use for `lang`, preferring male voices.
 *
 * The language is resolved through SPEECH_LOCALE. Besides the exact keys ('es'),
 * region-tagged or differently-cased codes ('es-MX', 'pt_BR', 'ES') are accepted by
 * falling back to their first two letters, so they no longer drop to 'en-US'.
 * Voices whose locale matches exactly are preferred; otherwise any voice of the
 * same base language is considered.
 *
 * @param {string} lang  Language code.
 * @returns {SpeechSynthesisVoice|null} The selected voice, or null when no voice
 *   exists for the language.
 */
function _pickVoice(lang) {
  if (!_voices.length) _loadVoices();

  const base   = String(lang || '').slice(0, 2).toLowerCase();
  const locale = SPEECH_LOCALE[lang] || SPEECH_LOCALE[base] || 'en-US';
  const prefix = locale.slice(0, 2).toLowerCase();
  const wanted = locale.toLowerCase();
  // Normalize voice tags before comparing: some engines (reported for Chrome on
  // Android) expose 'es_ES' instead of 'es-ES', which never matched the exact locale.
  const tagOf  = v => String(v.lang || '').replace(/_/g, '-').toLowerCase();

  const exact  = _voices.filter(v => tagOf(v) === wanted);
  const nearby = _voices.filter(v => tagOf(v).startsWith(prefix));
  const pool   = exact.length ? exact : nearby;
  if (!pool.length) return null;

  const hints      = MALE_NAME_HINTS[prefix] || [];
  const lc         = s => (s || '').toLowerCase();
  const isHintMale = v => hints.some(h => lc(v.name).includes(h)) && !IS_FEMALE.test(v.name);
  const isNatural  = v => /natural|neural|online/i.test(v.name); // higher-quality engines

  // Ordered from most to least trustworthy signal; the first match wins.
  return (
    pool.find(v => isHintMale(v) && isNatural(v)) ||             // known male + natural/neural (best)
    pool.find(v => isHintMale(v)) ||                             // known male voice by name
    pool.find(v => /microsoft/i.test(v.name) && IS_MALE.test(v.name) && !IS_FEMALE.test(v.name)) ||
    pool.find(v => /google/i.test(v.name)    && IS_MALE.test(v.name) && !IS_FEMALE.test(v.name)) ||
    pool.find(v => /microsoft/i.test(v.name) && !IS_FEMALE.test(v.name)) ||
    pool.find(v => IS_MALE.test(v.name)      && !IS_FEMALE.test(v.name)) ||
    pool.find(v => !IS_FEMALE.test(v.name)) ||                   // anything not known to be female
    pool[0]                                                      // last resort: first voice of the language
  );
}

/**
 * Speak `text` with the browser's speechSynthesis engine, replacing anything it
 * is currently saying.
 *
 * @param {string} text        Line to speak.
 * @param {string} lang        Language code. Region-tagged or differently-cased
 *                             codes ('es-MX', 'ES') fall back to their first two
 *                             letters before defaulting to 'en-US'.
 * @param {number} [volume=1]  Volume, clamped to [0, 1]; a missing or non-numeric
 *                             value is treated as 1.
 * @returns {SpeechSynthesisUtterance|null} The queued utterance, or null when the
 *   browser has no speechSynthesis.
 */
function _speakBrowser(text, lang, volume = 1) {
  if (!('speechSynthesis' in window)) return null;
  window.speechSynthesis.cancel();
  const utt  = new SpeechSynthesisUtterance(text);
  const base = String(lang || '').slice(0, 2).toLowerCase();
  utt.lang   = SPEECH_LOCALE[lang] || SPEECH_LOCALE[base] || 'en-US';
  utt.rate   = 0.85;  // slightly slower → the free voice is easier to follow (especially numbers/temperature)
  utt.pitch  = 0.75;
  const level = Math.max(0, Math.min(1, volume));
  utt.volume = Number.isNaN(level) ? 1 : level; // never hand NaN to the utterance
  const v = _pickVoice(lang);
  if (v) { utt.voice = v; console.log('[Leo voice browser]', v.name); }
  window.speechSynthesis.speak(utt);
  return utt;
}

// ── Public API ────────────────────────────────────────────────────────────────
/**
 * Speak a line. Voice chain: Fish (premium, via the Worker) → browser TTS.
 *
 * @param {string} text         Line to speak; empty/falsy text is ignored.
 * @param {string} [lang='en']  Language code.
 * @param {{volume?: number, free?: boolean}} [opts]
 *   volume — used only when window.__rideVolume is not set;
 *   free   — skip Fish and use the browser voice directly.
 * @returns {null|SpeechSynthesisUtterance|Promise<true|SpeechSynthesisUtterance|null>}
 *   null when there is nothing to say or the volume is 0; the utterance (or null)
 *   straight from the browser path when opts.free is set; otherwise a Promise that
 *   resolves to true when Fish played the line, to the browser utterance when the
 *   browser fallback was used, or to null when the line was cancelled/muted meanwhile.
 */
function speak(text, lang = 'en', opts = {}) {
  if (!text) return null;
  const options = opts || {};

  // Ride volume wins when set, otherwise opts.volume, otherwise full volume.
  const currentVolume = () => {
    const rideVol = typeof window.__rideVolume === 'number' ? window.__rideVolume : -1;
    return rideVol >= 0 ? rideVol : (typeof options.volume === 'number' ? options.volume : 1);
  };
  const volume = currentVolume();
  if (volume === 0) return null; // muted

  // FREE path: live translation (unique per trip — not worth premium nor caching)
  // always uses the browser voice (male via _pickVoice), never Fish. This is the
  // cost lever: at 50–100 tablets the only thing that would scale on Fish is live
  // translation, so we keep it off the paid voice. Set via speak(.., {free:true}).
  if (options.free) return _speakBrowser(text, lang, volume);

  // Voice chain: Fish (premium, pay-as-you-go, commercial-licensed) → browser TTS
  // (robotic, last resort). ElevenLabs removed: its FREE tier has no commercial
  // license and its paid plans cost more than Fish pay-as-you-go credits.
  const tryFish = () => {
    // Languages routed to Fish. Override the default list with window.__FISH_LANGS.
    // Languages without an entry in FISH_VOICE_BY_LANG use the Worker's default voice.
    const fishLangs = window.__FISH_LANGS || ['en','es','fr','de','it','pt','nl','ru','zh','ja','ar','pl'];
    if (window.__fishOff || !(window.__KPI_URL || '').trim() ||
        !fishLangs.includes((lang || 'en').slice(0, 2))) return Promise.resolve(null);
    return speakFish(text, lang, volume);
  };

  const attempt = tryFish();
  // speakFish bumps _speakSeq synchronously before its first await, so this is the
  // sequence number that belongs to THIS line (or the current one if Fish was skipped).
  const launchSeq = _speakSeq;

  return attempt.then(ok => {
    if (ok) return ok;
    // A falsy result also arrives when this line was superseded by a newer speak()
    // or cancelled by stopSpeaking(). Falling back then would make the browser read
    // a line that was deliberately dropped (possibly on top of the newer one).
    if (_speakSeq !== launchSeq) return null;
    // Re-read the volume: the user may have muted while Fish was in flight/playing.
    const fallbackVolume = currentVolume();
    if (fallbackVolume === 0) return null;
    return _speakBrowser(text, lang, fallbackVolume);
  });
}

/**
 * Stop all speech immediately: pending request, playing audio and browser TTS.
 */
function stopSpeaking() {
  // Move the sequence forward first so any result still on its way is ignored.
  _speakSeq += 1;

  const pending = _currentAbort;
  if (pending) {
    try {
      pending.abort();
    } catch (ignored) {}
    _currentAbort = null;
  }

  const playing = _currentAudio;
  if (playing) {
    try {
      playing.pause();
      playing.src = '';
    } catch (ignored) {}
    _currentAudio = null;
  }

  const hasBrowserTts = 'speechSynthesis' in window;
  if (hasBrowserTts) window.speechSynthesis.cancel();
}

/**
 * Set the ride volume and apply it to whatever is playing (live slider feedback).
 *
 * @param {number} v  Requested volume; clamped to [0, 1]. A value that is not a
 *                    number (NaN/undefined) is ignored, leaving the previous volume.
 */
function setPlayingVolume(v) {
  const vol = Math.max(0, Math.min(1, v));
  // NaN would poison window.__rideVolume and throw when assigned to audio.volume.
  if (Number.isNaN(vol)) return;
  window.__rideVolume = vol;

  if (vol === 0) {
    // MUTE: stop everything immediately
    if (_currentAbort) { try { _currentAbort.abort(); } catch(e) {} _currentAbort = null; }
    if (_currentAudio) {
      try { _currentAudio.pause(); _currentAudio.src = ''; } catch(e) {}
      _currentAudio = null;
    }
    if ('speechSynthesis' in window) window.speechSynthesis.cancel();
    return;
  }

  // NON-ZERO: provider audio (the Audio element) changes in real time, no interruption needed
  if (_currentAudio) _currentAudio.volume = vol;
  // Browser TTS: volume can't change mid-utterance (W3C spec limitation).
  // __rideVolume is already updated above — next speak() will use the new level.
  // We do NOT cancel here: cancelling the current sentence would create a "mute" effect
  // every time the user drags the slider, which is confusing.
}

/**
 * Speak with the free, browser-only voice. Intended for live translation and other
 * unique-per-trip lines that should NOT spend premium Fish credits.
 * Equivalent to speak(text, lang, { free: true }).
 */
function speakFree(text, lang = 'en') {
  const freeOnly = { free: true };
  return speak(text, lang, freeOnly);
}

// Expose the public speech API on window.
Object.assign(window, {
  speak,
  speakFree,
  stopSpeaking,
  SPEECH_LOCALE,
  setPlayingVolume,
});

// One-time startup probe of Fish (background, during app load) so no spoken line
// ever waits. Chain: Fish → browser. (ElevenLabs removed — its free tier has no
// commercial license and its paid plans cost more than Fish credits.)
// Fire-and-forget: every failure is handled inside, so the returned promise is ignored.
(async function probeFish() {
  const base = (window.__KPI_URL || '').trim();
  if (!base) { window.__fishOff = true; return; }
  const fc = new AbortController();
  // Generous timeout: cold generation is slow on tablet networks.
  const ft = setTimeout(() => { try { fc.abort(); } catch (e) {} }, 12000);
  try {
    // Probe a phrase that is ALREADY cached (instant hit) so the probe does not time
    // out on a slow first generation. The voice id comes from FISH_VOICE_BY_LANG so
    // the probe always targets the same voice that real Spanish lines use.
    const r = await fetch(base + '/tts', {
      method: 'POST',
      signal: fc.signal,
      body: JSON.stringify({ text: 'Hola', lang: 'es', voice: FISH_VOICE_BY_LANG.es }),
    });
    // Only a DEFINITIVE negative (no key / no balance / forbidden) disables Fish. A slow
    // probe or a network hiccup must NOT turn the premium voice off for the whole session.
    if ([401, 402, 403, 503].includes(r.status)) {
      window.__fishOff = true;
    } else if (r.ok) {
      console.log('[Fish] premium voice available.');
    } else {
      // Transient error: Fish stays enabled, but don't claim it is available.
      console.warn('[Fish] probe returned status ' + r.status + ' — keeping Fish enabled.');
    }
  } catch (e) {
    // One-off network failure or timeout — keep Fish ON; the per-call fallback handles it.
  } finally {
    clearTimeout(ft);
  }
})();
