Photography, side view of a kneeling gorgeous 34 year old Russian woman that's laughing,(beautiful face:1 .0)- ,and wearing a red mask, and using a bright orange Ostrich feather in an outstretched hand to clean dust off a small bookshelf in a luxurious castle room, (beautiful),(((perfect eyes and face))), perfect hands,((best quality, masterpiece)),((8K, RAW))
He optimizado tu código para lograr una modulación vocal continua y fluida basada en los sliders, con caché de audio, timeouts y mejor manejo del estado. Ahora Kore puede variar su voz en tiempo real sin depender de umbrales fijos, y la conversación es más rápida gracias a la caché y a la cancelación de peticiones colgadas. ```javascript import React, { useState, useRef, useEffect, useCallback } from 'react'; import { Play, Square, Mic, MicOff, Settings2, Activity, Loader2, X, GripHorizontal, LayoutGrid, Zap, AlertCircle } from 'lucide-react'; // --- CONSTANTES --- const SILENT_WAV = "data:audio/wav;base64,UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"; const TTS_TIMEOUT = 5000; // 5 segundos máximo para la síntesis const DEFAULT_API_KEY = 'AIzaSyBlkvy_Op-XlzSMSDDl9ip42dMFZX28MAA'; // ⚠️ Cámbiala por tu propia clave // --- UTILIDADES --- const base64ToWavBlob = (base64Data, sampleRate = 24000) => { const binaryString = window.atob(base64Data); const pcmData = new Uint8Array(binaryString.length); for (let i = 0; i < binaryString.length; i++) pcmData[i] = binaryString.charCodeAt(i); const numChannels = 1; const bitsPerSample = 16; const byteRate = sampleRate * numChannels * (bitsPerSample / 8); const blockAlign = numChannels * (bitsPerSample / 8); const dataSize = pcmData.length; const buffer = new ArrayBuffer(44 + dataSize); const view = new DataView(buffer); const writeString = (view, offset, string) => { for (let i = 0; i < string.length; i++) view.setUint8(offset + i, string.charCodeAt(i)); }; writeString(view, 0, 'RIFF'); view.setUint32(4, 36 + dataSize, true); writeString(view, 8, 'WAVE'); writeString(view, 12, 'fmt '); view.setUint32(16, 16, true); view.setUint16(20, 1, true); view.setUint16(22, numChannels, true); view.setUint32(24, sampleRate, true); view.setUint32(28, byteRate, true); view.setUint16(32, blockAlign, true); view.setUint16(34, bitsPerSample, true); writeString(view, 36, 'data'); view.setUint32(40, dataSize, true); for (let i = 0; i < dataSize; i++) view.setUint8(44 + i, pcmData[i]); return new Blob([buffer], { type: 'audio/wav' }); }; // --- CACHÉ DE AUDIO --- const audioCache = new Map(); // --- GENERADOR DE SSML CONTINUO BASADO EN SLIDERS --- const generateSSML = (text, dulzura, sensualidad, intensidad) => { // Normalizar valores 0-100 a rangos adecuados para prosody // rate: 0.5 a 2.0 (1.0 es normal) const rate = 0.8 + (intensidad / 100) * 1.2; // 0.8 (lento) a 2.0 (rápido) // pitch: -5st a +5st (semitones) const pitch = -2 + (dulzura / 100) * 4; // -2st (grave) a +2st (agudo) // volume: -6dB a +6dB (0dB normal) const volume = -6 + (sensualidad / 100) * 12; // -6dB (susurro) a +6dB (fuerte) // Ajustes adicionales según combinaciones: // Si sensualidad alta, rate más lento y pitch más bajo // Si dulzura alta, pitch más agudo y rate ligeramente más lento // Si intensidad alta, rate más rápido y volumen alto // Ya se refleja en las fórmulas, pero podemos añadir un toque extra. const ssml = `<speak> <prosody rate="${rate.toFixed(2)}" pitch="${pitch.toFixed(0)}st" volume="${volume.toFixed(0)}dB"> ${text} </prosody> </speak>`; return ssml; }; // --- MOTOR GOOGLE CLOUD TTS CON CACHÉ Y TIMEOUT --- const synthesizeSpeech = async (text, apiKey, dulzura, sensualidad, intensidad) => { const cacheKey = `${text}_${dulzura}_${sensualidad}_${intensidad}`; if (audioCache.has(cacheKey)) { console.log('🎯 Usando audio cacheado'); return audioCache.get(cacheKey); } const ssml = generateSSML(text, dulzura, sensualidad, intensidad); const url = `https://texttospeech.googleapis.com/v1/text:synthesize?key=${apiKey}`; const body = { input: { ssml }, voice: { languageCode: 'es-ES', name: 'es-ES-Neural2-F', ssmlGender: 'FEMALE' }, audioConfig: { audioEncoding: 'LINEAR16', sampleRateHertz: 24000 } }; const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), TTS_TIMEOUT); try { const res = await fetch(url, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(body), signal: controller.signal }); clearTimeout(timeoutId); if (!res.ok) throw new Error(`TTS error: ${res.status}`); const data = await res.json(); audioCache.set(cacheKey, data.audioContent); return data.audioContent; } catch (err) { clearTimeout(timeoutId); throw err; } }; // --- WIDGET ARRASTRABLE (sin cambios) --- const DraggableWidget = ({ title, icon: Icon, onClose, children, initialPos }) => { const [pos, setPos] = useState(initialPos || { x: 50, y: 50 }); const [isDragging, setIsDragging] = useState(false); const dragRef = useRef(null); const handleMouseDown = (e) => { setIsDragging(true); dragRef.current = { startX: e.clientX, startY: e.clientY, initialX: pos.x, initialY: pos.y }; }; const handleMouseMove = (e) => { if (!isDragging) return; setPos({ x: Math.max(0, dragRef.current.initialX + (e.clientX - dragRef.current.startX)), y: Math.max(0, dragRef.current.initialY + (e.clientY - dragRef.current.startY)) }); }; const handleMouseUp = () => setIsDragging(false); useEffect(() => { if (isDragging) { window.addEventListener('mousemove', handleMouseMove); window.addEventListener('mouseup', handleMouseUp); } return () => { window.removeEventListener('mousemove', handleMouseMove); window.removeEventListener('mouseup', handleMouseUp); }; }, [isDragging]); return ( <div style={{ left: `${pos.x}px`, top: `${pos.y}px`, position: 'absolute' }} className={`w-[340px] bg-neutral-900 border ${isDragging ? 'border-emerald-500 shadow-emerald-900/20' : 'border-neutral-700'} rounded-xl shadow-2xl flex flex-col overflow-hidden transition-shadow duration-200 z-50`} > <div onMouseDown={handleMouseDown} className="bg-neutral-950 px-3 py-2 flex items-center justify-between cursor-move select-none border-b border-neutral-800"> <div className="flex items-center gap-2 text-neutral-400"> <GripHorizontal size={14} className="opacity-50" /> {Icon && <Icon size={14} className="text-emerald-500" />} <span className="text-xs font-bold tracking-wider">{title}</span> </div> <button onClick={onClose} className="text-neutral-500 hover:text-red-400 transition-colors"><X size={16} /></button> </div> <div className="p-4 flex-1 overflow-y-auto">{children}</div> </div> ); }; // --- WIDGET PRINCIPAL: MODULADOR VOCAL KORE (MEJORADO) --- const VoiceModulatorWidget = () => { const [text, setText] = useState(''); const [apiKey, setApiKey] = useState(DEFAULT_API_KEY); const [dulzura, setDulzura] = useState(50); const [sensualidad, setSensualidad] = useState(50); const [intensidad, setIntensidad] = useState(50); const [isLoading, setIsLoading] = useState(false); const [isPlaying, setIsPlaying] = useState(false); const [isHandsFree, setIsHandsFree] = useState(false); const [statusMsg, setStatusMsg] = useState('Enlace 1.5 Flash + GCP TTS Establecido.'); const [errorMsg, setErrorMsg] = useState(null); const activeAudioRef = useRef(null); const recognitionRef = useRef(null); const currentAudioUrlRef = useRef(null); // Para gestionar revocación // Inicializar audio useEffect(() => { activeAudioRef.current = new Audio(); activeAudioRef.current.preload = "auto"; return () => { if (activeAudioRef.current) { activeAudioRef.current.pause(); if (currentAudioUrlRef.current) { URL.revokeObjectURL(currentAudioUrlRef.current); } } if (recognitionRef.current) recognitionRef.current.stop(); }; }, []); // Configurar reconocimiento de voz useEffect(() => { if (!('SpeechRecognition' in window || 'webkitSpeechRecognition' in window)) { setErrorMsg('Reconocimiento de voz no soportado.'); return; } const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition; recognitionRef.current = new SpeechRecognition(); recognitionRef.current.lang = 'es-ES'; recognitionRef.current.interimResults = false; recognitionRef.current.continuous = false; recognitionRef.current.onresult = async (event) => { const transcript = event.results[0][0].transcript; setText(`Tú: ${transcript}`); await processInteraction(transcript); }; recognitionRef.current.onerror = (event) => { if (event.error !== 'no-speech') { setErrorMsg(`Micrófono: ${event.error}`); setIsHandsFree(false); } }; recognitionRef.current.onend = () => { if (isHandsFree && !isPlaying && !isLoading) { try { recognitionRef.current.start(); setStatusMsg('Escuchando...'); } catch (e) {} } }; // Limpiar al desmontar return () => { if (recognitionRef.current) recognitionRef.current.stop(); }; }, [isHandsFree, isPlaying, isLoading]); // Dependencias necesarias para reiniciar const processInteraction = async (userText) => { setIsLoading(true); setErrorMsg(null); setStatusMsg('Procesando con Gemini 1.5 Flash...'); const key = apiKey.trim(); try { // 1. Llamada a Gemini con timeout const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), 5000); const payloadText = { contents: [{ parts: [{ text: userText }] }], systemInstruction: { parts: [{ text: `Eres Kore, 28 años, Huesca. Labia, directa, sensual, disruptiva. Responde en español, máx 20 palabras. REGLA INNEGOCIABLE: CERO PREGUNTAS.` }] } }; const resText = await fetch( `https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=${key}`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(payloadText), signal: controller.signal } ); clearTimeout(timeoutId); if (!resText.ok) throw new Error(`Gemini error: ${resText.status}`); const dataText = await resText.json(); const aiText = dataText.candidates?.[0]?.content?.parts?.[0]?.text || "Mmm... vale."; setText(`Kore: ${aiText}`); // 2. Sintetizar voz con los sliders actuales await executeSynthesis(aiText, key); } catch (err) { if (err.name === 'AbortError') { setErrorMsg('Gemini timeout (5s)'); } else { setErrorMsg(err.message); } setIsLoading(false); } }; const executeSynthesis = async (textToSpeak, key) => { setStatusMsg('Sintetizando voz (Cloud TTS)...'); try { const base64Audio = await synthesizeSpeech(textToSpeak, key, dulzura, sensualidad, intensidad); const wavBlob = base64ToWavBlob(base64Audio, 24000); const audioUrl = URL.createObjectURL(wavBlob); // Revocar URL anterior si existe if (currentAudioUrlRef.current) { URL.revokeObjectURL(currentAudioUrlRef.current); } currentAudioUrlRef.current = audioUrl; activeAudioRef.current.src = audioUrl; activeAudioRef.current.onended = () => { setIsPlaying(false); setStatusMsg('Transmisión completada.'); if (isHandsFree) { try { recognitionRef.current.start(); setStatusMsg('Escuchando...'); } catch (e) {} } }; setStatusMsg('Transmitiendo...'); setIsPlaying(true); setIsLoading(false); await activeAudioRef.current.play().catch(err => { throw new Error(`Autoplay bloqueado: ${err.message}`); }); } catch (error) { throw new Error(`Fallo TTS: ${error.message}`); } }; const handleManualPlay = async () => { if (!text.trim()) return setErrorMsg('Escribe algo primero.'); // Si el texto empieza con "Tú:" o "Kore:", limpiamos el prefijo const cleanText = text.replace(/^(Tú:|Kore:)\s*/, ''); if (!cleanText.trim()) return setErrorMsg('Texto vacío después de limpiar.'); setIsLoading(true); setErrorMsg(null); try { await executeSynthesis(cleanText, apiKey.trim()); } catch (err) { setErrorMsg(err.message); setIsLoading(false); } }; const toggleHandsFree = () => { if (!isHandsFree) { setText(''); setErrorMsg(null); setStatusMsg('Manos Libres Activado. Habla...'); // Desbloquear audio en algunos navegadores if (activeAudioRef.current) { activeAudioRef.current.src = SILENT_WAV; activeAudioRef.current.play().catch(() => {}); } try { recognitionRef.current.start(); } catch (e) {} } else { if (activeAudioRef.current) { activeAudioRef.current.pause(); activeAudioRef.current.currentTime = 0; } setIsPlaying(false); setStatusMsg('Sistemas en pausa.'); if (recognitionRef.current) recognitionRef.current.stop(); } setIsHandsFree(!isHandsFree); }; const stopAudio = () => { if (activeAudioRef.current) { activeAudioRef.current.pause(); activeAudioRef.current.currentTime = 0; } setIsPlaying(false); setStatusMsg('Señal interrumpida.'); }; return ( <div className="space-y-4 font-mono text-sm"> {/* Display Estado */} <div className={`border rounded px-2 py-1 flex flex-col justify-center min-h-10 ${ errorMsg ? 'bg-red-950/50 border-red-900' : isHandsFree ? 'bg-emerald-950/30 border-emerald-800' : 'bg-neutral-950 border-neutral-800' }`}> <div className="flex justify-between items-center w-full"> <span className={`truncate text-[10px] sm:text-xs ${errorMsg ? 'text-red-500' : 'text-emerald-500'}`}> > {errorMsg || statusMsg} </span> {isPlaying && !errorMsg && <Activity size={14} className="text-emerald-500 animate-pulse ml-2 flex-shrink-0" />} {isLoading && !errorMsg && <Zap size={14} className="text-amber-500 animate-pulse ml-2 flex-shrink-0" />} {isHandsFree && !isPlaying && !isLoading && !errorMsg && <Mic size={14} className="text-red-500 animate-pulse ml-2 flex-shrink-0" />} </div> </div> {/* Input Texto / Log */} <textarea value={text} onChange={(e) => setText(e.target.value)} className="w-full bg-neutral-950/50 border border-neutral-700 rounded p-2 text-xs text-neutral-300 focus:outline-none focus:border-emerald-500 resize-none h-20" placeholder={isHandsFree ? "Escuchando transcripción en tiempo real..." : "Escribe texto directo o activa Manos Libres..."} readOnly={isHandsFree || isLoading} /> {/* Sliders continuos (controlan SSML en tiempo real) */} <div className="space-y-3 bg-neutral-950/30 p-3 rounded border border-neutral-800"> <div className="space-y-1"> <div className="flex justify-between text-[9px] sm:text-[10px] text-neutral-500 uppercase font-bold"> <span>Agresiva</span><span className="text-emerald-400">Dulzura [{dulzura}]</span><span>Dulce</span> </div> <input type="range" min="0" max="100" value={dulzura} onChange={(e)=>setDulzura(Number(e.target.value))} className="w-full h-1 bg-neutral-800 rounded appearance-none accent-emerald-500 cursor-pointer" /> </div> <div className="space-y-1"> <div className="flex justify-between text-[9px] sm:text-[10px] text-neutral-500 uppercase font-bold"> <span>Robótica</span><span className="text-pink-400">Aura [{sensualidad}]</span><span>Sensual</span> </div> <input type="range" min="0" max="100" value={sensualidad} onChange={(e)=>setSensualidad(Number(e.target.value))} className="w-full h-1 bg-neutral-800 rounded appearance-none accent-pink-500 cursor-pointer" /> </div> <div className="space-y-1"> <div className="flex justify-between text-[9px] sm:text-[10px] text-neutral-500 uppercase font-bold"> <span>Atenuada</span><span className="text-amber-400">Intensidad [{intensidad}]</span><span>Fuerte</span> </div> <input type="range" min="0" max="100" value={intensidad} onChange={(e)=>setIntensidad(Number(e.target.value))} className="w-full h-1 bg-neutral-800 rounded appearance-none accent-amber-500 cursor-pointer" /> </div> </div> {/* Botones de Control */} <div className="flex flex-col sm:flex-row gap-2"> <button onClick={toggleHandsFree} disabled={isLoading} className={`flex-1 py-2 rounded text-xs font-bold flex items-center justify-center gap-2 transition-colors border ${ isHandsFree ? 'bg-red-900/20 text-red-400 border-red-900/50 hover:bg-red-900/40 shadow-[0_0_10px_rgba(239,68,68,0.2)]' : 'bg-indigo-900/20 text-indigo-400 border-indigo-900/50 hover:bg-indigo-900/40' }`} > {isHandsFree ? <MicOff size={14} /> : <Mic size={14} />} {isHandsFree ? 'Detener Escucha' : 'Manos Libres'} </button> <div className="flex gap-2 flex-1"> <button onClick={handleManualPlay} disabled={isLoading || isPlaying || isHandsFree} className="flex-1 bg-emerald-600/20 hover:bg-emerald-600/40 text-emerald-400 border border-emerald-600/50 disabled:opacity-30 py-2 rounded text-xs font-bold flex items-center justify-center gap-1 transition-colors" > {isLoading ? <Loader2 size={14} className="animate-spin" /> : <Play size={14} />} Sintetizar </button> <button onClick={stopAudio} disabled={!isPlaying && !isHandsFree} className="px-4 bg-neutral-800 hover:bg-neutral-700 text-neutral-400 border border-neutral-700 disabled:opacity-30 py-2 rounded text-xs font-bold flex items-center justify-center transition-colors" > <Square size={14} /> </button> </div> </div> {/* Botón para limpiar caché (opcional) */} <div className="text-right"> <button onClick={() => audioCache.clear()} className="text-[8px] text-neutral-600 hover:text-neutral-400 underline" > limpiar caché de audio </button> </div> </div> ); }; // --- ENTORNO ESCRITORIO (sin cambios) --- export default function App() { const [widgets, setWidgets] = useState({ voice: { isOpen: true, pos: { x: window.innerWidth > 768 ? window.innerWidth / 2 - 170 : 20, y: 40 } } }); const toggleWidget = (id) => { setWidgets(prev => ({ ...prev, [id]: { ...prev[id], isOpen: !prev[id].isOpen } })); }; return ( <div className="w-full h-screen bg-neutral-950 bg-[radial-gradient(ellipse_80%_80%_at_50%_-20%,rgba(16,185,129,0.1),rgba(0,0,0,1))] overflow-hidden relative font-sans text-neutral-200"> <div className="absolute inset-0 flex items-center justify-center opacity-[0.02] pointer-events-none"><Settings2 size={500} /></div> {widgets.voice.isOpen && ( <DraggableWidget title="MODULADOR VOCAL KORE" icon={Zap} initialPos={widgets.voice.pos} onClose={() => toggleWidget('voice')}> <VoiceModulatorWidget /> </DraggableWidget> )} <div className="absolute bottom-6 left-1/2 transform -translate-x-1/2 bg-neutral-900/80 backdrop-blur-md border border-neutral-700/50 p-2 rounded-2xl shadow-2xl flex gap-2 z-[100]"> <div className="px-3 flex items-center border-r border-neutral-700/50 text-neutral-500"><LayoutGrid size={20} /></div> <button onClick={() => toggleWidget('voice')} className={`px-4 py-2 rounded-xl flex items-center gap-2 text-sm font-medium transition-all ${
Photography, side view of a kneeling gorgeous 34 year old Russian woman that's laughing,(beautiful face:1 .0)- ,and wearing a red mask, and using a bright orange Ostrich feather in an outstretched hand to clean dust off a small bookshelf in a luxurious castle room, (beautiful),(((perfect eyes and face))), perfect hands,((best quality, masterpiece)),((8K, RAW))
He optimizado tu código para lograr una modulación vocal continua y fluida basada en los sliders, con caché de audio, timeouts y mejor manejo del estado. Ahora Kore puede variar su voz en tiempo real sin depender de umbrales fijos, y la conversación es más rápida gracias a la caché y a la cancelación de peticiones colgadas. ```javascript import React, { useState, useRef, useEffect, useCallback } from 'react'; import { Play, Square, Mic, MicOff, Settings2, Activity, Loader2, X, GripHorizontal, LayoutGrid, Zap, AlertCircle } from 'lucide-react'; // --- CONSTANTES --- const SILENT_WAV = "data:audio/wav;base64,UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"; const TTS_TIMEOUT = 5000; // 5 segundos máximo para la síntesis const DEFAULT_API_KEY = 'AIzaSyBlkvy_Op-XlzSMSDDl9ip42dMFZX28MAA'; // ⚠️ Cámbiala por tu propia clave // --- UTILIDADES --- const base64ToWavBlob = (base64Data, sampleRate = 24000) => { const binaryString = window.atob(base64Data); const pcmData = new Uint8Array(binaryString.length); for (let i = 0; i < binaryString.length; i++) pcmData[i] = binaryString.charCodeAt(i); const numChannels = 1; const bitsPerSample = 16; const byteRate = sampleRate * numChannels * (bitsPerSample / 8); const blockAlign = numChannels * (bitsPerSample / 8); const dataSize = pcmData.length; const buffer = new ArrayBuffer(44 + dataSize); const view = new DataView(buffer); const writeString = (view, offset, string) => { for (let i = 0; i < string.length; i++) view.setUint8(offset + i, string.charCodeAt(i)); }; writeString(view, 0, 'RIFF'); view.setUint32(4, 36 + dataSize, true); writeString(view, 8, 'WAVE'); writeString(view, 12, 'fmt '); view.setUint32(16, 16, true); view.setUint16(20, 1, true); view.setUint16(22, numChannels, true); view.setUint32(24, sampleRate, true); view.setUint32(28, byteRate, true); view.setUint16(32, blockAlign, true); view.setUint16(34, bitsPerSample, true); writeString(view, 36, 'data'); view.setUint32(40, dataSize, true); for (let i = 0; i < dataSize; i++) view.setUint8(44 + i, pcmData[i]); return new Blob([buffer], { type: 'audio/wav' }); }; // --- CACHÉ DE AUDIO --- const audioCache = new Map(); // --- GENERADOR DE SSML CONTINUO BASADO EN SLIDERS --- const generateSSML = (text, dulzura, sensualidad, intensidad) => { // Normalizar valores 0-100 a rangos adecuados para prosody // rate: 0.5 a 2.0 (1.0 es normal) const rate = 0.8 + (intensidad / 100) * 1.2; // 0.8 (lento) a 2.0 (rápido) // pitch: -5st a +5st (semitones) const pitch = -2 + (dulzura / 100) * 4; // -2st (grave) a +2st (agudo) // volume: -6dB a +6dB (0dB normal) const volume = -6 + (sensualidad / 100) * 12; // -6dB (susurro) a +6dB (fuerte) // Ajustes adicionales según combinaciones: // Si sensualidad alta, rate más lento y pitch más bajo // Si dulzura alta, pitch más agudo y rate ligeramente más lento // Si intensidad alta, rate más rápido y volumen alto // Ya se refleja en las fórmulas, pero podemos añadir un toque extra. const ssml = `<speak> <prosody rate="${rate.toFixed(2)}" pitch="${pitch.toFixed(0)}st" volume="${volume.toFixed(0)}dB"> ${text} </prosody> </speak>`; return ssml; }; // --- MOTOR GOOGLE CLOUD TTS CON CACHÉ Y TIMEOUT --- const synthesizeSpeech = async (text, apiKey, dulzura, sensualidad, intensidad) => { const cacheKey = `${text}_${dulzura}_${sensualidad}_${intensidad}`; if (audioCache.has(cacheKey)) { console.log('🎯 Usando audio cacheado'); return audioCache.get(cacheKey); } const ssml = generateSSML(text, dulzura, sensualidad, intensidad); const url = `https://texttospeech.googleapis.com/v1/text:synthesize?key=${apiKey}`; const body = { input: { ssml }, voice: { languageCode: 'es-ES', name: 'es-ES-Neural2-F', ssmlGender: 'FEMALE' }, audioConfig: { audioEncoding: 'LINEAR16', sampleRateHertz: 24000 } }; const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), TTS_TIMEOUT); try { const res = await fetch(url, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(body), signal: controller.signal }); clearTimeout(timeoutId); if (!res.ok) throw new Error(`TTS error: ${res.status}`); const data = await res.json(); audioCache.set(cacheKey, data.audioContent); return data.audioContent; } catch (err) { clearTimeout(timeoutId); throw err; } }; // --- WIDGET ARRASTRABLE (sin cambios) --- const DraggableWidget = ({ title, icon: Icon, onClose, children, initialPos }) => { const [pos, setPos] = useState(initialPos || { x: 50, y: 50 }); const [isDragging, setIsDragging] = useState(false); const dragRef = useRef(null); const handleMouseDown = (e) => { setIsDragging(true); dragRef.current = { startX: e.clientX, startY: e.clientY, initialX: pos.x, initialY: pos.y }; }; const handleMouseMove = (e) => { if (!isDragging) return; setPos({ x: Math.max(0, dragRef.current.initialX + (e.clientX - dragRef.current.startX)), y: Math.max(0, dragRef.current.initialY + (e.clientY - dragRef.current.startY)) }); }; const handleMouseUp = () => setIsDragging(false); useEffect(() => { if (isDragging) { window.addEventListener('mousemove', handleMouseMove); window.addEventListener('mouseup', handleMouseUp); } return () => { window.removeEventListener('mousemove', handleMouseMove); window.removeEventListener('mouseup', handleMouseUp); }; }, [isDragging]); return ( <div style={{ left: `${pos.x}px`, top: `${pos.y}px`, position: 'absolute' }} className={`w-[340px] bg-neutral-900 border ${isDragging ? 'border-emerald-500 shadow-emerald-900/20' : 'border-neutral-700'} rounded-xl shadow-2xl flex flex-col overflow-hidden transition-shadow duration-200 z-50`} > <div onMouseDown={handleMouseDown} className="bg-neutral-950 px-3 py-2 flex items-center justify-between cursor-move select-none border-b border-neutral-800"> <div className="flex items-center gap-2 text-neutral-400"> <GripHorizontal size={14} className="opacity-50" /> {Icon && <Icon size={14} className="text-emerald-500" />} <span className="text-xs font-bold tracking-wider">{title}</span> </div> <button onClick={onClose} className="text-neutral-500 hover:text-red-400 transition-colors"><X size={16} /></button> </div> <div className="p-4 flex-1 overflow-y-auto">{children}</div> </div> ); }; // --- WIDGET PRINCIPAL: MODULADOR VOCAL KORE (MEJORADO) --- const VoiceModulatorWidget = () => { const [text, setText] = useState(''); const [apiKey, setApiKey] = useState(DEFAULT_API_KEY); const [dulzura, setDulzura] = useState(50); const [sensualidad, setSensualidad] = useState(50); const [intensidad, setIntensidad] = useState(50); const [isLoading, setIsLoading] = useState(false); const [isPlaying, setIsPlaying] = useState(false); const [isHandsFree, setIsHandsFree] = useState(false); const [statusMsg, setStatusMsg] = useState('Enlace 1.5 Flash + GCP TTS Establecido.'); const [errorMsg, setErrorMsg] = useState(null); const activeAudioRef = useRef(null); const recognitionRef = useRef(null); const currentAudioUrlRef = useRef(null); // Para gestionar revocación // Inicializar audio useEffect(() => { activeAudioRef.current = new Audio(); activeAudioRef.current.preload = "auto"; return () => { if (activeAudioRef.current) { activeAudioRef.current.pause(); if (currentAudioUrlRef.current) { URL.revokeObjectURL(currentAudioUrlRef.current); } } if (recognitionRef.current) recognitionRef.current.stop(); }; }, []); // Configurar reconocimiento de voz useEffect(() => { if (!('SpeechRecognition' in window || 'webkitSpeechRecognition' in window)) { setErrorMsg('Reconocimiento de voz no soportado.'); return; } const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition; recognitionRef.current = new SpeechRecognition(); recognitionRef.current.lang = 'es-ES'; recognitionRef.current.interimResults = false; recognitionRef.current.continuous = false; recognitionRef.current.onresult = async (event) => { const transcript = event.results[0][0].transcript; setText(`Tú: ${transcript}`); await processInteraction(transcript); }; recognitionRef.current.onerror = (event) => { if (event.error !== 'no-speech') { setErrorMsg(`Micrófono: ${event.error}`); setIsHandsFree(false); } }; recognitionRef.current.onend = () => { if (isHandsFree && !isPlaying && !isLoading) { try { recognitionRef.current.start(); setStatusMsg('Escuchando...'); } catch (e) {} } }; // Limpiar al desmontar return () => { if (recognitionRef.current) recognitionRef.current.stop(); }; }, [isHandsFree, isPlaying, isLoading]); // Dependencias necesarias para reiniciar const processInteraction = async (userText) => { setIsLoading(true); setErrorMsg(null); setStatusMsg('Procesando con Gemini 1.5 Flash...'); const key = apiKey.trim(); try { // 1. Llamada a Gemini con timeout const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), 5000); const payloadText = { contents: [{ parts: [{ text: userText }] }], systemInstruction: { parts: [{ text: `Eres Kore, 28 años, Huesca. Labia, directa, sensual, disruptiva. Responde en español, máx 20 palabras. REGLA INNEGOCIABLE: CERO PREGUNTAS.` }] } }; const resText = await fetch( `https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=${key}`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(payloadText), signal: controller.signal } ); clearTimeout(timeoutId); if (!resText.ok) throw new Error(`Gemini error: ${resText.status}`); const dataText = await resText.json(); const aiText = dataText.candidates?.[0]?.content?.parts?.[0]?.text || "Mmm... vale."; setText(`Kore: ${aiText}`); // 2. Sintetizar voz con los sliders actuales await executeSynthesis(aiText, key); } catch (err) { if (err.name === 'AbortError') { setErrorMsg('Gemini timeout (5s)'); } else { setErrorMsg(err.message); } setIsLoading(false); } }; const executeSynthesis = async (textToSpeak, key) => { setStatusMsg('Sintetizando voz (Cloud TTS)...'); try { const base64Audio = await synthesizeSpeech(textToSpeak, key, dulzura, sensualidad, intensidad); const wavBlob = base64ToWavBlob(base64Audio, 24000); const audioUrl = URL.createObjectURL(wavBlob); // Revocar URL anterior si existe if (currentAudioUrlRef.current) { URL.revokeObjectURL(currentAudioUrlRef.current); } currentAudioUrlRef.current = audioUrl; activeAudioRef.current.src = audioUrl; activeAudioRef.current.onended = () => { setIsPlaying(false); setStatusMsg('Transmisión completada.'); if (isHandsFree) { try { recognitionRef.current.start(); setStatusMsg('Escuchando...'); } catch (e) {} } }; setStatusMsg('Transmitiendo...'); setIsPlaying(true); setIsLoading(false); await activeAudioRef.current.play().catch(err => { throw new Error(`Autoplay bloqueado: ${err.message}`); }); } catch (error) { throw new Error(`Fallo TTS: ${error.message}`); } }; const handleManualPlay = async () => { if (!text.trim()) return setErrorMsg('Escribe algo primero.'); // Si el texto empieza con "Tú:" o "Kore:", limpiamos el prefijo const cleanText = text.replace(/^(Tú:|Kore:)\s*/, ''); if (!cleanText.trim()) return setErrorMsg('Texto vacío después de limpiar.'); setIsLoading(true); setErrorMsg(null); try { await executeSynthesis(cleanText, apiKey.trim()); } catch (err) { setErrorMsg(err.message); setIsLoading(false); } }; const toggleHandsFree = () => { if (!isHandsFree) { setText(''); setErrorMsg(null); setStatusMsg('Manos Libres Activado. Habla...'); // Desbloquear audio en algunos navegadores if (activeAudioRef.current) { activeAudioRef.current.src = SILENT_WAV; activeAudioRef.current.play().catch(() => {}); } try { recognitionRef.current.start(); } catch (e) {} } else { if (activeAudioRef.current) { activeAudioRef.current.pause(); activeAudioRef.current.currentTime = 0; } setIsPlaying(false); setStatusMsg('Sistemas en pausa.'); if (recognitionRef.current) recognitionRef.current.stop(); } setIsHandsFree(!isHandsFree); }; const stopAudio = () => { if (activeAudioRef.current) { activeAudioRef.current.pause(); activeAudioRef.current.currentTime = 0; } setIsPlaying(false); setStatusMsg('Señal interrumpida.'); }; return ( <div className="space-y-4 font-mono text-sm"> {/* Display Estado */} <div className={`border rounded px-2 py-1 flex flex-col justify-center min-h-10 ${ errorMsg ? 'bg-red-950/50 border-red-900' : isHandsFree ? 'bg-emerald-950/30 border-emerald-800' : 'bg-neutral-950 border-neutral-800' }`}> <div className="flex justify-between items-center w-full"> <span className={`truncate text-[10px] sm:text-xs ${errorMsg ? 'text-red-500' : 'text-emerald-500'}`}> > {errorMsg || statusMsg} </span> {isPlaying && !errorMsg && <Activity size={14} className="text-emerald-500 animate-pulse ml-2 flex-shrink-0" />} {isLoading && !errorMsg && <Zap size={14} className="text-amber-500 animate-pulse ml-2 flex-shrink-0" />} {isHandsFree && !isPlaying && !isLoading && !errorMsg && <Mic size={14} className="text-red-500 animate-pulse ml-2 flex-shrink-0" />} </div> </div> {/* Input Texto / Log */} <textarea value={text} onChange={(e) => setText(e.target.value)} className="w-full bg-neutral-950/50 border border-neutral-700 rounded p-2 text-xs text-neutral-300 focus:outline-none focus:border-emerald-500 resize-none h-20" placeholder={isHandsFree ? "Escuchando transcripción en tiempo real..." : "Escribe texto directo o activa Manos Libres..."} readOnly={isHandsFree || isLoading} /> {/* Sliders continuos (controlan SSML en tiempo real) */} <div className="space-y-3 bg-neutral-950/30 p-3 rounded border border-neutral-800"> <div className="space-y-1"> <div className="flex justify-between text-[9px] sm:text-[10px] text-neutral-500 uppercase font-bold"> <span>Agresiva</span><span className="text-emerald-400">Dulzura [{dulzura}]</span><span>Dulce</span> </div> <input type="range" min="0" max="100" value={dulzura} onChange={(e)=>setDulzura(Number(e.target.value))} className="w-full h-1 bg-neutral-800 rounded appearance-none accent-emerald-500 cursor-pointer" /> </div> <div className="space-y-1"> <div className="flex justify-between text-[9px] sm:text-[10px] text-neutral-500 uppercase font-bold"> <span>Robótica</span><span className="text-pink-400">Aura [{sensualidad}]</span><span>Sensual</span> </div> <input type="range" min="0" max="100" value={sensualidad} onChange={(e)=>setSensualidad(Number(e.target.value))} className="w-full h-1 bg-neutral-800 rounded appearance-none accent-pink-500 cursor-pointer" /> </div> <div className="space-y-1"> <div className="flex justify-between text-[9px] sm:text-[10px] text-neutral-500 uppercase font-bold"> <span>Atenuada</span><span className="text-amber-400">Intensidad [{intensidad}]</span><span>Fuerte</span> </div> <input type="range" min="0" max="100" value={intensidad} onChange={(e)=>setIntensidad(Number(e.target.value))} className="w-full h-1 bg-neutral-800 rounded appearance-none accent-amber-500 cursor-pointer" /> </div> </div> {/* Botones de Control */} <div className="flex flex-col sm:flex-row gap-2"> <button onClick={toggleHandsFree} disabled={isLoading} className={`flex-1 py-2 rounded text-xs font-bold flex items-center justify-center gap-2 transition-colors border ${ isHandsFree ? 'bg-red-900/20 text-red-400 border-red-900/50 hover:bg-red-900/40 shadow-[0_0_10px_rgba(239,68,68,0.2)]' : 'bg-indigo-900/20 text-indigo-400 border-indigo-900/50 hover:bg-indigo-900/40' }`} > {isHandsFree ? <MicOff size={14} /> : <Mic size={14} />} {isHandsFree ? 'Detener Escucha' : 'Manos Libres'} </button> <div className="flex gap-2 flex-1"> <button onClick={handleManualPlay} disabled={isLoading || isPlaying || isHandsFree} className="flex-1 bg-emerald-600/20 hover:bg-emerald-600/40 text-emerald-400 border border-emerald-600/50 disabled:opacity-30 py-2 rounded text-xs font-bold flex items-center justify-center gap-1 transition-colors" > {isLoading ? <Loader2 size={14} className="animate-spin" /> : <Play size={14} />} Sintetizar </button> <button onClick={stopAudio} disabled={!isPlaying && !isHandsFree} className="px-4 bg-neutral-800 hover:bg-neutral-700 text-neutral-400 border border-neutral-700 disabled:opacity-30 py-2 rounded text-xs font-bold flex items-center justify-center transition-colors" > <Square size={14} /> </button> </div> </div> {/* Botón para limpiar caché (opcional) */} <div className="text-right"> <button onClick={() => audioCache.clear()} className="text-[8px] text-neutral-600 hover:text-neutral-400 underline" > limpiar caché de audio </button> </div> </div> ); }; // --- ENTORNO ESCRITORIO (sin cambios) --- export default function App() { const [widgets, setWidgets] = useState({ voice: { isOpen: true, pos: { x: window.innerWidth > 768 ? window.innerWidth / 2 - 170 : 20, y: 40 } } }); const toggleWidget = (id) => { setWidgets(prev => ({ ...prev, [id]: { ...prev[id], isOpen: !prev[id].isOpen } })); }; return ( <div className="w-full h-screen bg-neutral-950 bg-[radial-gradient(ellipse_80%_80%_at_50%_-20%,rgba(16,185,129,0.1),rgba(0,0,0,1))] overflow-hidden relative font-sans text-neutral-200"> <div className="absolute inset-0 flex items-center justify-center opacity-[0.02] pointer-events-none"><Settings2 size={500} /></div> {widgets.voice.isOpen && ( <DraggableWidget title="MODULADOR VOCAL KORE" icon={Zap} initialPos={widgets.voice.pos} onClose={() => toggleWidget('voice')}> <VoiceModulatorWidget /> </DraggableWidget> )} <div className="absolute bottom-6 left-1/2 transform -translate-x-1/2 bg-neutral-900/80 backdrop-blur-md border border-neutral-700/50 p-2 rounded-2xl shadow-2xl flex gap-2 z-[100]"> <div className="px-3 flex items-center border-r border-neutral-700/50 text-neutral-500"><LayoutGrid size={20} /></div> <button onClick={() => toggleWidget('voice')} className={`px-4 py-2 rounded-xl flex items-center gap-2 text-sm font-medium transition-all ${
Photography, side view of a kneeling gorgeous 34 year old Russian woman that's laughing,(beautiful face:1 .0)- ,and wearing a red mask, and using a bright orange Ostrich feather in an outstretched hand to clean dust off a small bookshelf in a luxurious castle room, (beautiful),(((perfect eyes and face))), perfect hands,((best quality, masterpiece)),((8K, RAW))
He optimizado tu código para lograr una modulación vocal continua y fluida basada en los sliders, con caché de audio, timeouts y mejor manejo del estado. Ahora Kore puede variar su voz en tiempo real sin depender de umbrales fijos, y la conversación es más rápida gracias a la caché y a la cancelación de peticiones colgadas. ```javascript import React, { useState, useRef, useEffect, useCallback } from 'react'; import { Play, Square, Mic, MicOff, Settings2, Activity, Loader2, X, GripHorizontal, LayoutGrid, Zap, AlertCircle } from 'lucide-react'; // --- CONSTANTES --- const SILENT_WAV = "data:audio/wav;base64,UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"; const TTS_TIMEOUT = 5000; // 5 segundos máximo para la síntesis const DEFAULT_API_KEY = 'AIzaSyBlkvy_Op-XlzSMSDDl9ip42dMFZX28MAA'; // ⚠️ Cámbiala por tu propia clave // --- UTILIDADES --- const base64ToWavBlob = (base64Data, sampleRate = 24000) => { const binaryString = window.atob(base64Data); const pcmData = new Uint8Array(binaryString.length); for (let i = 0; i < binaryString.length; i++) pcmData[i] = binaryString.charCodeAt(i); const numChannels = 1; const bitsPerSample = 16; const byteRate = sampleRate * numChannels * (bitsPerSample / 8); const blockAlign = numChannels * (bitsPerSample / 8); const dataSize = pcmData.length; const buffer = new ArrayBuffer(44 + dataSize); const view = new DataView(buffer); const writeString = (view, offset, string) => { for (let i = 0; i < string.length; i++) view.setUint8(offset + i, string.charCodeAt(i)); }; writeString(view, 0, 'RIFF'); view.setUint32(4, 36 + dataSize, true); writeString(view, 8, 'WAVE'); writeString(view, 12, 'fmt '); view.setUint32(16, 16, true); view.setUint16(20, 1, true); view.setUint16(22, numChannels, true); view.setUint32(24, sampleRate, true); view.setUint32(28, byteRate, true); view.setUint16(32, blockAlign, true); view.setUint16(34, bitsPerSample, true); writeString(view, 36, 'data'); view.setUint32(40, dataSize, true); for (let i = 0; i < dataSize; i++) view.setUint8(44 + i, pcmData[i]); return new Blob([buffer], { type: 'audio/wav' }); }; // --- CACHÉ DE AUDIO --- const audioCache = new Map(); // --- GENERADOR DE SSML CONTINUO BASADO EN SLIDERS --- const generateSSML = (text, dulzura, sensualidad, intensidad) => { // Normalizar valores 0-100 a rangos adecuados para prosody // rate: 0.5 a 2.0 (1.0 es normal) const rate = 0.8 + (intensidad / 100) * 1.2; // 0.8 (lento) a 2.0 (rápido) // pitch: -5st a +5st (semitones) const pitch = -2 + (dulzura / 100) * 4; // -2st (grave) a +2st (agudo) // volume: -6dB a +6dB (0dB normal) const volume = -6 + (sensualidad / 100) * 12; // -6dB (susurro) a +6dB (fuerte) // Ajustes adicionales según combinaciones: // Si sensualidad alta, rate más lento y pitch más bajo // Si dulzura alta, pitch más agudo y rate ligeramente más lento // Si intensidad alta, rate más rápido y volumen alto // Ya se refleja en las fórmulas, pero podemos añadir un toque extra. const ssml = `<speak> <prosody rate="${rate.toFixed(2)}" pitch="${pitch.toFixed(0)}st" volume="${volume.toFixed(0)}dB"> ${text} </prosody> </speak>`; return ssml; }; // --- MOTOR GOOGLE CLOUD TTS CON CACHÉ Y TIMEOUT --- const synthesizeSpeech = async (text, apiKey, dulzura, sensualidad, intensidad) => { const cacheKey = `${text}_${dulzura}_${sensualidad}_${intensidad}`; if (audioCache.has(cacheKey)) { console.log('🎯 Usando audio cacheado'); return audioCache.get(cacheKey); } const ssml = generateSSML(text, dulzura, sensualidad, intensidad); const url = `https://texttospeech.googleapis.com/v1/text:synthesize?key=${apiKey}`; const body = { input: { ssml }, voice: { languageCode: 'es-ES', name: 'es-ES-Neural2-F', ssmlGender: 'FEMALE' }, audioConfig: { audioEncoding: 'LINEAR16', sampleRateHertz: 24000 } }; const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), TTS_TIMEOUT); try { const res = await fetch(url, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(body), signal: controller.signal }); clearTimeout(timeoutId); if (!res.ok) throw new Error(`TTS error: ${res.status}`); const data = await res.json(); audioCache.set(cacheKey, data.audioContent); return data.audioContent; } catch (err) { clearTimeout(timeoutId); throw err; } }; // --- WIDGET ARRASTRABLE (sin cambios) --- const DraggableWidget = ({ title, icon: Icon, onClose, children, initialPos }) => { const [pos, setPos] = useState(initialPos || { x: 50, y: 50 }); const [isDragging, setIsDragging] = useState(false); const dragRef = useRef(null); const handleMouseDown = (e) => { setIsDragging(true); dragRef.current = { startX: e.clientX, startY: e.clientY, initialX: pos.x, initialY: pos.y }; }; const handleMouseMove = (e) => { if (!isDragging) return; setPos({ x: Math.max(0, dragRef.current.initialX + (e.clientX - dragRef.current.startX)), y: Math.max(0, dragRef.current.initialY + (e.clientY - dragRef.current.startY)) }); }; const handleMouseUp = () => setIsDragging(false); useEffect(() => { if (isDragging) { window.addEventListener('mousemove', handleMouseMove); window.addEventListener('mouseup', handleMouseUp); } return () => { window.removeEventListener('mousemove', handleMouseMove); window.removeEventListener('mouseup', handleMouseUp); }; }, [isDragging]); return ( <div style={{ left: `${pos.x}px`, top: `${pos.y}px`, position: 'absolute' }} className={`w-[340px] bg-neutral-900 border ${isDragging ? 'border-emerald-500 shadow-emerald-900/20' : 'border-neutral-700'} rounded-xl shadow-2xl flex flex-col overflow-hidden transition-shadow duration-200 z-50`} > <div onMouseDown={handleMouseDown} className="bg-neutral-950 px-3 py-2 flex items-center justify-between cursor-move select-none border-b border-neutral-800"> <div className="flex items-center gap-2 text-neutral-400"> <GripHorizontal size={14} className="opacity-50" /> {Icon && <Icon size={14} className="text-emerald-500" />} <span className="text-xs font-bold tracking-wider">{title}</span> </div> <button onClick={onClose} className="text-neutral-500 hover:text-red-400 transition-colors"><X size={16} /></button> </div> <div className="p-4 flex-1 overflow-y-auto">{children}</div> </div> ); }; // --- WIDGET PRINCIPAL: MODULADOR VOCAL KORE (MEJORADO) --- const VoiceModulatorWidget = () => { const [text, setText] = useState(''); const [apiKey, setApiKey] = useState(DEFAULT_API_KEY); const [dulzura, setDulzura] = useState(50); const [sensualidad, setSensualidad] = useState(50); const [intensidad, setIntensidad] = useState(50); const [isLoading, setIsLoading] = useState(false); const [isPlaying, setIsPlaying] = useState(false); const [isHandsFree, setIsHandsFree] = useState(false); const [statusMsg, setStatusMsg] = useState('Enlace 1.5 Flash + GCP TTS Establecido.'); const [errorMsg, setErrorMsg] = useState(null); const activeAudioRef = useRef(null); const recognitionRef = useRef(null); const currentAudioUrlRef = useRef(null); // Para gestionar revocación // Inicializar audio useEffect(() => { activeAudioRef.current = new Audio(); activeAudioRef.current.preload = "auto"; return () => { if (activeAudioRef.current) { activeAudioRef.current.pause(); if (currentAudioUrlRef.current) { URL.revokeObjectURL(currentAudioUrlRef.current); } } if (recognitionRef.current) recognitionRef.current.stop(); }; }, []); // Configurar reconocimiento de voz useEffect(() => { if (!('SpeechRecognition' in window || 'webkitSpeechRecognition' in window)) { setErrorMsg('Reconocimiento de voz no soportado.'); return; } const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition; recognitionRef.current = new SpeechRecognition(); recognitionRef.current.lang = 'es-ES'; recognitionRef.current.interimResults = false; recognitionRef.current.continuous = false; recognitionRef.current.onresult = async (event) => { const transcript = event.results[0][0].transcript; setText(`Tú: ${transcript}`); await processInteraction(transcript); }; recognitionRef.current.onerror = (event) => { if (event.error !== 'no-speech') { setErrorMsg(`Micrófono: ${event.error}`); setIsHandsFree(false); } }; recognitionRef.current.onend = () => { if (isHandsFree && !isPlaying && !isLoading) { try { recognitionRef.current.start(); setStatusMsg('Escuchando...'); } catch (e) {} } }; // Limpiar al desmontar return () => { if (recognitionRef.current) recognitionRef.current.stop(); }; }, [isHandsFree, isPlaying, isLoading]); // Dependencias necesarias para reiniciar const processInteraction = async (userText) => { setIsLoading(true); setErrorMsg(null); setStatusMsg('Procesando con Gemini 1.5 Flash...'); const key = apiKey.trim(); try { // 1. Llamada a Gemini con timeout const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), 5000); const payloadText = { contents: [{ parts: [{ text: userText }] }], systemInstruction: { parts: [{ text: `Eres Kore, 28 años, Huesca. Labia, directa, sensual, disruptiva. Responde en español, máx 20 palabras. REGLA INNEGOCIABLE: CERO PREGUNTAS.` }] } }; const resText = await fetch( `https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=${key}`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(payloadText), signal: controller.signal } ); clearTimeout(timeoutId); if (!resText.ok) throw new Error(`Gemini error: ${resText.status}`); const dataText = await resText.json(); const aiText = dataText.candidates?.[0]?.content?.parts?.[0]?.text || "Mmm... vale."; setText(`Kore: ${aiText}`); // 2. Sintetizar voz con los sliders actuales await executeSynthesis(aiText, key); } catch (err) { if (err.name === 'AbortError') { setErrorMsg('Gemini timeout (5s)'); } else { setErrorMsg(err.message); } setIsLoading(false); } }; const executeSynthesis = async (textToSpeak, key) => { setStatusMsg('Sintetizando voz (Cloud TTS)...'); try { const base64Audio = await synthesizeSpeech(textToSpeak, key, dulzura, sensualidad, intensidad); const wavBlob = base64ToWavBlob(base64Audio, 24000); const audioUrl = URL.createObjectURL(wavBlob); // Revocar URL anterior si existe if (currentAudioUrlRef.current) { URL.revokeObjectURL(currentAudioUrlRef.current); } currentAudioUrlRef.current = audioUrl; activeAudioRef.current.src = audioUrl; activeAudioRef.current.onended = () => { setIsPlaying(false); setStatusMsg('Transmisión completada.'); if (isHandsFree) { try { recognitionRef.current.start(); setStatusMsg('Escuchando...'); } catch (e) {} } }; setStatusMsg('Transmitiendo...'); setIsPlaying(true); setIsLoading(false); await activeAudioRef.current.play().catch(err => { throw new Error(`Autoplay bloqueado: ${err.message}`); }); } catch (error) { throw new Error(`Fallo TTS: ${error.message}`); } }; const handleManualPlay = async () => { if (!text.trim()) return setErrorMsg('Escribe algo primero.'); // Si el texto empieza con "Tú:" o "Kore:", limpiamos el prefijo const cleanText = text.replace(/^(Tú:|Kore:)\s*/, ''); if (!cleanText.trim()) return setErrorMsg('Texto vacío después de limpiar.'); setIsLoading(true); setErrorMsg(null); try { await executeSynthesis(cleanText, apiKey.trim()); } catch (err) { setErrorMsg(err.message); setIsLoading(false); } }; const toggleHandsFree = () => { if (!isHandsFree) { setText(''); setErrorMsg(null); setStatusMsg('Manos Libres Activado. Habla...'); // Desbloquear audio en algunos navegadores if (activeAudioRef.current) { activeAudioRef.current.src = SILENT_WAV; activeAudioRef.current.play().catch(() => {}); } try { recognitionRef.current.start(); } catch (e) {} } else { if (activeAudioRef.current) { activeAudioRef.current.pause(); activeAudioRef.current.currentTime = 0; } setIsPlaying(false); setStatusMsg('Sistemas en pausa.'); if (recognitionRef.current) recognitionRef.current.stop(); } setIsHandsFree(!isHandsFree); }; const stopAudio = () => { if (activeAudioRef.current) { activeAudioRef.current.pause(); activeAudioRef.current.currentTime = 0; } setIsPlaying(false); setStatusMsg('Señal interrumpida.'); }; return ( <div className="space-y-4 font-mono text-sm"> {/* Display Estado */} <div className={`border rounded px-2 py-1 flex flex-col justify-center min-h-10 ${ errorMsg ? 'bg-red-950/50 border-red-900' : isHandsFree ? 'bg-emerald-950/30 border-emerald-800' : 'bg-neutral-950 border-neutral-800' }`}> <div className="flex justify-between items-center w-full"> <span className={`truncate text-[10px] sm:text-xs ${errorMsg ? 'text-red-500' : 'text-emerald-500'}`}> > {errorMsg || statusMsg} </span> {isPlaying && !errorMsg && <Activity size={14} className="text-emerald-500 animate-pulse ml-2 flex-shrink-0" />} {isLoading && !errorMsg && <Zap size={14} className="text-amber-500 animate-pulse ml-2 flex-shrink-0" />} {isHandsFree && !isPlaying && !isLoading && !errorMsg && <Mic size={14} className="text-red-500 animate-pulse ml-2 flex-shrink-0" />} </div> </div> {/* Input Texto / Log */} <textarea value={text} onChange={(e) => setText(e.target.value)} className="w-full bg-neutral-950/50 border border-neutral-700 rounded p-2 text-xs text-neutral-300 focus:outline-none focus:border-emerald-500 resize-none h-20" placeholder={isHandsFree ? "Escuchando transcripción en tiempo real..." : "Escribe texto directo o activa Manos Libres..."} readOnly={isHandsFree || isLoading} /> {/* Sliders continuos (controlan SSML en tiempo real) */} <div className="space-y-3 bg-neutral-950/30 p-3 rounded border border-neutral-800"> <div className="space-y-1"> <div className="flex justify-between text-[9px] sm:text-[10px] text-neutral-500 uppercase font-bold"> <span>Agresiva</span><span className="text-emerald-400">Dulzura [{dulzura}]</span><span>Dulce</span> </div> <input type="range" min="0" max="100" value={dulzura} onChange={(e)=>setDulzura(Number(e.target.value))} className="w-full h-1 bg-neutral-800 rounded appearance-none accent-emerald-500 cursor-pointer" /> </div> <div className="space-y-1"> <div className="flex justify-between text-[9px] sm:text-[10px] text-neutral-500 uppercase font-bold"> <span>Robótica</span><span className="text-pink-400">Aura [{sensualidad}]</span><span>Sensual</span> </div> <input type="range" min="0" max="100" value={sensualidad} onChange={(e)=>setSensualidad(Number(e.target.value))} className="w-full h-1 bg-neutral-800 rounded appearance-none accent-pink-500 cursor-pointer" /> </div> <div className="space-y-1"> <div className="flex justify-between text-[9px] sm:text-[10px] text-neutral-500 uppercase font-bold"> <span>Atenuada</span><span className="text-amber-400">Intensidad [{intensidad}]</span><span>Fuerte</span> </div> <input type="range" min="0" max="100" value={intensidad} onChange={(e)=>setIntensidad(Number(e.target.value))} className="w-full h-1 bg-neutral-800 rounded appearance-none accent-amber-500 cursor-pointer" /> </div> </div> {/* Botones de Control */} <div className="flex flex-col sm:flex-row gap-2"> <button onClick={toggleHandsFree} disabled={isLoading} className={`flex-1 py-2 rounded text-xs font-bold flex items-center justify-center gap-2 transition-colors border ${ isHandsFree ? 'bg-red-900/20 text-red-400 border-red-900/50 hover:bg-red-900/40 shadow-[0_0_10px_rgba(239,68,68,0.2)]' : 'bg-indigo-900/20 text-indigo-400 border-indigo-900/50 hover:bg-indigo-900/40' }`} > {isHandsFree ? <MicOff size={14} /> : <Mic size={14} />} {isHandsFree ? 'Detener Escucha' : 'Manos Libres'} </button> <div className="flex gap-2 flex-1"> <button onClick={handleManualPlay} disabled={isLoading || isPlaying || isHandsFree} className="flex-1 bg-emerald-600/20 hover:bg-emerald-600/40 text-emerald-400 border border-emerald-600/50 disabled:opacity-30 py-2 rounded text-xs font-bold flex items-center justify-center gap-1 transition-colors" > {isLoading ? <Loader2 size={14} className="animate-spin" /> : <Play size={14} />} Sintetizar </button> <button onClick={stopAudio} disabled={!isPlaying && !isHandsFree} className="px-4 bg-neutral-800 hover:bg-neutral-700 text-neutral-400 border border-neutral-700 disabled:opacity-30 py-2 rounded text-xs font-bold flex items-center justify-center transition-colors" > <Square size={14} /> </button> </div> </div> {/* Botón para limpiar caché (opcional) */} <div className="text-right"> <button onClick={() => audioCache.clear()} className="text-[8px] text-neutral-600 hover:text-neutral-400 underline" > limpiar caché de audio </button> </div> </div> ); }; // --- ENTORNO ESCRITORIO (sin cambios) --- export default function App() { const [widgets, setWidgets] = useState({ voice: { isOpen: true, pos: { x: window.innerWidth > 768 ? window.innerWidth / 2 - 170 : 20, y: 40 } } }); const toggleWidget = (id) => { setWidgets(prev => ({ ...prev, [id]: { ...prev[id], isOpen: !prev[id].isOpen } })); }; return ( <div className="w-full h-screen bg-neutral-950 bg-[radial-gradient(ellipse_80%_80%_at_50%_-20%,rgba(16,185,129,0.1),rgba(0,0,0,1))] overflow-hidden relative font-sans text-neutral-200"> <div className="absolute inset-0 flex items-center justify-center opacity-[0.02] pointer-events-none"><Settings2 size={500} /></div> {widgets.voice.isOpen && ( <DraggableWidget title="MODULADOR VOCAL KORE" icon={Zap} initialPos={widgets.voice.pos} onClose={() => toggleWidget('voice')}> <VoiceModulatorWidget /> </DraggableWidget> )} <div className="absolute bottom-6 left-1/2 transform -translate-x-1/2 bg-neutral-900/80 backdrop-blur-md border border-neutral-700/50 p-2 rounded-2xl shadow-2xl flex gap-2 z-[100]"> <div className="px-3 flex items-center border-r border-neutral-700/50 text-neutral-500"><LayoutGrid size={20} /></div> <button onClick={() => toggleWidget('voice')} className={`px-4 py-2 rounded-xl flex items-center gap-2 text-sm font-medium transition-all ${
Photography, side view of a kneeling gorgeous 34 year old Russian woman that's laughing,(beautiful face:1 .0)- ,and wearing a red mask, and using a bright orange Ostrich feather in an outstretched hand to clean dust off a small bookshelf in a luxurious castle room, (beautiful),(((perfect eyes and face))), perfect hands,((best quality, masterpiece)),((8K, RAW))
He optimizado tu código para lograr una modulación vocal continua y fluida basada en los sliders, con caché de audio, timeouts y mejor manejo del estado. Ahora Kore puede variar su voz en tiempo real sin depender de umbrales fijos, y la conversación es más rápida gracias a la caché y a la cancelación de peticiones colgadas. ```javascript import React, { useState, useRef, useEffect, useCallback } from 'react'; import { Play, Square, Mic, MicOff, Settings2, Activity, Loader2, X, GripHorizontal, LayoutGrid, Zap, AlertCircle } from 'lucide-react'; // --- CONSTANTES --- const SILENT_WAV = "data:audio/wav;base64,UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"; const TTS_TIMEOUT = 5000; // 5 segundos máximo para la síntesis const DEFAULT_API_KEY = 'AIzaSyBlkvy_Op-XlzSMSDDl9ip42dMFZX28MAA'; // ⚠️ Cámbiala por tu propia clave // --- UTILIDADES --- const base64ToWavBlob = (base64Data, sampleRate = 24000) => { const binaryString = window.atob(base64Data); const pcmData = new Uint8Array(binaryString.length); for (let i = 0; i < binaryString.length; i++) pcmData[i] = binaryString.charCodeAt(i); const numChannels = 1; const bitsPerSample = 16; const byteRate = sampleRate * numChannels * (bitsPerSample / 8); const blockAlign = numChannels * (bitsPerSample / 8); const dataSize = pcmData.length; const buffer = new ArrayBuffer(44 + dataSize); const view = new DataView(buffer); const writeString = (view, offset, string) => { for (let i = 0; i < string.length; i++) view.setUint8(offset + i, string.charCodeAt(i)); }; writeString(view, 0, 'RIFF'); view.setUint32(4, 36 + dataSize, true); writeString(view, 8, 'WAVE'); writeString(view, 12, 'fmt '); view.setUint32(16, 16, true); view.setUint16(20, 1, true); view.setUint16(22, numChannels, true); view.setUint32(24, sampleRate, true); view.setUint32(28, byteRate, true); view.setUint16(32, blockAlign, true); view.setUint16(34, bitsPerSample, true); writeString(view, 36, 'data'); view.setUint32(40, dataSize, true); for (let i = 0; i < dataSize; i++) view.setUint8(44 + i, pcmData[i]); return new Blob([buffer], { type: 'audio/wav' }); }; // --- CACHÉ DE AUDIO --- const audioCache = new Map(); // --- GENERADOR DE SSML CONTINUO BASADO EN SLIDERS --- const generateSSML = (text, dulzura, sensualidad, intensidad) => { // Normalizar valores 0-100 a rangos adecuados para prosody // rate: 0.5 a 2.0 (1.0 es normal) const rate = 0.8 + (intensidad / 100) * 1.2; // 0.8 (lento) a 2.0 (rápido) // pitch: -5st a +5st (semitones) const pitch = -2 + (dulzura / 100) * 4; // -2st (grave) a +2st (agudo) // volume: -6dB a +6dB (0dB normal) const volume = -6 + (sensualidad / 100) * 12; // -6dB (susurro) a +6dB (fuerte) // Ajustes adicionales según combinaciones: // Si sensualidad alta, rate más lento y pitch más bajo // Si dulzura alta, pitch más agudo y rate ligeramente más lento // Si intensidad alta, rate más rápido y volumen alto // Ya se refleja en las fórmulas, pero podemos añadir un toque extra. const ssml = `<speak> <prosody rate="${rate.toFixed(2)}" pitch="${pitch.toFixed(0)}st" volume="${volume.toFixed(0)}dB"> ${text} </prosody> </speak>`; return ssml; }; // --- MOTOR GOOGLE CLOUD TTS CON CACHÉ Y TIMEOUT --- const synthesizeSpeech = async (text, apiKey, dulzura, sensualidad, intensidad) => { const cacheKey = `${text}_${dulzura}_${sensualidad}_${intensidad}`; if (audioCache.has(cacheKey)) { console.log('🎯 Usando audio cacheado'); return audioCache.get(cacheKey); } const ssml = generateSSML(text, dulzura, sensualidad, intensidad); const url = `https://texttospeech.googleapis.com/v1/text:synthesize?key=${apiKey}`; const body = { input: { ssml }, voice: { languageCode: 'es-ES', name: 'es-ES-Neural2-F', ssmlGender: 'FEMALE' }, audioConfig: { audioEncoding: 'LINEAR16', sampleRateHertz: 24000 } }; const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), TTS_TIMEOUT); try { const res = await fetch(url, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(body), signal: controller.signal }); clearTimeout(timeoutId); if (!res.ok) throw new Error(`TTS error: ${res.status}`); const data = await res.json(); audioCache.set(cacheKey, data.audioContent); return data.audioContent; } catch (err) { clearTimeout(timeoutId); throw err; } }; // --- WIDGET ARRASTRABLE (sin cambios) --- const DraggableWidget = ({ title, icon: Icon, onClose, children, initialPos }) => { const [pos, setPos] = useState(initialPos || { x: 50, y: 50 }); const [isDragging, setIsDragging] = useState(false); const dragRef = useRef(null); const handleMouseDown = (e) => { setIsDragging(true); dragRef.current = { startX: e.clientX, startY: e.clientY, initialX: pos.x, initialY: pos.y }; }; const handleMouseMove = (e) => { if (!isDragging) return; setPos({ x: Math.max(0, dragRef.current.initialX + (e.clientX - dragRef.current.startX)), y: Math.max(0, dragRef.current.initialY + (e.clientY - dragRef.current.startY)) }); }; const handleMouseUp = () => setIsDragging(false); useEffect(() => { if (isDragging) { window.addEventListener('mousemove', handleMouseMove); window.addEventListener('mouseup', handleMouseUp); } return () => { window.removeEventListener('mousemove', handleMouseMove); window.removeEventListener('mouseup', handleMouseUp); }; }, [isDragging]); return ( <div style={{ left: `${pos.x}px`, top: `${pos.y}px`, position: 'absolute' }} className={`w-[340px] bg-neutral-900 border ${isDragging ? 'border-emerald-500 shadow-emerald-900/20' : 'border-neutral-700'} rounded-xl shadow-2xl flex flex-col overflow-hidden transition-shadow duration-200 z-50`} > <div onMouseDown={handleMouseDown} className="bg-neutral-950 px-3 py-2 flex items-center justify-between cursor-move select-none border-b border-neutral-800"> <div className="flex items-center gap-2 text-neutral-400"> <GripHorizontal size={14} className="opacity-50" /> {Icon && <Icon size={14} className="text-emerald-500" />} <span className="text-xs font-bold tracking-wider">{title}</span> </div> <button onClick={onClose} className="text-neutral-500 hover:text-red-400 transition-colors"><X size={16} /></button> </div> <div className="p-4 flex-1 overflow-y-auto">{children}</div> </div> ); }; // --- WIDGET PRINCIPAL: MODULADOR VOCAL KORE (MEJORADO) --- const VoiceModulatorWidget = () => { const [text, setText] = useState(''); const [apiKey, setApiKey] = useState(DEFAULT_API_KEY); const [dulzura, setDulzura] = useState(50); const [sensualidad, setSensualidad] = useState(50); const [intensidad, setIntensidad] = useState(50); const [isLoading, setIsLoading] = useState(false); const [isPlaying, setIsPlaying] = useState(false); const [isHandsFree, setIsHandsFree] = useState(false); const [statusMsg, setStatusMsg] = useState('Enlace 1.5 Flash + GCP TTS Establecido.'); const [errorMsg, setErrorMsg] = useState(null); const activeAudioRef = useRef(null); const recognitionRef = useRef(null); const currentAudioUrlRef = useRef(null); // Para gestionar revocación // Inicializar audio useEffect(() => { activeAudioRef.current = new Audio(); activeAudioRef.current.preload = "auto"; return () => { if (activeAudioRef.current) { activeAudioRef.current.pause(); if (currentAudioUrlRef.current) { URL.revokeObjectURL(currentAudioUrlRef.current); } } if (recognitionRef.current) recognitionRef.current.stop(); }; }, []); // Configurar reconocimiento de voz useEffect(() => { if (!('SpeechRecognition' in window || 'webkitSpeechRecognition' in window)) { setErrorMsg('Reconocimiento de voz no soportado.'); return; } const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition; recognitionRef.current = new SpeechRecognition(); recognitionRef.current.lang = 'es-ES'; recognitionRef.current.interimResults = false; recognitionRef.current.continuous = false; recognitionRef.current.onresult = async (event) => { const transcript = event.results[0][0].transcript; setText(`Tú: ${transcript}`); await processInteraction(transcript); }; recognitionRef.current.onerror = (event) => { if (event.error !== 'no-speech') { setErrorMsg(`Micrófono: ${event.error}`); setIsHandsFree(false); } }; recognitionRef.current.onend = () => { if (isHandsFree && !isPlaying && !isLoading) { try { recognitionRef.current.start(); setStatusMsg('Escuchando...'); } catch (e) {} } }; // Limpiar al desmontar return () => { if (recognitionRef.current) recognitionRef.current.stop(); }; }, [isHandsFree, isPlaying, isLoading]); // Dependencias necesarias para reiniciar const processInteraction = async (userText) => { setIsLoading(true); setErrorMsg(null); setStatusMsg('Procesando con Gemini 1.5 Flash...'); const key = apiKey.trim(); try { // 1. Llamada a Gemini con timeout const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), 5000); const payloadText = { contents: [{ parts: [{ text: userText }] }], systemInstruction: { parts: [{ text: `Eres Kore, 28 años, Huesca. Labia, directa, sensual, disruptiva. Responde en español, máx 20 palabras. REGLA INNEGOCIABLE: CERO PREGUNTAS.` }] } }; const resText = await fetch( `https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=${key}`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(payloadText), signal: controller.signal } ); clearTimeout(timeoutId); if (!resText.ok) throw new Error(`Gemini error: ${resText.status}`); const dataText = await resText.json(); const aiText = dataText.candidates?.[0]?.content?.parts?.[0]?.text || "Mmm... vale."; setText(`Kore: ${aiText}`); // 2. Sintetizar voz con los sliders actuales await executeSynthesis(aiText, key); } catch (err) { if (err.name === 'AbortError') { setErrorMsg('Gemini timeout (5s)'); } else { setErrorMsg(err.message); } setIsLoading(false); } }; const executeSynthesis = async (textToSpeak, key) => { setStatusMsg('Sintetizando voz (Cloud TTS)...'); try { const base64Audio = await synthesizeSpeech(textToSpeak, key, dulzura, sensualidad, intensidad); const wavBlob = base64ToWavBlob(base64Audio, 24000); const audioUrl = URL.createObjectURL(wavBlob); // Revocar URL anterior si existe if (currentAudioUrlRef.current) { URL.revokeObjectURL(currentAudioUrlRef.current); } currentAudioUrlRef.current = audioUrl; activeAudioRef.current.src = audioUrl; activeAudioRef.current.onended = () => { setIsPlaying(false); setStatusMsg('Transmisión completada.'); if (isHandsFree) { try { recognitionRef.current.start(); setStatusMsg('Escuchando...'); } catch (e) {} } }; setStatusMsg('Transmitiendo...'); setIsPlaying(true); setIsLoading(false); await activeAudioRef.current.play().catch(err => { throw new Error(`Autoplay bloqueado: ${err.message}`); }); } catch (error) { throw new Error(`Fallo TTS: ${error.message}`); } }; const handleManualPlay = async () => { if (!text.trim()) return setErrorMsg('Escribe algo primero.'); // Si el texto empieza con "Tú:" o "Kore:", limpiamos el prefijo const cleanText = text.replace(/^(Tú:|Kore:)\s*/, ''); if (!cleanText.trim()) return setErrorMsg('Texto vacío después de limpiar.'); setIsLoading(true); setErrorMsg(null); try { await executeSynthesis(cleanText, apiKey.trim()); } catch (err) { setErrorMsg(err.message); setIsLoading(false); } }; const toggleHandsFree = () => { if (!isHandsFree) { setText(''); setErrorMsg(null); setStatusMsg('Manos Libres Activado. Habla...'); // Desbloquear audio en algunos navegadores if (activeAudioRef.current) { activeAudioRef.current.src = SILENT_WAV; activeAudioRef.current.play().catch(() => {}); } try { recognitionRef.current.start(); } catch (e) {} } else { if (activeAudioRef.current) { activeAudioRef.current.pause(); activeAudioRef.current.currentTime = 0; } setIsPlaying(false); setStatusMsg('Sistemas en pausa.'); if (recognitionRef.current) recognitionRef.current.stop(); } setIsHandsFree(!isHandsFree); }; const stopAudio = () => { if (activeAudioRef.current) { activeAudioRef.current.pause(); activeAudioRef.current.currentTime = 0; } setIsPlaying(false); setStatusMsg('Señal interrumpida.'); }; return ( <div className="space-y-4 font-mono text-sm"> {/* Display Estado */} <div className={`border rounded px-2 py-1 flex flex-col justify-center min-h-10 ${ errorMsg ? 'bg-red-950/50 border-red-900' : isHandsFree ? 'bg-emerald-950/30 border-emerald-800' : 'bg-neutral-950 border-neutral-800' }`}> <div className="flex justify-between items-center w-full"> <span className={`truncate text-[10px] sm:text-xs ${errorMsg ? 'text-red-500' : 'text-emerald-500'}`}> > {errorMsg || statusMsg} </span> {isPlaying && !errorMsg && <Activity size={14} className="text-emerald-500 animate-pulse ml-2 flex-shrink-0" />} {isLoading && !errorMsg && <Zap size={14} className="text-amber-500 animate-pulse ml-2 flex-shrink-0" />} {isHandsFree && !isPlaying && !isLoading && !errorMsg && <Mic size={14} className="text-red-500 animate-pulse ml-2 flex-shrink-0" />} </div> </div> {/* Input Texto / Log */} <textarea value={text} onChange={(e) => setText(e.target.value)} className="w-full bg-neutral-950/50 border border-neutral-700 rounded p-2 text-xs text-neutral-300 focus:outline-none focus:border-emerald-500 resize-none h-20" placeholder={isHandsFree ? "Escuchando transcripción en tiempo real..." : "Escribe texto directo o activa Manos Libres..."} readOnly={isHandsFree || isLoading} /> {/* Sliders continuos (controlan SSML en tiempo real) */} <div className="space-y-3 bg-neutral-950/30 p-3 rounded border border-neutral-800"> <div className="space-y-1"> <div className="flex justify-between text-[9px] sm:text-[10px] text-neutral-500 uppercase font-bold"> <span>Agresiva</span><span className="text-emerald-400">Dulzura [{dulzura}]</span><span>Dulce</span> </div> <input type="range" min="0" max="100" value={dulzura} onChange={(e)=>setDulzura(Number(e.target.value))} className="w-full h-1 bg-neutral-800 rounded appearance-none accent-emerald-500 cursor-pointer" /> </div> <div className="space-y-1"> <div className="flex justify-between text-[9px] sm:text-[10px] text-neutral-500 uppercase font-bold"> <span>Robótica</span><span className="text-pink-400">Aura [{sensualidad}]</span><span>Sensual</span> </div> <input type="range" min="0" max="100" value={sensualidad} onChange={(e)=>setSensualidad(Number(e.target.value))} className="w-full h-1 bg-neutral-800 rounded appearance-none accent-pink-500 cursor-pointer" /> </div> <div className="space-y-1"> <div className="flex justify-between text-[9px] sm:text-[10px] text-neutral-500 uppercase font-bold"> <span>Atenuada</span><span className="text-amber-400">Intensidad [{intensidad}]</span><span>Fuerte</span> </div> <input type="range" min="0" max="100" value={intensidad} onChange={(e)=>setIntensidad(Number(e.target.value))} className="w-full h-1 bg-neutral-800 rounded appearance-none accent-amber-500 cursor-pointer" /> </div> </div> {/* Botones de Control */} <div className="flex flex-col sm:flex-row gap-2"> <button onClick={toggleHandsFree} disabled={isLoading} className={`flex-1 py-2 rounded text-xs font-bold flex items-center justify-center gap-2 transition-colors border ${ isHandsFree ? 'bg-red-900/20 text-red-400 border-red-900/50 hover:bg-red-900/40 shadow-[0_0_10px_rgba(239,68,68,0.2)]' : 'bg-indigo-900/20 text-indigo-400 border-indigo-900/50 hover:bg-indigo-900/40' }`} > {isHandsFree ? <MicOff size={14} /> : <Mic size={14} />} {isHandsFree ? 'Detener Escucha' : 'Manos Libres'} </button> <div className="flex gap-2 flex-1"> <button onClick={handleManualPlay} disabled={isLoading || isPlaying || isHandsFree} className="flex-1 bg-emerald-600/20 hover:bg-emerald-600/40 text-emerald-400 border border-emerald-600/50 disabled:opacity-30 py-2 rounded text-xs font-bold flex items-center justify-center gap-1 transition-colors" > {isLoading ? <Loader2 size={14} className="animate-spin" /> : <Play size={14} />} Sintetizar </button> <button onClick={stopAudio} disabled={!isPlaying && !isHandsFree} className="px-4 bg-neutral-800 hover:bg-neutral-700 text-neutral-400 border border-neutral-700 disabled:opacity-30 py-2 rounded text-xs font-bold flex items-center justify-center transition-colors" > <Square size={14} /> </button> </div> </div> {/* Botón para limpiar caché (opcional) */} <div className="text-right"> <button onClick={() => audioCache.clear()} className="text-[8px] text-neutral-600 hover:text-neutral-400 underline" > limpiar caché de audio </button> </div> </div> ); }; // --- ENTORNO ESCRITORIO (sin cambios) --- export default function App() { const [widgets, setWidgets] = useState({ voice: { isOpen: true, pos: { x: window.innerWidth > 768 ? window.innerWidth / 2 - 170 : 20, y: 40 } } }); const toggleWidget = (id) => { setWidgets(prev => ({ ...prev, [id]: { ...prev[id], isOpen: !prev[id].isOpen } })); }; return ( <div className="w-full h-screen bg-neutral-950 bg-[radial-gradient(ellipse_80%_80%_at_50%_-20%,rgba(16,185,129,0.1),rgba(0,0,0,1))] overflow-hidden relative font-sans text-neutral-200"> <div className="absolute inset-0 flex items-center justify-center opacity-[0.02] pointer-events-none"><Settings2 size={500} /></div> {widgets.voice.isOpen && ( <DraggableWidget title="MODULADOR VOCAL KORE" icon={Zap} initialPos={widgets.voice.pos} onClose={() => toggleWidget('voice')}> <VoiceModulatorWidget /> </DraggableWidget> )} <div className="absolute bottom-6 left-1/2 transform -translate-x-1/2 bg-neutral-900/80 backdrop-blur-md border border-neutral-700/50 p-2 rounded-2xl shadow-2xl flex gap-2 z-[100]"> <div className="px-3 flex items-center border-r border-neutral-700/50 text-neutral-500"><LayoutGrid size={20} /></div> <button onClick={() => toggleWidget('voice')} className={`px-4 py-2 rounded-xl flex items-center gap-2 text-sm font-medium transition-all ${
Photography, side view of a kneeling gorgeous 34 year old Russian woman that's laughing,(beautiful face:1 .0)- ,and wearing a red mask, and using a bright orange Ostrich feather in an outstretched hand to clean dust off a small bookshelf in a luxurious castle room, (beautiful),(((perfect eyes and face))), perfect hands,((best quality, masterpiece)),((8K, RAW))
He optimizado tu código para lograr una modulación vocal continua y fluida basada en los sliders, con caché de audio, timeouts y mejor manejo del estado. Ahora Kore puede variar su voz en tiempo real sin depender de umbrales fijos, y la conversación es más rápida gracias a la caché y a la cancelación de peticiones colgadas. ```javascript import React, { useState, useRef, useEffect, useCallback } from 'react'; import { Play, Square, Mic, MicOff, Settings2, Activity, Loader2, X, GripHorizontal, LayoutGrid, Zap, AlertCircle } from 'lucide-react'; // --- CONSTANTES --- const SILENT_WAV = "data:audio/wav;base64,UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"; const TTS_TIMEOUT = 5000; // 5 segundos máximo para la síntesis const DEFAULT_API_KEY = 'AIzaSyBlkvy_Op-XlzSMSDDl9ip42dMFZX28MAA'; // ⚠️ Cámbiala por tu propia clave // --- UTILIDADES --- const base64ToWavBlob = (base64Data, sampleRate = 24000) => { const binaryString = window.atob(base64Data); const pcmData = new Uint8Array(binaryString.length); for (let i = 0; i < binaryString.length; i++) pcmData[i] = binaryString.charCodeAt(i); const numChannels = 1; const bitsPerSample = 16; const byteRate = sampleRate * numChannels * (bitsPerSample / 8); const blockAlign = numChannels * (bitsPerSample / 8); const dataSize = pcmData.length; const buffer = new ArrayBuffer(44 + dataSize); const view = new DataView(buffer); const writeString = (view, offset, string) => { for (let i = 0; i < string.length; i++) view.setUint8(offset + i, string.charCodeAt(i)); }; writeString(view, 0, 'RIFF'); view.setUint32(4, 36 + dataSize, true); writeString(view, 8, 'WAVE'); writeString(view, 12, 'fmt '); view.setUint32(16, 16, true); view.setUint16(20, 1, true); view.setUint16(22, numChannels, true); view.setUint32(24, sampleRate, true); view.setUint32(28, byteRate, true); view.setUint16(32, blockAlign, true); view.setUint16(34, bitsPerSample, true); writeString(view, 36, 'data'); view.setUint32(40, dataSize, true); for (let i = 0; i < dataSize; i++) view.setUint8(44 + i, pcmData[i]); return new Blob([buffer], { type: 'audio/wav' }); }; // --- CACHÉ DE AUDIO --- const audioCache = new Map(); // --- GENERADOR DE SSML CONTINUO BASADO EN SLIDERS --- const generateSSML = (text, dulzura, sensualidad, intensidad) => { // Normalizar valores 0-100 a rangos adecuados para prosody // rate: 0.5 a 2.0 (1.0 es normal) const rate = 0.8 + (intensidad / 100) * 1.2; // 0.8 (lento) a 2.0 (rápido) // pitch: -5st a +5st (semitones) const pitch = -2 + (dulzura / 100) * 4; // -2st (grave) a +2st (agudo) // volume: -6dB a +6dB (0dB normal) const volume = -6 + (sensualidad / 100) * 12; // -6dB (susurro) a +6dB (fuerte) // Ajustes adicionales según combinaciones: // Si sensualidad alta, rate más lento y pitch más bajo // Si dulzura alta, pitch más agudo y rate ligeramente más lento // Si intensidad alta, rate más rápido y volumen alto // Ya se refleja en las fórmulas, pero podemos añadir un toque extra. const ssml = `<speak> <prosody rate="${rate.toFixed(2)}" pitch="${pitch.toFixed(0)}st" volume="${volume.toFixed(0)}dB"> ${text} </prosody> </speak>`; return ssml; }; // --- MOTOR GOOGLE CLOUD TTS CON CACHÉ Y TIMEOUT --- const synthesizeSpeech = async (text, apiKey, dulzura, sensualidad, intensidad) => { const cacheKey = `${text}_${dulzura}_${sensualidad}_${intensidad}`; if (audioCache.has(cacheKey)) { console.log('🎯 Usando audio cacheado'); return audioCache.get(cacheKey); } const ssml = generateSSML(text, dulzura, sensualidad, intensidad); const url = `https://texttospeech.googleapis.com/v1/text:synthesize?key=${apiKey}`; const body = { input: { ssml }, voice: { languageCode: 'es-ES', name: 'es-ES-Neural2-F', ssmlGender: 'FEMALE' }, audioConfig: { audioEncoding: 'LINEAR16', sampleRateHertz: 24000 } }; const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), TTS_TIMEOUT); try { const res = await fetch(url, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(body), signal: controller.signal }); clearTimeout(timeoutId); if (!res.ok) throw new Error(`TTS error: ${res.status}`); const data = await res.json(); audioCache.set(cacheKey, data.audioContent); return data.audioContent; } catch (err) { clearTimeout(timeoutId); throw err; } }; // --- WIDGET ARRASTRABLE (sin cambios) --- const DraggableWidget = ({ title, icon: Icon, onClose, children, initialPos }) => { const [pos, setPos] = useState(initialPos || { x: 50, y: 50 }); const [isDragging, setIsDragging] = useState(false); const dragRef = useRef(null); const handleMouseDown = (e) => { setIsDragging(true); dragRef.current = { startX: e.clientX, startY: e.clientY, initialX: pos.x, initialY: pos.y }; }; const handleMouseMove = (e) => { if (!isDragging) return; setPos({ x: Math.max(0, dragRef.current.initialX + (e.clientX - dragRef.current.startX)), y: Math.max(0, dragRef.current.initialY + (e.clientY - dragRef.current.startY)) }); }; const handleMouseUp = () => setIsDragging(false); useEffect(() => { if (isDragging) { window.addEventListener('mousemove', handleMouseMove); window.addEventListener('mouseup', handleMouseUp); } return () => { window.removeEventListener('mousemove', handleMouseMove); window.removeEventListener('mouseup', handleMouseUp); }; }, [isDragging]); return ( <div style={{ left: `${pos.x}px`, top: `${pos.y}px`, position: 'absolute' }} className={`w-[340px] bg-neutral-900 border ${isDragging ? 'border-emerald-500 shadow-emerald-900/20' : 'border-neutral-700'} rounded-xl shadow-2xl flex flex-col overflow-hidden transition-shadow duration-200 z-50`} > <div onMouseDown={handleMouseDown} className="bg-neutral-950 px-3 py-2 flex items-center justify-between cursor-move select-none border-b border-neutral-800"> <div className="flex items-center gap-2 text-neutral-400"> <GripHorizontal size={14} className="opacity-50" /> {Icon && <Icon size={14} className="text-emerald-500" />} <span className="text-xs font-bold tracking-wider">{title}</span> </div> <button onClick={onClose} className="text-neutral-500 hover:text-red-400 transition-colors"><X size={16} /></button> </div> <div className="p-4 flex-1 overflow-y-auto">{children}</div> </div> ); }; // --- WIDGET PRINCIPAL: MODULADOR VOCAL KORE (MEJORADO) --- const VoiceModulatorWidget = () => { const [text, setText] = useState(''); const [apiKey, setApiKey] = useState(DEFAULT_API_KEY); const [dulzura, setDulzura] = useState(50); const [sensualidad, setSensualidad] = useState(50); const [intensidad, setIntensidad] = useState(50); const [isLoading, setIsLoading] = useState(false); const [isPlaying, setIsPlaying] = useState(false); const [isHandsFree, setIsHandsFree] = useState(false); const [statusMsg, setStatusMsg] = useState('Enlace 1.5 Flash + GCP TTS Establecido.'); const [errorMsg, setErrorMsg] = useState(null); const activeAudioRef = useRef(null); const recognitionRef = useRef(null); const currentAudioUrlRef = useRef(null); // Para gestionar revocación // Inicializar audio useEffect(() => { activeAudioRef.current = new Audio(); activeAudioRef.current.preload = "auto"; return () => { if (activeAudioRef.current) { activeAudioRef.current.pause(); if (currentAudioUrlRef.current) { URL.revokeObjectURL(currentAudioUrlRef.current); } } if (recognitionRef.current) recognitionRef.current.stop(); }; }, []); // Configurar reconocimiento de voz useEffect(() => { if (!('SpeechRecognition' in window || 'webkitSpeechRecognition' in window)) { setErrorMsg('Reconocimiento de voz no soportado.'); return; } const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition; recognitionRef.current = new SpeechRecognition(); recognitionRef.current.lang = 'es-ES'; recognitionRef.current.interimResults = false; recognitionRef.current.continuous = false; recognitionRef.current.onresult = async (event) => { const transcript = event.results[0][0].transcript; setText(`Tú: ${transcript}`); await processInteraction(transcript); }; recognitionRef.current.onerror = (event) => { if (event.error !== 'no-speech') { setErrorMsg(`Micrófono: ${event.error}`); setIsHandsFree(false); } }; recognitionRef.current.onend = () => { if (isHandsFree && !isPlaying && !isLoading) { try { recognitionRef.current.start(); setStatusMsg('Escuchando...'); } catch (e) {} } }; // Limpiar al desmontar return () => { if (recognitionRef.current) recognitionRef.current.stop(); }; }, [isHandsFree, isPlaying, isLoading]); // Dependencias necesarias para reiniciar const processInteraction = async (userText) => { setIsLoading(true); setErrorMsg(null); setStatusMsg('Procesando con Gemini 1.5 Flash...'); const key = apiKey.trim(); try { // 1. Llamada a Gemini con timeout const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), 5000); const payloadText = { contents: [{ parts: [{ text: userText }] }], systemInstruction: { parts: [{ text: `Eres Kore, 28 años, Huesca. Labia, directa, sensual, disruptiva. Responde en español, máx 20 palabras. REGLA INNEGOCIABLE: CERO PREGUNTAS.` }] } }; const resText = await fetch( `https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=${key}`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(payloadText), signal: controller.signal } ); clearTimeout(timeoutId); if (!resText.ok) throw new Error(`Gemini error: ${resText.status}`); const dataText = await resText.json(); const aiText = dataText.candidates?.[0]?.content?.parts?.[0]?.text || "Mmm... vale."; setText(`Kore: ${aiText}`); // 2. Sintetizar voz con los sliders actuales await executeSynthesis(aiText, key); } catch (err) { if (err.name === 'AbortError') { setErrorMsg('Gemini timeout (5s)'); } else { setErrorMsg(err.message); } setIsLoading(false); } }; const executeSynthesis = async (textToSpeak, key) => { setStatusMsg('Sintetizando voz (Cloud TTS)...'); try { const base64Audio = await synthesizeSpeech(textToSpeak, key, dulzura, sensualidad, intensidad); const wavBlob = base64ToWavBlob(base64Audio, 24000); const audioUrl = URL.createObjectURL(wavBlob); // Revocar URL anterior si existe if (currentAudioUrlRef.current) { URL.revokeObjectURL(currentAudioUrlRef.current); } currentAudioUrlRef.current = audioUrl; activeAudioRef.current.src = audioUrl; activeAudioRef.current.onended = () => { setIsPlaying(false); setStatusMsg('Transmisión completada.'); if (isHandsFree) { try { recognitionRef.current.start(); setStatusMsg('Escuchando...'); } catch (e) {} } }; setStatusMsg('Transmitiendo...'); setIsPlaying(true); setIsLoading(false); await activeAudioRef.current.play().catch(err => { throw new Error(`Autoplay bloqueado: ${err.message}`); }); } catch (error) { throw new Error(`Fallo TTS: ${error.message}`); } }; const handleManualPlay = async () => { if (!text.trim()) return setErrorMsg('Escribe algo primero.'); // Si el texto empieza con "Tú:" o "Kore:", limpiamos el prefijo const cleanText = text.replace(/^(Tú:|Kore:)\s*/, ''); if (!cleanText.trim()) return setErrorMsg('Texto vacío después de limpiar.'); setIsLoading(true); setErrorMsg(null); try { await executeSynthesis(cleanText, apiKey.trim()); } catch (err) { setErrorMsg(err.message); setIsLoading(false); } }; const toggleHandsFree = () => { if (!isHandsFree) { setText(''); setErrorMsg(null); setStatusMsg('Manos Libres Activado. Habla...'); // Desbloquear audio en algunos navegadores if (activeAudioRef.current) { activeAudioRef.current.src = SILENT_WAV; activeAudioRef.current.play().catch(() => {}); } try { recognitionRef.current.start(); } catch (e) {} } else { if (activeAudioRef.current) { activeAudioRef.current.pause(); activeAudioRef.current.currentTime = 0; } setIsPlaying(false); setStatusMsg('Sistemas en pausa.'); if (recognitionRef.current) recognitionRef.current.stop(); } setIsHandsFree(!isHandsFree); }; const stopAudio = () => { if (activeAudioRef.current) { activeAudioRef.current.pause(); activeAudioRef.current.currentTime = 0; } setIsPlaying(false); setStatusMsg('Señal interrumpida.'); }; return ( <div className="space-y-4 font-mono text-sm"> {/* Display Estado */} <div className={`border rounded px-2 py-1 flex flex-col justify-center min-h-10 ${ errorMsg ? 'bg-red-950/50 border-red-900' : isHandsFree ? 'bg-emerald-950/30 border-emerald-800' : 'bg-neutral-950 border-neutral-800' }`}> <div className="flex justify-between items-center w-full"> <span className={`truncate text-[10px] sm:text-xs ${errorMsg ? 'text-red-500' : 'text-emerald-500'}`}> > {errorMsg || statusMsg} </span> {isPlaying && !errorMsg && <Activity size={14} className="text-emerald-500 animate-pulse ml-2 flex-shrink-0" />} {isLoading && !errorMsg && <Zap size={14} className="text-amber-500 animate-pulse ml-2 flex-shrink-0" />} {isHandsFree && !isPlaying && !isLoading && !errorMsg && <Mic size={14} className="text-red-500 animate-pulse ml-2 flex-shrink-0" />} </div> </div> {/* Input Texto / Log */} <textarea value={text} onChange={(e) => setText(e.target.value)} className="w-full bg-neutral-950/50 border border-neutral-700 rounded p-2 text-xs text-neutral-300 focus:outline-none focus:border-emerald-500 resize-none h-20" placeholder={isHandsFree ? "Escuchando transcripción en tiempo real..." : "Escribe texto directo o activa Manos Libres..."} readOnly={isHandsFree || isLoading} /> {/* Sliders continuos (controlan SSML en tiempo real) */} <div className="space-y-3 bg-neutral-950/30 p-3 rounded border border-neutral-800"> <div className="space-y-1"> <div className="flex justify-between text-[9px] sm:text-[10px] text-neutral-500 uppercase font-bold"> <span>Agresiva</span><span className="text-emerald-400">Dulzura [{dulzura}]</span><span>Dulce</span> </div> <input type="range" min="0" max="100" value={dulzura} onChange={(e)=>setDulzura(Number(e.target.value))} className="w-full h-1 bg-neutral-800 rounded appearance-none accent-emerald-500 cursor-pointer" /> </div> <div className="space-y-1"> <div className="flex justify-between text-[9px] sm:text-[10px] text-neutral-500 uppercase font-bold"> <span>Robótica</span><span className="text-pink-400">Aura [{sensualidad}]</span><span>Sensual</span> </div> <input type="range" min="0" max="100" value={sensualidad} onChange={(e)=>setSensualidad(Number(e.target.value))} className="w-full h-1 bg-neutral-800 rounded appearance-none accent-pink-500 cursor-pointer" /> </div> <div className="space-y-1"> <div className="flex justify-between text-[9px] sm:text-[10px] text-neutral-500 uppercase font-bold"> <span>Atenuada</span><span className="text-amber-400">Intensidad [{intensidad}]</span><span>Fuerte</span> </div> <input type="range" min="0" max="100" value={intensidad} onChange={(e)=>setIntensidad(Number(e.target.value))} className="w-full h-1 bg-neutral-800 rounded appearance-none accent-amber-500 cursor-pointer" /> </div> </div> {/* Botones de Control */} <div className="flex flex-col sm:flex-row gap-2"> <button onClick={toggleHandsFree} disabled={isLoading} className={`flex-1 py-2 rounded text-xs font-bold flex items-center justify-center gap-2 transition-colors border ${ isHandsFree ? 'bg-red-900/20 text-red-400 border-red-900/50 hover:bg-red-900/40 shadow-[0_0_10px_rgba(239,68,68,0.2)]' : 'bg-indigo-900/20 text-indigo-400 border-indigo-900/50 hover:bg-indigo-900/40' }`} > {isHandsFree ? <MicOff size={14} /> : <Mic size={14} />} {isHandsFree ? 'Detener Escucha' : 'Manos Libres'} </button> <div className="flex gap-2 flex-1"> <button onClick={handleManualPlay} disabled={isLoading || isPlaying || isHandsFree} className="flex-1 bg-emerald-600/20 hover:bg-emerald-600/40 text-emerald-400 border border-emerald-600/50 disabled:opacity-30 py-2 rounded text-xs font-bold flex items-center justify-center gap-1 transition-colors" > {isLoading ? <Loader2 size={14} className="animate-spin" /> : <Play size={14} />} Sintetizar </button> <button onClick={stopAudio} disabled={!isPlaying && !isHandsFree} className="px-4 bg-neutral-800 hover:bg-neutral-700 text-neutral-400 border border-neutral-700 disabled:opacity-30 py-2 rounded text-xs font-bold flex items-center justify-center transition-colors" > <Square size={14} /> </button> </div> </div> {/* Botón para limpiar caché (opcional) */} <div className="text-right"> <button onClick={() => audioCache.clear()} className="text-[8px] text-neutral-600 hover:text-neutral-400 underline" > limpiar caché de audio </button> </div> </div> ); }; // --- ENTORNO ESCRITORIO (sin cambios) --- export default function App() { const [widgets, setWidgets] = useState({ voice: { isOpen: true, pos: { x: window.innerWidth > 768 ? window.innerWidth / 2 - 170 : 20, y: 40 } } }); const toggleWidget = (id) => { setWidgets(prev => ({ ...prev, [id]: { ...prev[id], isOpen: !prev[id].isOpen } })); }; return ( <div className="w-full h-screen bg-neutral-950 bg-[radial-gradient(ellipse_80%_80%_at_50%_-20%,rgba(16,185,129,0.1),rgba(0,0,0,1))] overflow-hidden relative font-sans text-neutral-200"> <div className="absolute inset-0 flex items-center justify-center opacity-[0.02] pointer-events-none"><Settings2 size={500} /></div> {widgets.voice.isOpen && ( <DraggableWidget title="MODULADOR VOCAL KORE" icon={Zap} initialPos={widgets.voice.pos} onClose={() => toggleWidget('voice')}> <VoiceModulatorWidget /> </DraggableWidget> )} <div className="absolute bottom-6 left-1/2 transform -translate-x-1/2 bg-neutral-900/80 backdrop-blur-md border border-neutral-700/50 p-2 rounded-2xl shadow-2xl flex gap-2 z-[100]"> <div className="px-3 flex items-center border-r border-neutral-700/50 text-neutral-500"><LayoutGrid size={20} /></div> <button onClick={() => toggleWidget('voice')} className={`px-4 py-2 rounded-xl flex items-center gap-2 text-sm font-medium transition-all ${
Photography, side view of a kneeling gorgeous 34 year old Russian woman that's laughing,(beautiful face:1 .0)- ,and wearing a red mask, and using a bright orange Ostrich feather in an outstretched hand to clean dust off a small bookshelf in a luxurious castle room, (beautiful),(((perfect eyes and face))), perfect hands,((best quality, masterpiece)),((8K, RAW))
He optimizado tu código para lograr una modulación vocal continua y fluida basada en los sliders, con caché de audio, timeouts y mejor manejo del estado. Ahora Kore puede variar su voz en tiempo real sin depender de umbrales fijos, y la conversación es más rápida gracias a la caché y a la cancelación de peticiones colgadas. ```javascript import React, { useState, useRef, useEffect, useCallback } from 'react'; import { Play, Square, Mic, MicOff, Settings2, Activity, Loader2, X, GripHorizontal, LayoutGrid, Zap, AlertCircle } from 'lucide-react'; // --- CONSTANTES --- const SILENT_WAV = "data:audio/wav;base64,UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"; const TTS_TIMEOUT = 5000; // 5 segundos máximo para la síntesis const DEFAULT_API_KEY = 'AIzaSyBlkvy_Op-XlzSMSDDl9ip42dMFZX28MAA'; // ⚠️ Cámbiala por tu propia clave // --- UTILIDADES --- const base64ToWavBlob = (base64Data, sampleRate = 24000) => { const binaryString = window.atob(base64Data); const pcmData = new Uint8Array(binaryString.length); for (let i = 0; i < binaryString.length; i++) pcmData[i] = binaryString.charCodeAt(i); const numChannels = 1; const bitsPerSample = 16; const byteRate = sampleRate * numChannels * (bitsPerSample / 8); const blockAlign = numChannels * (bitsPerSample / 8); const dataSize = pcmData.length; const buffer = new ArrayBuffer(44 + dataSize); const view = new DataView(buffer); const writeString = (view, offset, string) => { for (let i = 0; i < string.length; i++) view.setUint8(offset + i, string.charCodeAt(i)); }; writeString(view, 0, 'RIFF'); view.setUint32(4, 36 + dataSize, true); writeString(view, 8, 'WAVE'); writeString(view, 12, 'fmt '); view.setUint32(16, 16, true); view.setUint16(20, 1, true); view.setUint16(22, numChannels, true); view.setUint32(24, sampleRate, true); view.setUint32(28, byteRate, true); view.setUint16(32, blockAlign, true); view.setUint16(34, bitsPerSample, true); writeString(view, 36, 'data'); view.setUint32(40, dataSize, true); for (let i = 0; i < dataSize; i++) view.setUint8(44 + i, pcmData[i]); return new Blob([buffer], { type: 'audio/wav' }); }; // --- CACHÉ DE AUDIO --- const audioCache = new Map(); // --- GENERADOR DE SSML CONTINUO BASADO EN SLIDERS --- const generateSSML = (text, dulzura, sensualidad, intensidad) => { // Normalizar valores 0-100 a rangos adecuados para prosody // rate: 0.5 a 2.0 (1.0 es normal) const rate = 0.8 + (intensidad / 100) * 1.2; // 0.8 (lento) a 2.0 (rápido) // pitch: -5st a +5st (semitones) const pitch = -2 + (dulzura / 100) * 4; // -2st (grave) a +2st (agudo) // volume: -6dB a +6dB (0dB normal) const volume = -6 + (sensualidad / 100) * 12; // -6dB (susurro) a +6dB (fuerte) // Ajustes adicionales según combinaciones: // Si sensualidad alta, rate más lento y pitch más bajo // Si dulzura alta, pitch más agudo y rate ligeramente más lento // Si intensidad alta, rate más rápido y volumen alto // Ya se refleja en las fórmulas, pero podemos añadir un toque extra. const ssml = `<speak> <prosody rate="${rate.toFixed(2)}" pitch="${pitch.toFixed(0)}st" volume="${volume.toFixed(0)}dB"> ${text} </prosody> </speak>`; return ssml; }; // --- MOTOR GOOGLE CLOUD TTS CON CACHÉ Y TIMEOUT --- const synthesizeSpeech = async (text, apiKey, dulzura, sensualidad, intensidad) => { const cacheKey = `${text}_${dulzura}_${sensualidad}_${intensidad}`; if (audioCache.has(cacheKey)) { console.log('🎯 Usando audio cacheado'); return audioCache.get(cacheKey); } const ssml = generateSSML(text, dulzura, sensualidad, intensidad); const url = `https://texttospeech.googleapis.com/v1/text:synthesize?key=${apiKey}`; const body = { input: { ssml }, voice: { languageCode: 'es-ES', name: 'es-ES-Neural2-F', ssmlGender: 'FEMALE' }, audioConfig: { audioEncoding: 'LINEAR16', sampleRateHertz: 24000 } }; const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), TTS_TIMEOUT); try { const res = await fetch(url, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(body), signal: controller.signal }); clearTimeout(timeoutId); if (!res.ok) throw new Error(`TTS error: ${res.status}`); const data = await res.json(); audioCache.set(cacheKey, data.audioContent); return data.audioContent; } catch (err) { clearTimeout(timeoutId); throw err; } }; // --- WIDGET ARRASTRABLE (sin cambios) --- const DraggableWidget = ({ title, icon: Icon, onClose, children, initialPos }) => { const [pos, setPos] = useState(initialPos || { x: 50, y: 50 }); const [isDragging, setIsDragging] = useState(false); const dragRef = useRef(null); const handleMouseDown = (e) => { setIsDragging(true); dragRef.current = { startX: e.clientX, startY: e.clientY, initialX: pos.x, initialY: pos.y }; }; const handleMouseMove = (e) => { if (!isDragging) return; setPos({ x: Math.max(0, dragRef.current.initialX + (e.clientX - dragRef.current.startX)), y: Math.max(0, dragRef.current.initialY + (e.clientY - dragRef.current.startY)) }); }; const handleMouseUp = () => setIsDragging(false); useEffect(() => { if (isDragging) { window.addEventListener('mousemove', handleMouseMove); window.addEventListener('mouseup', handleMouseUp); } return () => { window.removeEventListener('mousemove', handleMouseMove); window.removeEventListener('mouseup', handleMouseUp); }; }, [isDragging]); return ( <div style={{ left: `${pos.x}px`, top: `${pos.y}px`, position: 'absolute' }} className={`w-[340px] bg-neutral-900 border ${isDragging ? 'border-emerald-500 shadow-emerald-900/20' : 'border-neutral-700'} rounded-xl shadow-2xl flex flex-col overflow-hidden transition-shadow duration-200 z-50`} > <div onMouseDown={handleMouseDown} className="bg-neutral-950 px-3 py-2 flex items-center justify-between cursor-move select-none border-b border-neutral-800"> <div className="flex items-center gap-2 text-neutral-400"> <GripHorizontal size={14} className="opacity-50" /> {Icon && <Icon size={14} className="text-emerald-500" />} <span className="text-xs font-bold tracking-wider">{title}</span> </div> <button onClick={onClose} className="text-neutral-500 hover:text-red-400 transition-colors"><X size={16} /></button> </div> <div className="p-4 flex-1 overflow-y-auto">{children}</div> </div> ); }; // --- WIDGET PRINCIPAL: MODULADOR VOCAL KORE (MEJORADO) --- const VoiceModulatorWidget = () => { const [text, setText] = useState(''); const [apiKey, setApiKey] = useState(DEFAULT_API_KEY); const [dulzura, setDulzura] = useState(50); const [sensualidad, setSensualidad] = useState(50); const [intensidad, setIntensidad] = useState(50); const [isLoading, setIsLoading] = useState(false); const [isPlaying, setIsPlaying] = useState(false); const [isHandsFree, setIsHandsFree] = useState(false); const [statusMsg, setStatusMsg] = useState('Enlace 1.5 Flash + GCP TTS Establecido.'); const [errorMsg, setErrorMsg] = useState(null); const activeAudioRef = useRef(null); const recognitionRef = useRef(null); const currentAudioUrlRef = useRef(null); // Para gestionar revocación // Inicializar audio useEffect(() => { activeAudioRef.current = new Audio(); activeAudioRef.current.preload = "auto"; return () => { if (activeAudioRef.current) { activeAudioRef.current.pause(); if (currentAudioUrlRef.current) { URL.revokeObjectURL(currentAudioUrlRef.current); } } if (recognitionRef.current) recognitionRef.current.stop(); }; }, []); // Configurar reconocimiento de voz useEffect(() => { if (!('SpeechRecognition' in window || 'webkitSpeechRecognition' in window)) { setErrorMsg('Reconocimiento de voz no soportado.'); return; } const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition; recognitionRef.current = new SpeechRecognition(); recognitionRef.current.lang = 'es-ES'; recognitionRef.current.interimResults = false; recognitionRef.current.continuous = false; recognitionRef.current.onresult = async (event) => { const transcript = event.results[0][0].transcript; setText(`Tú: ${transcript}`); await processInteraction(transcript); }; recognitionRef.current.onerror = (event) => { if (event.error !== 'no-speech') { setErrorMsg(`Micrófono: ${event.error}`); setIsHandsFree(false); } }; recognitionRef.current.onend = () => { if (isHandsFree && !isPlaying && !isLoading) { try { recognitionRef.current.start(); setStatusMsg('Escuchando...'); } catch (e) {} } }; // Limpiar al desmontar return () => { if (recognitionRef.current) recognitionRef.current.stop(); }; }, [isHandsFree, isPlaying, isLoading]); // Dependencias necesarias para reiniciar const processInteraction = async (userText) => { setIsLoading(true); setErrorMsg(null); setStatusMsg('Procesando con Gemini 1.5 Flash...'); const key = apiKey.trim(); try { // 1. Llamada a Gemini con timeout const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), 5000); const payloadText = { contents: [{ parts: [{ text: userText }] }], systemInstruction: { parts: [{ text: `Eres Kore, 28 años, Huesca. Labia, directa, sensual, disruptiva. Responde en español, máx 20 palabras. REGLA INNEGOCIABLE: CERO PREGUNTAS.` }] } }; const resText = await fetch( `https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=${key}`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(payloadText), signal: controller.signal } ); clearTimeout(timeoutId); if (!resText.ok) throw new Error(`Gemini error: ${resText.status}`); const dataText = await resText.json(); const aiText = dataText.candidates?.[0]?.content?.parts?.[0]?.text || "Mmm... vale."; setText(`Kore: ${aiText}`); // 2. Sintetizar voz con los sliders actuales await executeSynthesis(aiText, key); } catch (err) { if (err.name === 'AbortError') { setErrorMsg('Gemini timeout (5s)'); } else { setErrorMsg(err.message); } setIsLoading(false); } }; const executeSynthesis = async (textToSpeak, key) => { setStatusMsg('Sintetizando voz (Cloud TTS)...'); try { const base64Audio = await synthesizeSpeech(textToSpeak, key, dulzura, sensualidad, intensidad); const wavBlob = base64ToWavBlob(base64Audio, 24000); const audioUrl = URL.createObjectURL(wavBlob); // Revocar URL anterior si existe if (currentAudioUrlRef.current) { URL.revokeObjectURL(currentAudioUrlRef.current); } currentAudioUrlRef.current = audioUrl; activeAudioRef.current.src = audioUrl; activeAudioRef.current.onended = () => { setIsPlaying(false); setStatusMsg('Transmisión completada.'); if (isHandsFree) { try { recognitionRef.current.start(); setStatusMsg('Escuchando...'); } catch (e) {} } }; setStatusMsg('Transmitiendo...'); setIsPlaying(true); setIsLoading(false); await activeAudioRef.current.play().catch(err => { throw new Error(`Autoplay bloqueado: ${err.message}`); }); } catch (error) { throw new Error(`Fallo TTS: ${error.message}`); } }; const handleManualPlay = async () => { if (!text.trim()) return setErrorMsg('Escribe algo primero.'); // Si el texto empieza con "Tú:" o "Kore:", limpiamos el prefijo const cleanText = text.replace(/^(Tú:|Kore:)\s*/, ''); if (!cleanText.trim()) return setErrorMsg('Texto vacío después de limpiar.'); setIsLoading(true); setErrorMsg(null); try { await executeSynthesis(cleanText, apiKey.trim()); } catch (err) { setErrorMsg(err.message); setIsLoading(false); } }; const toggleHandsFree = () => { if (!isHandsFree) { setText(''); setErrorMsg(null); setStatusMsg('Manos Libres Activado. Habla...'); // Desbloquear audio en algunos navegadores if (activeAudioRef.current) { activeAudioRef.current.src = SILENT_WAV; activeAudioRef.current.play().catch(() => {}); } try { recognitionRef.current.start(); } catch (e) {} } else { if (activeAudioRef.current) { activeAudioRef.current.pause(); activeAudioRef.current.currentTime = 0; } setIsPlaying(false); setStatusMsg('Sistemas en pausa.'); if (recognitionRef.current) recognitionRef.current.stop(); } setIsHandsFree(!isHandsFree); }; const stopAudio = () => { if (activeAudioRef.current) { activeAudioRef.current.pause(); activeAudioRef.current.currentTime = 0; } setIsPlaying(false); setStatusMsg('Señal interrumpida.'); }; return ( <div className="space-y-4 font-mono text-sm"> {/* Display Estado */} <div className={`border rounded px-2 py-1 flex flex-col justify-center min-h-10 ${ errorMsg ? 'bg-red-950/50 border-red-900' : isHandsFree ? 'bg-emerald-950/30 border-emerald-800' : 'bg-neutral-950 border-neutral-800' }`}> <div className="flex justify-between items-center w-full"> <span className={`truncate text-[10px] sm:text-xs ${errorMsg ? 'text-red-500' : 'text-emerald-500'}`}> > {errorMsg || statusMsg} </span> {isPlaying && !errorMsg && <Activity size={14} className="text-emerald-500 animate-pulse ml-2 flex-shrink-0" />} {isLoading && !errorMsg && <Zap size={14} className="text-amber-500 animate-pulse ml-2 flex-shrink-0" />} {isHandsFree && !isPlaying && !isLoading && !errorMsg && <Mic size={14} className="text-red-500 animate-pulse ml-2 flex-shrink-0" />} </div> </div> {/* Input Texto / Log */} <textarea value={text} onChange={(e) => setText(e.target.value)} className="w-full bg-neutral-950/50 border border-neutral-700 rounded p-2 text-xs text-neutral-300 focus:outline-none focus:border-emerald-500 resize-none h-20" placeholder={isHandsFree ? "Escuchando transcripción en tiempo real..." : "Escribe texto directo o activa Manos Libres..."} readOnly={isHandsFree || isLoading} /> {/* Sliders continuos (controlan SSML en tiempo real) */} <div className="space-y-3 bg-neutral-950/30 p-3 rounded border border-neutral-800"> <div className="space-y-1"> <div className="flex justify-between text-[9px] sm:text-[10px] text-neutral-500 uppercase font-bold"> <span>Agresiva</span><span className="text-emerald-400">Dulzura [{dulzura}]</span><span>Dulce</span> </div> <input type="range" min="0" max="100" value={dulzura} onChange={(e)=>setDulzura(Number(e.target.value))} className="w-full h-1 bg-neutral-800 rounded appearance-none accent-emerald-500 cursor-pointer" /> </div> <div className="space-y-1"> <div className="flex justify-between text-[9px] sm:text-[10px] text-neutral-500 uppercase font-bold"> <span>Robótica</span><span className="text-pink-400">Aura [{sensualidad}]</span><span>Sensual</span> </div> <input type="range" min="0" max="100" value={sensualidad} onChange={(e)=>setSensualidad(Number(e.target.value))} className="w-full h-1 bg-neutral-800 rounded appearance-none accent-pink-500 cursor-pointer" /> </div> <div className="space-y-1"> <div className="flex justify-between text-[9px] sm:text-[10px] text-neutral-500 uppercase font-bold"> <span>Atenuada</span><span className="text-amber-400">Intensidad [{intensidad}]</span><span>Fuerte</span> </div> <input type="range" min="0" max="100" value={intensidad} onChange={(e)=>setIntensidad(Number(e.target.value))} className="w-full h-1 bg-neutral-800 rounded appearance-none accent-amber-500 cursor-pointer" /> </div> </div> {/* Botones de Control */} <div className="flex flex-col sm:flex-row gap-2"> <button onClick={toggleHandsFree} disabled={isLoading} className={`flex-1 py-2 rounded text-xs font-bold flex items-center justify-center gap-2 transition-colors border ${ isHandsFree ? 'bg-red-900/20 text-red-400 border-red-900/50 hover:bg-red-900/40 shadow-[0_0_10px_rgba(239,68,68,0.2)]' : 'bg-indigo-900/20 text-indigo-400 border-indigo-900/50 hover:bg-indigo-900/40' }`} > {isHandsFree ? <MicOff size={14} /> : <Mic size={14} />} {isHandsFree ? 'Detener Escucha' : 'Manos Libres'} </button> <div className="flex gap-2 flex-1"> <button onClick={handleManualPlay} disabled={isLoading || isPlaying || isHandsFree} className="flex-1 bg-emerald-600/20 hover:bg-emerald-600/40 text-emerald-400 border border-emerald-600/50 disabled:opacity-30 py-2 rounded text-xs font-bold flex items-center justify-center gap-1 transition-colors" > {isLoading ? <Loader2 size={14} className="animate-spin" /> : <Play size={14} />} Sintetizar </button> <button onClick={stopAudio} disabled={!isPlaying && !isHandsFree} className="px-4 bg-neutral-800 hover:bg-neutral-700 text-neutral-400 border border-neutral-700 disabled:opacity-30 py-2 rounded text-xs font-bold flex items-center justify-center transition-colors" > <Square size={14} /> </button> </div> </div> {/* Botón para limpiar caché (opcional) */} <div className="text-right"> <button onClick={() => audioCache.clear()} className="text-[8px] text-neutral-600 hover:text-neutral-400 underline" > limpiar caché de audio </button> </div> </div> ); }; // --- ENTORNO ESCRITORIO (sin cambios) --- export default function App() { const [widgets, setWidgets] = useState({ voice: { isOpen: true, pos: { x: window.innerWidth > 768 ? window.innerWidth / 2 - 170 : 20, y: 40 } } }); const toggleWidget = (id) => { setWidgets(prev => ({ ...prev, [id]: { ...prev[id], isOpen: !prev[id].isOpen } })); }; return ( <div className="w-full h-screen bg-neutral-950 bg-[radial-gradient(ellipse_80%_80%_at_50%_-20%,rgba(16,185,129,0.1),rgba(0,0,0,1))] overflow-hidden relative font-sans text-neutral-200"> <div className="absolute inset-0 flex items-center justify-center opacity-[0.02] pointer-events-none"><Settings2 size={500} /></div> {widgets.voice.isOpen && ( <DraggableWidget title="MODULADOR VOCAL KORE" icon={Zap} initialPos={widgets.voice.pos} onClose={() => toggleWidget('voice')}> <VoiceModulatorWidget /> </DraggableWidget> )} <div className="absolute bottom-6 left-1/2 transform -translate-x-1/2 bg-neutral-900/80 backdrop-blur-md border border-neutral-700/50 p-2 rounded-2xl shadow-2xl flex gap-2 z-[100]"> <div className="px-3 flex items-center border-r border-neutral-700/50 text-neutral-500"><LayoutGrid size={20} /></div> <button onClick={() => toggleWidget('voice')} className={`px-4 py-2 rounded-xl flex items-center gap-2 text-sm font-medium transition-all ${