/**
 * Azure Speech Service
 *
 * Handles real-time speech-to-text transcription using Microsoft Azure Cognitive Services
 * Features:
 * - Real-time streaming transcription
 * - Interim and final results
 * - Word-level timestamps
 * - Pause detection (>500ms)
 * - WPM (words per minute) calculation
 * - Multilingual support (de-DE, en-US)
 */

const sdk = require('microsoft-cognitiveservices-speech-sdk');
const config = require('@config');

/**
 * Streaming speech-to-text session backed by the Azure Speech SDK.
 *
 * Typical lifecycle: `setup(callbacks)` -> `start()` -> `sendAudioChunk(...)`
 * (repeatedly) -> `stop()`, which resolves with the transcript and metrics.
 */
class AzureSpeechService {
  /**
   * @param {string} [language='de-DE'] - Recognition language passed to the speech config.
   * @param {object} [options] - Optional tuning parameters.
   * @param {number} [options.pauseThresholdMs=500] - Minimum gap (ms) between the end of one
   *   final segment and the start of the next that is recorded as a pause.
   */
  constructor(language = 'de-DE', options = {}) {
    console.log('🎤 [AzureSpeechService] Initializing with language:', language);

    this.language = language;
    this.pauseThresholdMs = options?.pauseThresholdMs ?? 500;
    this.recognizer = null;
    this.audioConfig = null;
    this.pushStream = null;

    // Transcription state
    this._resetState();

    // Callbacks
    this.onInterimResult = null;
    this.onFinalResult = null;
    this.onError = null;

    console.log('✅ [AzureSpeechService] Instance created');
  }

  /**
   * Create the push stream, audio config and recognizer, and register callbacks.
   *
   * Resources left over from an earlier `setup()` are released first, and
   * partially created resources are released if setup fails.
   *
   * @param {object} [callbacks]
   * @param {function(string): void} [callbacks.onInterimResult] - Receives interim text.
   * @param {function(string, string, object): void} [callbacks.onFinalResult] - Receives
   *   (segment text, accumulated final text, segment object).
   * @param {function(Error): void} [callbacks.onError] - Receives setup/start/cancellation errors.
   * @returns {boolean} `true` when the recognizer is ready, `false` on failure.
   */
  setup(callbacks = {}) {
    console.log('🔧 [AzureSpeechService] Setting up callbacks');

    const { onInterimResult, onFinalResult, onError } = callbacks || {};
    this.onInterimResult = onInterimResult || null;
    this.onFinalResult = onFinalResult || null;
    this.onError = onError || null;

    // A repeated setup() would otherwise overwrite (and leak) the old recognizer/stream.
    if (this.recognizer || this.audioConfig || this.pushStream) {
      this._cleanup();
    }

    try {
      // Create push stream for audio input
      this.pushStream = sdk.AudioInputStream.createPushStream();
      this.audioConfig = sdk.AudioConfig.fromStreamInput(this.pushStream);

      // Create speech config with Azure credentials
      const speechConfig = sdk.SpeechConfig.fromSubscription(
        config.azureSpeech.key,
        config.azureSpeech.region
      );

      // Configure speech recognition
      speechConfig.speechRecognitionLanguage = this.language;
      speechConfig.outputFormat = sdk.OutputFormat.Detailed;
      speechConfig.requestWordLevelTimestamps();

      console.log('📡 [AzureSpeechService] Speech config created:', {
        language: this.language,
        region: config.azureSpeech.region,
        outputFormat: 'Detailed',
        wordTimestamps: true
      });

      // Create recognizer
      this.recognizer = new sdk.SpeechRecognizer(speechConfig, this.audioConfig);

      // Setup event handlers
      this._setupEventHandlers();

      console.log('✅ [AzureSpeechService] Setup completed successfully');
      return true;
    } catch (error) {
      console.error('❌ [AzureSpeechService] Setup failed:', error);
      // Do not leave a half-initialised stream behind that sendAudioChunk() would write into.
      this._cleanup();
      this._emitError(error);
      return false;
    }
  }

  /**
   * Attach the recognizing / recognized / canceled / session handlers to
   * `this.recognizer`.
   */
  _setupEventHandlers() {
    console.log('🎯 [AzureSpeechService] Setting up event handlers');

    // Recognizing event - interim results (real-time)
    this.recognizer.recognizing = (s, e) => {
      if (e.result.reason !== sdk.ResultReason.RecognizingSpeech) {
        return;
      }

      const text = e.result.text;
      if (!text) {
        return;
      }

      console.log('📝 [AzureSpeechService] Interim result:', text);
      this.interimText = text;
      this.lastInterimTime = Date.now();

      if (this.onInterimResult) {
        this.onInterimResult(text);
      }
    };

    // Recognized event - final results with word timestamps
    this.recognizer.recognized = (s, e) => {
      this._handleRecognized(e.result);
    };

    // Canceled event - errors
    this.recognizer.canceled = (s, e) => {
      console.error('❌ [AzureSpeechService] Recognition canceled:', {
        reason: e.reason,
        errorCode: e.errorCode,
        errorDetails: e.errorDetails
      });

      if (e.reason === sdk.CancellationReason.Error) {
        this._emitError(new Error(`Speech recognition error: ${e.errorDetails}`));
      }
    };

    // Session started
    this.recognizer.sessionStarted = () => {
      console.log('🎙️  [AzureSpeechService] Recognition session started');
    };

    // Session stopped
    this.recognizer.sessionStopped = () => {
      console.log('🛑 [AzureSpeechService] Recognition session stopped');
    };

    console.log('✅ [AzureSpeechService] Event handlers configured');
  }

  /**
   * Process one final recognition result: append it to the transcript, build
   * its segment (with word timings when available), record a pause if the gap
   * to the previous segment exceeds the threshold, and notify `onFinalResult`.
   *
   * @param {object} result - The `result` object of a `recognized` event.
   */
  _handleRecognized(result) {
    if (result.reason === sdk.ResultReason.NoMatch) {
      console.log('🔇 [AzureSpeechService] No speech detected');
      return;
    }
    if (result.reason !== sdk.ResultReason.RecognizedSpeech) {
      return;
    }

    const text = result.text;
    if (!text || text.trim() === '') {
      console.log('⏭️  [AzureSpeechService] Empty result, skipping');
      return;
    }

    console.log('✅ [AzureSpeechService] Final result:', text);

    // Update final text
    this.finalText += (this.finalText ? ' ' : '') + text;

    const segment = {
      text,
      timestamp: this.startTime ? Date.now() - this.startTime : 0,
      offset: result.offset / 10000, // ticks -> milliseconds (10,000 ticks per ms)
      duration: result.duration / 10000,
      words: this._extractWords(result)
    };

    // Add to global words array
    this.words.push(...segment.words);

    this._recordPause(segment);

    // Update last segment end time
    this.lastSegmentEndTime = segment.offset + segment.duration;

    this.segments.push(segment);

    // A final result supersedes whatever interim text was showing.
    this.interimText = '';

    if (this.onFinalResult) {
      this.onFinalResult(text, this.finalText, segment);
    }
  }

  /**
   * Read word-level timings from the detailed JSON attached to a result.
   * Parsing problems are logged and yield an empty list so the segment text
   * is still kept.
   *
   * @param {object} result - The `result` object of a `recognized` event.
   * @returns {Array<{word: string, offset: number, duration: number}>} Words with
   *   offset/duration in milliseconds; empty when no word details are present.
   */
  _extractWords(result) {
    try {
      const jsonResult = result.properties.getProperty(
        sdk.PropertyId.SpeechServiceResponse_JsonResult
      );
      if (!jsonResult) {
        return [];
      }

      const best = JSON.parse(jsonResult).NBest?.[0];
      console.log('📊 [AzureSpeechService] Detailed result:', {
        confidence: best?.Confidence,
        wordCount: best?.Words?.length || 0
      });

      if (!best?.Words) {
        return [];
      }

      const words = best.Words.map((word) => ({
        word: word.Word,
        offset: word.Offset / 10000, // Convert to milliseconds
        duration: word.Duration / 10000
      }));

      console.log('📝 [AzureSpeechService] Extracted words:', words.length);
      return words;
    } catch (error) {
      console.error('⚠️  [AzureSpeechService] Error parsing detailed result:', error);
      return [];
    }
  }

  /**
   * Record a pause when `segment` starts more than `pauseThresholdMs` after the
   * previous segment ended. The first segment of a session is never compared
   * (there is no previous end time yet).
   *
   * @param {{offset: number}} segment - Segment whose start offset (ms) is checked.
   */
  _recordPause(segment) {
    if (this.lastSegmentEndTime <= 0) {
      return;
    }

    const gap = segment.offset - this.lastSegmentEndTime;
    if (gap > this.pauseThresholdMs) {
      this.pauses.push({
        offset: this.lastSegmentEndTime,
        duration: gap
      });
      console.log(`⏸️  [AzureSpeechService] Pause detected: ${gap}ms`);
    }
  }

  /**
   * Reset transcript state and start continuous recognition.
   *
   * @returns {Promise<boolean>} Resolves `true` once the recognizer reports that
   *   recognition started, `false` if it is not set up or starting failed
   *   (the error is also passed to `onError`).
   */
  async start() {
    console.log('▶️  [AzureSpeechService] Starting recognition');

    if (!this.recognizer) {
      const error = new Error('Recognizer not initialized. Call setup() first.');
      console.error('❌ [AzureSpeechService]', error.message);
      this._emitError(error);
      return false;
    }

    try {
      this._resetState();
      this.startTime = Date.now();

      console.log('🔄 [AzureSpeechService] State reset, starting continuous recognition');

      await this._callRecognizer('startContinuousRecognitionAsync');

      console.log('✅ [AzureSpeechService] Recognition started successfully');
      return true;
    } catch (error) {
      console.error('❌ [AzureSpeechService] Failed to start recognition:', error);
      this._emitError(error);
      return false;
    }
  }

  /**
   * Push one chunk of audio into the recognizer's input stream.
   * Errors are logged, not thrown.
   *
   * @param {Buffer|ArrayBuffer|Uint8Array|number[]} audioData - Audio bytes; anything
   *   that is not already a Buffer is converted with `Buffer.from`.
   */
  sendAudioChunk(audioData) {
    if (!this.pushStream) {
      console.error('❌ [AzureSpeechService] Push stream not initialized');
      return;
    }

    try {
      // Convert audio data to buffer if needed
      const buffer = Buffer.isBuffer(audioData) ? audioData : Buffer.from(audioData);

      this.pushStream.write(buffer);

      // Log every 100th chunk only, to avoid flooding the log.
      this.audioChunkCount += 1;
      if (this.audioChunkCount % 100 === 0) {
        console.log('🎵 [AzureSpeechService] Audio chunk sent:', buffer.length, 'bytes');
      }
    } catch (error) {
      console.error('❌ [AzureSpeechService] Error sending audio chunk:', error);
    }
  }

  /**
   * Stop recognition, release all resources and return the session results.
   *
   * Waits for the recognizer to confirm the stop before collecting results, so
   * final segments delivered during shutdown are included. A failure to stop is
   * logged; results gathered so far are still returned.
   *
   * @returns {Promise<object>} The object produced by `_getResults()`.
   */
  async stop() {
    console.log('⏹️  [AzureSpeechService] Stopping recognition');

    if (!this.recognizer) {
      console.warn('⚠️  [AzureSpeechService] No recognizer to stop');
      return this._getResults();
    }

    // Freeze the session clock at the moment stop was requested so the time
    // spent waiting for shutdown (and any later calls) does not skew WPM.
    this.endTime = Date.now();

    try {
      await this._callRecognizer('stopContinuousRecognitionAsync');
      console.log('✅ [AzureSpeechService] Recognition stopped successfully');

      if (this.pushStream) {
        this.pushStream.close();
        this.pushStream = null; // already closed; keep _cleanup() from closing it twice
        console.log('🔒 [AzureSpeechService] Audio stream closed');
      }

      const results = this._getResults();
      const preview =
        results.text.length > 100 ? `${results.text.substring(0, 100)}...` : results.text;

      console.log('📊 [AzureSpeechService] Final results:', {
        text: preview,
        wordCount: results.wordCount,
        duration: `${results.duration.toFixed(2)}s`,
        wpm: results.wpm,
        pauses: results.pausesMade
      });

      return results;
    } catch (error) {
      console.error('❌ [AzureSpeechService] Stop error:', error);
      return this._getResults();
    } finally {
      this._cleanup();
    }
  }

  /**
   * Build the result object with transcript and metrics.
   *
   * Duration runs from `start()` until `stop()` was requested, or until now
   * while the session is still running.
   *
   * @returns {{text: string, words: Array, segments: Array, pauses: Array,
   *   duration: number, wordCount: number, wpm: number, ppm: number,
   *   language: string, pausesMade: number}} `duration` is in seconds, `wpm` is
   *   rounded to an integer, `ppm` (pauses per minute) to one decimal.
   */
  _getResults() {
    const endedAt = this.endTime ?? Date.now();
    const totalDuration = this.startTime ? (endedAt - this.startTime) / 1000 : 0;
    const wordCount = this.words.length;
    const pausesMade = this.pauses.length;
    const perMinute = (count) => (totalDuration > 0 ? (count / totalDuration) * 60 : 0);

    return {
      text: this.finalText,
      words: this.words,
      segments: this.segments,
      pauses: this.pauses,
      duration: totalDuration,
      wordCount,
      wpm: Math.round(perMinute(wordCount)),
      ppm: Math.round(perMinute(pausesMade) * 10) / 10,
      language: this.language,
      pausesMade
    };
  }

  /**
   * Close and drop the recognizer, audio config and push stream. Each close is
   * attempted independently; a failure is logged and does not block the others.
   */
  _cleanup() {
    console.log('🧹 [AzureSpeechService] Cleaning up resources');

    for (const name of ['recognizer', 'audioConfig', 'pushStream']) {
      const resource = this[name];
      if (resource) {
        try {
          resource.close();
        } catch (err) {
          console.warn(`⚠️  [AzureSpeechService] Error closing ${name}:`, err.message);
        }
        this[name] = null;
      }
    }

    console.log('✅ [AzureSpeechService] Cleanup completed');
  }

  /**
   * Clear all per-session transcript state.
   */
  _resetState() {
    this.interimText = '';
    this.finalText = '';
    this.segments = [];
    this.words = [];
    this.pauses = [];
    this.startTime = null;
    this.endTime = null;
    this.lastSegmentEndTime = 0;
    this.lastInterimTime = 0;
    this.audioChunkCount = 0;
  }

  /**
   * Call a callback-style recognizer method (`method(onSuccess, onError)`) and
   * expose its outcome as a promise, so callers can actually await completion.
   *
   * @param {string} method - Name of the recognizer method to invoke.
   * @returns {Promise<void>} Resolves on the success callback, rejects on the error callback
   *   or if the method throws synchronously.
   */
  _callRecognizer(method) {
    return new Promise((resolve, reject) => {
      this.recognizer[method](resolve, reject);
    });
  }

  /**
   * Pass an error to `onError` (if registered), wrapping non-Error values
   * (e.g. plain strings) in an `Error` so listeners always receive the same type.
   *
   * @param {*} error - The error or error description to report.
   */
  _emitError(error) {
    if (!this.onError) {
      return;
    }
    this.onError(error instanceof Error ? error : new Error(String(error)));
  }
}

// Expose the class as this module's sole CommonJS export.
module.exports = AzureSpeechService;
