import { Injectable } from '@angular/core';
import * as sdk from 'microsoft-cognitiveservices-speech-sdk';
import toWav from 'audiobuffer-to-wav';
import { EmsConfig } from '.././shared/emsConfig';
import { BehaviorSubject, Subject } from 'rxjs';


/*******************************
TODO:
- refactor async call inside generateCaptions() so it can be reused inside translations
    - Translations are not live and stop after first result
- refactor both speech and audio config & initialization
- make translation langs dynamic
- pull video from URL instead of file input
- use moment.js for time conversion
******************************/

/**
 * Wraps the Azure Speech SDK for three use cases:
 *  - live speech-to-text from the default microphone (`startListening` / `stopListening`),
 *  - one-shot transcription of an audio file fetched from a URL (`transcribeAudio`),
 *  - WebVTT caption generation (`generateCaptions`) and one-shot translation
 *    (`sendForTranslation`) for a locally selected media file.
 *
 * The service keeps a single "current" recognizer in `recognizer`; long-running
 * operations capture their own local reference so that replacing the current
 * recognizer never makes them close or stop the wrong instance.
 */
@Injectable()
export class SpeechRecognitionService {

    /** The Speech SDK reports result `offset` / `duration` in 100-nanosecond ticks. */
    private static readonly TICKS_PER_SECOND = 10000000;

    /** First line required at the top of every WebVTT file. */
    private static readonly VTT_HEADER = 'WEBVTT';

    /** Last phrase recognized by the current recognizer. */
    speechTranscript: string = '';
    /** Emits every recognized phrase; replays the latest value to new subscribers. */
    convertedText: Subject<string> = new BehaviorSubject("");

    /** Result of the most recent `transcribeAudio` call. */
    audioTranscript: string = '';
    isListening: boolean = false;
    isTranscribing: boolean = false;

    audioUrl: string = 'https://raw.githubusercontent.com/Azure-Samples/cognitive-services-speech-sdk/master/samples/csharp/sharedcontent/console/whatstheweatherlike.wav';
    // audioUrl: string = 'https://audio.lifespeak.com/EN/Jaffe_Adi_MentalHealthEndingShame_1.mp3'

    // TODO - refactor and remove
    // Kept as `any` so existing consumers of this public field keep compiling.
    recognizer: any; // SDK SpeechRecognizer

    speechConfig: sdk.SpeechConfig; // TODO - Refactor
    translationConfig: sdk.SpeechTranslationConfig; // TODO - Refactor
    // audioConfig = sdk.AudioConfig.fromDefaultMicrophoneInput(); // For Microphone

    /** Caption cues accumulated by `generateCaptions` (without the WEBVTT header). */
    vttContent: string = '';
    /** Translated text accumulated by `sendForTranslation`. */
    translatedContent: string = '';
    public isProcessing: boolean = false;
    /** File picked through `onFileChange`; undefined when nothing is selected. */
    mediaFile: File | undefined;

    // Translation
    /** Audio config produced by the last `generateCaptions` run; input for `sendForTranslation`. */
    encodedWavFile: sdk.AudioConfig;
    defaultLang: string = 'en-US';

    constructor(public _emsConfig: EmsConfig) {
        // Built here rather than in field initializers so they never read
        // `_emsConfig` before the parameter property has been assigned.
        this.speechConfig = sdk.SpeechConfig.fromSubscription(
            this._emsConfig.azureAISpeechServiceKey,
            this._emsConfig.azureAISpeechServiceRegion
        );
        this.translationConfig = sdk.SpeechTranslationConfig.fromSubscription(
            this._emsConfig.azureAISpeechServiceKey,
            this._emsConfig.azureAISpeechServiceRegion
        );
    }

    /**
     * Creates a new recognizer for `audioConfig` and makes it the current one.
     * Recognized phrases are stored in `speechTranscript` and pushed to `convertedText`.
     *
     * @param audioConfig audio source; defaults to the default microphone.
     */
    // TODO - current Default = Microphone
    setSpeechRecognizer(audioConfig = sdk.AudioConfig.fromDefaultMicrophoneInput()): void {
        // A recognizer that is still listening would be orphaned by the replacement
        // (stopListening would then act on the new one), so shut it down first.
        if (this.recognizer && this.isListening) {
            this.releaseRecognizer(this.recognizer);
            this.isListening = false;
        }

        this.speechConfig.speechRecognitionLanguage = this.defaultLang;
        const recognizer = new sdk.SpeechRecognizer(this.speechConfig, audioConfig);
        recognizer.recognized = (_sender, event) => {
            const result = event.result;
            if (result.reason === sdk.ResultReason.RecognizedSpeech) {
                this.speechTranscript = result.text;
                this.convertedText.next(this.speechTranscript);
            }
        };
        this.recognizer = recognizer;
    }

    // Live Speech to Text via Microphone

    /** Starts continuous recognition, creating a microphone recognizer if none exists. */
    startListening(): void {
        if (this.isListening) {
            return;
        }
        if (!this.recognizer) {
            this.setSpeechRecognizer();
        }

        this.recognizer.startContinuousRecognitionAsync(
            () => {
                console.log('Speech recognition started.');
                this.isListening = true;
            },
            (error: string) => {
                console.error('Failed to start speech recognition:', error);
                this.isListening = false;
            }
        );
    }

    /** Stops continuous recognition if it is currently running. */
    stopListening(): void {
        if (!this.isListening) {
            return;
        }
        if (!this.recognizer) {
            // Nothing left to stop; just bring the flag back in sync.
            this.isListening = false;
            return;
        }

        this.recognizer.stopContinuousRecognitionAsync(
            () => {
                console.log('Speech recognition stopped.');
                this.isListening = false;
            },
            (error: string) => {
                console.error('Failed to stop speech recognition:', error);
            }
        );
    }

    // WAV Audio file to Text Transcription

    /**
     * Downloads the audio at `url` and runs a single-utterance recognition on it.
     * The recognized text is stored in `audioTranscript`. Calls made while a
     * transcription is already running are ignored.
     *
     * NOTE(review): the push stream is created without an explicit format, so the
     * SDK's default stream format applies - confirm the fetched audio matches it.
     *
     * @param url location of the audio file to transcribe.
     */
    async transcribeAudio(url: string): Promise<void> {
        if (this.isTranscribing) {
            return;
        }
        this.isTranscribing = true;

        try {
            const response = await fetch(url);
            if (!response.ok) {
                throw new Error(`Failed to fetch audio file (${response.status} ${response.statusText})`);
            }

            const arrayBuffer = await response.arrayBuffer();
            const audioStream = sdk.AudioInputStream.createPushStream();
            audioStream.write(arrayBuffer);
            // Closing the stream marks end-of-audio. Without it the recognizer keeps
            // waiting for more data, which is what the old 5 second timeout papered over.
            audioStream.close();

            this.setSpeechRecognizer(sdk.AudioConfig.fromStreamInput(audioStream));
            // Capture the instance so the callbacks below always clean up *this* recognizer.
            const recognizer: sdk.SpeechRecognizer = this.recognizer;

            const finish = (): void => {
                this.releaseRecognizer(recognizer);
                this.isTranscribing = false;
            };

            recognizer.recognizeOnceAsync(
                (result) => {
                    switch (result.reason) {
                        case sdk.ResultReason.RecognizedSpeech:
                            console.log(`RECOGNIZED: Text=${result.text}`);
                            this.audioTranscript = result.text;
                            break;
                        case sdk.ResultReason.NoMatch:
                            console.log("NOMATCH: Speech could not be recognized.");
                            break;
                        case sdk.ResultReason.Canceled:
                            this.logCancellation(result);
                            break;
                    }
                    finish();
                },
                (error) => {
                    console.error('Error fetching or transcribing audio:', error);
                    finish();
                }
            );
        } catch (error) {
            console.error('Error fetching or transcribing audio:', error);
            this.isTranscribing = false;
        }
    }

    // Video Closed Captioning

    /**
     * Remembers the file chosen in a file input.
     *
     * @param event `change` event of an `<input type="file">`.
     */
    onFileChange(event: Event): void {
        const input = event.target as HTMLInputElement | null;
        const files = input ? input.files : null;
        this.mediaFile = files && files.length > 0 ? files[0] : undefined;
    }

    /**
     * Decodes the selected media file, re-encodes its audio as WAV and runs
     * continuous recognition over it, appending one WebVTT cue per recognized
     * phrase to `vttContent`. `isProcessing` is true while this is in progress.
     *
     * The returned promise resolves once the file read has been started, not
     * when captioning has finished.
     */
    async generateCaptions(): Promise<void> {
        if (!this.mediaFile) {
            console.error('No media file selected.');
            return;
        }

        this.isProcessing = true;
        // Start from a clean slate so a second run does not append to the first.
        this.vttContent = '';

        const fileReader = new FileReader();

        fileReader.onerror = () => {
            console.error('Error reading media file:', fileReader.error);
            this.isProcessing = false;
        };

        fileReader.onload = async () => {
            try {
                const wavFile = await this.decodeToWav(fileReader.result as ArrayBuffer);
                const audioConfig = sdk.AudioConfig.fromWavFileInput(wavFile);
                this.encodedWavFile = audioConfig; // TODO - used for translation func - needs to be refactored

                this.setSpeechRecognizer(audioConfig);
                this.runCaptionRecognition(this.recognizer);
            } catch (error) {
                console.error('Error decoding audio data:', error);
                this.isProcessing = false;
            }
        };

        fileReader.readAsArrayBuffer(this.mediaFile);
    }

    /**
     * Wraps the decoded audio in a WAV `File`.
     *
     * @param audioBuffer decoded audio samples.
     * @returns a file named `audio.wav` with MIME type `audio/wav`.
     */
    async encodeWav(audioBuffer: AudioBuffer): Promise<File> {
        const wavBuffer = toWav(audioBuffer);
        const fileName = 'audio.wav';
        return new File([wavBuffer], fileName, { type: 'audio/wav' });
    }

    /**
     * Formats a time position as a WebVTT timestamp (`hh:mm:ss.mmm`).
     *
     * @param seconds position in seconds; fractions become milliseconds.
     *                Negative or non-finite values are treated as 0.
     */
    convertTimeToString(seconds: number): string {
        const safeSeconds = isFinite(seconds) && seconds > 0 ? seconds : 0;
        // Work in whole milliseconds so floating point noise cannot leak into the fields.
        const totalMilliseconds = Math.round(safeSeconds * 1000);
        const milliseconds = totalMilliseconds % 1000;
        const totalSeconds = Math.floor(totalMilliseconds / 1000);

        const hours = Math.floor(totalSeconds / 3600);
        const minutes = Math.floor((totalSeconds % 3600) / 60);
        const remainingSeconds = totalSeconds % 60;

        let fraction = `${milliseconds}`;
        if (milliseconds < 10) {
            fraction = `00${milliseconds}`;
        } else if (milliseconds < 100) {
            fraction = `0${milliseconds}`;
        }

        return `${this.padZero(hours)}:${this.padZero(minutes)}:${this.padZero(remainingSeconds)}.${fraction}`;
    }

    /** Left-pads a non-negative number to at least two digits. */
    padZero(num: number): string {
        return num < 10 ? `0${num}` : `${num}`;
    }

    /**
     * Offers the current captions as a `captions.vtt` download.
     * The mandatory `WEBVTT` header is added unless the content already has one.
     */
    downloadVttFile(): void {
        const cues = this.vttContent || '';
        const hasHeader = cues.replace(/^\uFEFF/, '').indexOf(SpeechRecognitionService.VTT_HEADER) === 0;
        const content = hasHeader ? cues : `${SpeechRecognitionService.VTT_HEADER}\n\n${cues}`;

        const blob = new Blob([content], { type: 'text/vtt' });
        const url = window.URL.createObjectURL(blob);
        const a = document.createElement('a');
        a.href = url;
        a.download = 'captions.vtt';
        document.body.appendChild(a);
        a.click();
        window.URL.revokeObjectURL(url);
        document.body.removeChild(a);
    }

    // TRANSLATION

    /**
     * Translates the first utterance of the audio prepared by `generateCaptions`
     * and appends the translation to `translatedContent`.
     *
     * @param language target language code; defaults to French (`'fr'`).
     */
    sendForTranslation(language: string = 'fr'): void {
        const audioConfig = this.encodedWavFile;
        if (!audioConfig) {
            // Without this guard the SDK would be handed no audio config at all.
            console.error('No encoded audio available for translation. Generate captions first.');
            return;
        }

        const speechTranslationConfig = sdk.SpeechTranslationConfig.fromSubscription(
            this._emsConfig.azureAISpeechServiceKey,
            this._emsConfig.azureAISpeechServiceRegion
        ); // TODO - Refactor
        speechTranslationConfig.speechRecognitionLanguage = this.defaultLang;
        speechTranslationConfig.addTargetLanguage(language);

        const translationRecognizer = new sdk.TranslationRecognizer(speechTranslationConfig, audioConfig);

        translationRecognizer.recognizeOnceAsync(
            (result) => {
                switch (result.reason) {
                    case sdk.ResultReason.TranslatedSpeech: {
                        const translated = result.translations.get(language);
                        console.log(`RECOGNIZED: Text=${result.text}`);
                        console.log("Translated into [" + language + "]: " + translated);
                        if (translated) {
                            this.translatedContent += translated;
                        }
                        break;
                    }
                    case sdk.ResultReason.NoMatch:
                        console.log("NOMATCH: Speech could not be recognized.");
                        break;
                    case sdk.ResultReason.Canceled:
                        this.logCancellation(result);
                        break;
                }
                translationRecognizer.close();
            },
            (error) => {
                console.error('Error translating audio:', error);
                translationRecognizer.close();
            }
        );
    }

    /** Decodes raw media bytes and re-encodes the audio track as a WAV file. */
    private async decodeToWav(data: ArrayBuffer): Promise<File> {
        const audioContext = new window.AudioContext();
        try {
            const audioBuffer = await audioContext.decodeAudioData(data);
            return await this.encodeWav(audioBuffer);
        } finally {
            // The context is only needed for decoding; release it either way.
            audioContext.close().catch(() => undefined);
        }
    }

    /** Wires caption handlers onto `recognizer` and starts continuous recognition. */
    private runCaptionRecognition(recognizer: sdk.SpeechRecognizer): void {
        let finished = false;

        // Both `canceled` and `sessionStopped` can signal the end; clean up exactly once.
        const finish = (): void => {
            if (finished) {
                return;
            }
            finished = true;
            this.isProcessing = false;
            recognizer.stopContinuousRecognitionAsync(
                () => this.releaseRecognizer(recognizer),
                (error) => {
                    console.error('Error stopping caption recognition:', error);
                    this.releaseRecognizer(recognizer);
                }
            );
        };

        recognizer.recognized = (_sender, event) => {
            const result = event.result;
            if (result.reason !== sdk.ResultReason.RecognizedSpeech) {
                return;
            }
            // Convert SDK ticks to the seconds that convertTimeToString expects.
            const start = result.offset / SpeechRecognitionService.TICKS_PER_SECOND;
            const end = (result.offset + result.duration) / SpeechRecognitionService.TICKS_PER_SECOND;
            this.vttContent += `${this.convertTimeToString(start)} --> ${this.convertTimeToString(end)}\n${result.text}\n\n`;
        };

        recognizer.canceled = (_sender, event) => {
            if (event.reason === sdk.CancellationReason.Error) {
                console.error(`CANCELED: Reason=${event.reason}`);
                console.error(`CANCELED: ErrorDetails=${event.errorDetails}`);
            } else {
                // Any other reason (e.g. the end of the input) is not a failure.
                console.log(`CANCELED: Reason=${event.reason}`);
            }
            finish();
        };

        recognizer.sessionStopped = () => {
            console.log('\nSession stopped event.');
            finish();
        };

        recognizer.startContinuousRecognitionAsync(
            undefined,
            (error) => {
                console.error('Error starting caption recognition:', error);
                finish();
            }
        );
    }

    /** Logs why a recognition result was canceled, including error details when present. */
    private logCancellation(result: sdk.RecognitionResult): void {
        const cancellation = sdk.CancellationDetails.fromResult(result);
        console.log(`CANCELED: Reason=${cancellation.reason}`);

        if (cancellation.reason === sdk.CancellationReason.Error) {
            console.log(`CANCELED: ErrorCode=${cancellation.ErrorCode}`);
            console.log(`CANCELED: ErrorDetails=${cancellation.errorDetails}`);
            console.log("CANCELED: Did you set the speech resource key and region values?");
        }
    }

    /**
     * Closes `recognizer` and, if it is still the current one, clears `recognizer`
     * so a disposed instance is never reused by `startListening`.
     */
    private releaseRecognizer(recognizer: sdk.Recognizer): void {
        if (!recognizer) {
            return;
        }
        try {
            recognizer.close();
        } catch (error) {
            console.warn('Failed to close speech recognizer:', error);
        }
        if (this.recognizer === recognizer) {
            this.recognizer = undefined;
        }
    }

}


// HTML COMPONENT SAMPLE

// <section style="padding:20px 0">
//     <h2 style="padding-bottom:20px">Azure AI Cognitive Services</h2>
//     <div style="padding: 12px 0;">
//         <div>
//             <h3>Speech Recognition</h3>
//             <div style="padding: 12px 0;">
//                 <button (click)="startListening()" style="float:none;margin-right:12px;" type="button" class="btn btn-xs ls-button-1" [disabled]="isListening">Start Listening</button>
//                 <button (click)="stopListening()" style="float:none;" type="button" class="btn btn--delete-bg btn-xs ls-button-1" [disabled]="!isListening">Stop Listening</button>
//                 <div style="padding: 12px 0;">
//                     <strong>Recognized Transcript: </strong> <span style="color:green">{{ speechTranscript }}</span>
//                 </div>
//             </div>
//         </div>
//         <!-- <div>
//             <h3>Audio Transcription</h3>
//             <div style="padding: 12px 0;">
//                 <input type="text" [(ngModel)]="audioUrl" placeholder="Enter audio file URL">
//                 <button (click)="transcribeAudio(audioUrl)" type="button" class="btn btn-xs ls-button-1">Transcribe</button>
//                 <div>
//                     <span *ngIf="isTranscribing">Transcribing...</span>
//                     <strong *ngIf="audioTranscript">Recognized Transcript: </strong> <span style="color:green">{{ audioTranscript }}</span>
//                 </div>
//             </div>
//         </div> -->
//         <div>
//             <h3>Video Closed Captioning</h3>
//             <div style="padding: 12px 0;">
//                 <div>
//                     <input type="file" (change)="onFileChange($event)">
//                     <button (click)="generateCaptions()" type="button" class="btn btn-xs ls-button-1" [disabled]="isProcessing">Generate Captions</button>
//                 </div>
//                 <div *ngIf="vttContent">
//                     <h3>Converted VTT Content:</h3>
//                     <textarea rows="10" cols="50" [(ngModel)]="vttContent"></textarea>
//                     <button (click)="downloadVttFile()" type="button" class="btn btn-xs ls-button-1">Download VTT</button>
//                 </div>
//                 <div *ngIf="isProcessing">Processing...</div>
//             </div>
//         </div>
//     </div>
// </section>
