Untitled
unknown
csharp
a year ago
6.2 kB
13
Indexable
using System.Net.WebSockets;
using System.Text.Json;
using Apex.BFF.Application.Common.Interfaces;
using Apex.BFF.Infrastructure.Services.AzureAiTranslation;
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using Microsoft.CognitiveServices.Speech.Translation;
using Microsoft.Extensions.Options;
namespace Apex.BFF.Infrastructure.Services.AITranslationService;
public class TranslationService(IOptions<TranslationServiceSettings> settings) : ITranslationService
{
    private readonly TranslationServiceSettings _serviceSettings = settings.Value;

    // Fallback translation targets used when the caller supplies none.
    private static readonly string[] DefaultTargetLanguages = ["es", "en"];

    /// <summary>
    /// Bridges a client WebSocket to the Azure Speech translation service: audio arriving on
    /// the socket is continuously recognized and translated, and each translation is synthesized
    /// to audio and sent back to the client as a JSON message.
    /// </summary>
    /// <param name="webSocket">An open WebSocket carrying raw audio from the client.</param>
    /// <param name="targetLanguages">
    /// Languages to translate into; falls back to "es"/"en" when null or empty.
    /// </param>
    public async Task HandleWebSocketAsync(WebSocket webSocket, List<string>? targetLanguages)
    {
        try
        {
            // The universal v2 endpoint is required for auto language detection with translation.
            var v2EndpointUrl = new Uri($"wss://{_serviceSettings.Region}.stt.speech.microsoft.com/speech/universal/v2");
            var config = SpeechTranslationConfig.FromEndpoint(v2EndpointUrl, _serviceSettings.ApiKey);

            // NOTE(review): candidate source languages are hard-coded — confirm whether they
            // should also be configurable by the caller.
            var autoDetectSourceLanguageConfig =
                AutoDetectSourceLanguageConfig.FromLanguages(["en-US", "es-ES"]);

            // BUG FIX: the targetLanguages parameter was previously ignored ("es"/"en" were
            // always hard-coded). Honor the caller's list when one is provided.
            IEnumerable<string> languages =
                targetLanguages is { Count: > 0 } ? targetLanguages : DefaultTargetLanguages;
            foreach (var language in languages)
            {
                config.AddTargetLanguage(language);
            }

            using var audioInput = AudioConfig.FromStreamInput(new WebSocketAudioInput(webSocket));
            using var recognizer = new TranslationRecognizer(config, autoDetectSourceLanguageConfig, audioInput);

            recognizer.Recognized += async (s, e) =>
            {
                try
                {
                    if (e.Result.Reason == ResultReason.TranslatedSpeech)
                    {
                        // BUG FIX: the detected-language property was previously read before the
                        // reason check and Substring(0, 2) could throw on a null/short value.
                        var lidResult = e.Result.Properties.GetProperty(
                            PropertyId.SpeechServiceConnection_AutoDetectSourceLanguageResult);
                        // "en-US" -> "en" so it can be compared against translation dictionary keys.
                        var sourceLanguage = lidResult?.Length >= 2 ? lidResult[..2] : lidResult;
                        await HandleTranslationsAsync(sourceLanguage, e.Result.Translations, webSocket);
                        Console.WriteLine($"Recognized: {e.Result.Text}");
                    }
                    else
                    {
                        Console.WriteLine($"Recognized event reason: {e.Result.Reason}");
                    }
                }
                catch (Exception ex)
                {
                    // This lambda is effectively async void: an unhandled exception here
                    // would be unobservable and could crash the process.
                    Console.WriteLine($"Translation handling error: {ex.Message}");
                }
            };
            recognizer.Canceled += (s, e) =>
            {
                Console.WriteLine($"Recognition canceled: {e.Reason}");
                if (e.Reason == CancellationReason.Error)
                {
                    Console.WriteLine($"Error details: {e.ErrorDetails}");
                }
            };
            recognizer.SessionStarted += (s, e) => Console.WriteLine("Session started");
            recognizer.SessionStopped += (s, e) => Console.WriteLine("Session stopped");

            await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

            // NOTE(review): this loop reads from the same WebSocket that WebSocketAudioInput
            // pulls audio from; .NET does not support concurrent ReceiveAsync calls on one
            // WebSocket, and two readers would steal frames from each other. Confirm whether
            // this keep-alive loop is needed at all, or route all reads through one reader.
            var buffer = new byte[1024 * 4];
            while (webSocket.State == WebSocketState.Open)
            {
                var result = await webSocket.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);
                if (result.MessageType == WebSocketMessageType.Close)
                {
                    await webSocket.CloseAsync(WebSocketCloseStatus.NormalClosure, string.Empty,
                        CancellationToken.None);
                    Console.WriteLine("WebSocket closed.");
                    break; // the socket is no longer open; don't issue another receive
                }
            }
            await recognizer.StopContinuousRecognitionAsync();
        }
        catch (Exception ex)
        {
            Console.WriteLine($"WebSocket receive error: {ex.Message}");
            throw;
        }
    }

    /// <summary>
    /// Synthesizes <paramref name="text"/> to audio and returns the raw audio bytes.
    /// </summary>
    /// <exception cref="InvalidOperationException">Thrown when synthesis does not complete.</exception>
    private async Task<byte[]> SynthesizeAudioAsync(string text)
    {
        var config = SpeechConfig.FromSubscription(_serviceSettings.ApiKey, _serviceSettings.Region);
        // BUG FIX: the pull stream and audio config were never disposed.
        using var pullAudioStream = new PullAudioOutputStream();
        using var audioConfig = AudioConfig.FromStreamOutput(pullAudioStream);
        using var synthesizer = new SpeechSynthesizer(config, audioConfig);
        using var result = await synthesizer.SpeakTextAsync(text);
        if (result.Reason != ResultReason.SynthesizingAudioCompleted)
        {
            // Specific exception type instead of the base Exception (still catchable by
            // any existing catch (Exception) in callers).
            throw new InvalidOperationException($"Speech synthesis failed: {result.Reason}");
        }
        return result.AudioData;
    }

    /// <summary>
    /// Sends each translation (except the detected source language) back to the client as a
    /// JSON message containing the target language, translated text, and synthesized audio.
    /// </summary>
    private async Task HandleTranslationsAsync(string? sourceLanguage, IReadOnlyDictionary<string, string> translations,
        WebSocket webSocket)
    {
        // BUG FIX: translations were previously sent via parallel tasks + Task.WhenAll, but
        // WebSocket.SendAsync does not allow concurrent sends on one socket — send sequentially.
        foreach (var (language, text) in translations)
        {
            Console.WriteLine($"TRANSLATING into '{language}': {text}");
            if (language == sourceLanguage)
            {
                continue; // don't echo the speaker's own language back
            }
            var translatedAudio = await SynthesizeAudioAsync(text);
            var message = new
            {
                TranslateLanguage = language,
                TranslatedText = text,
                TranslatedAudio = translatedAudio
            };
            var messageBytes = JsonSerializer.SerializeToUtf8Bytes(message);
            await webSocket.SendAsync(new ArraySegment<byte>(messageBytes), WebSocketMessageType.Text, true,
                CancellationToken.None);
        }
    }
}
/// <summary>
/// Adapts a client WebSocket into a pull-based audio source for the Speech SDK.
/// </summary>
public class WebSocketAudioInput(WebSocket webSocket) : PullAudioInputStreamCallback
{
    /// <summary>
    /// Fills <paramref name="dataBuffer"/> with up to <paramref name="size"/> bytes of audio
    /// received from the WebSocket. Returns 0 to signal end-of-stream (close or error).
    /// </summary>
    public override int Read(byte[] dataBuffer, uint size)
    {
        try
        {
            // BUG FIX: this previously received into a fixed 4 KB private buffer and copied
            // at most `size` bytes out, silently discarding any excess audio. Receiving
            // directly into the SDK's buffer (capped at the requested size) loses no data.
            var capacity = Math.Min((int)size, dataBuffer.Length);
            // The SDK's Read contract is synchronous, so the async receive is blocked on here.
            var result = webSocket
                .ReceiveAsync(new ArraySegment<byte>(dataBuffer, 0, capacity), CancellationToken.None)
                .GetAwaiter()
                .GetResult();
            if (result.MessageType == WebSocketMessageType.Close)
            {
                Console.WriteLine("WebSocket closed during read.");
                return 0; // 0 signals end-of-stream to the SDK
            }
            Console.WriteLine($"Read {result.Count} bytes from WebSocket.");
            return result.Count;
        }
        catch (Exception ex)
        {
            // Best-effort: treat any transport failure as end-of-stream rather than
            // letting an exception escape into the SDK's pull thread.
            Console.WriteLine($"WebSocket read error: {ex.Message}");
            return 0;
        }
    }
}
Leave a Comment