// Untitled snippet: SpeexDSP-based echo detector (header-only).

#ifndef ECHO_DETECTOR_HPP
#define ECHO_DETECTOR_HPP

// Comment in English as requested

#include <speex/speex_echo.h>
#include <speex/speex_preprocess.h>

#include <cmath>
#include <memory>
#include <stdexcept>
#include <vector>

/**
 * EchoDetector class encapsulates SpeexDSP echo state and provides
 * a method to detect echo based on the estimated ERLE value.
 */
class EchoDetector {
public:
    /**
     * Constructor
     * @param frameSize    Number of samples per frame (e.g. 160 samples for 20ms @ 8kHz)
     * @param filterLength Length of echo buffer in samples (e.g. 1024 or more)
     * @param sampleRate   Sampling rate (e.g. 8000 for 8 kHz)
     */
    EchoDetector(int frameSize, int filterLength, int sampleRate)
        : m_frameSize(frameSize)
        , m_sampleRate(sampleRate)
        , m_filterLength(filterLength)
    {
        if (frameSize <= 0 || filterLength <= 0 || sampleRate <= 0) {
            throw std::runtime_error("Invalid EchoDetector constructor parameters.");
        }

        // Initialize Speex echo state
        m_echoState = speex_echo_state_init(m_frameSize, m_filterLength);

        if (!m_echoState) {
            throw std::runtime_error("Failed to initialize Speex echo state.");
        }

        // Set sampling rate
        speex_echo_ctl(m_echoState, SPEEX_ECHO_SET_SAMPLING_RATE, &m_sampleRate);

        // Initialize Speex preprocess state (for better echo analysis)
        m_preprocessState = speex_preprocess_state_init(m_frameSize, m_sampleRate);
        if (!m_preprocessState) {
            speex_echo_state_destroy(m_echoState);
            throw std::runtime_error("Failed to initialize Speex preprocess state.");
        }

        // Link the preprocess state with echo state
        speex_preprocess_ctl(m_preprocessState, SPEEX_PREPROCESS_SET_ECHO_STATE, m_echoState);
    }

    /**
     * Destructor
     */
    ~EchoDetector() {
        if (m_echoState) {
            speex_echo_state_destroy(m_echoState);
            m_echoState = nullptr;
        }
        if (m_preprocessState) {
            speex_preprocess_state_destroy(m_preprocessState);
            m_preprocessState = nullptr;
        }
    }

    /**
     * Process one frame of near-end (caller) and far-end (called) samples.
     * Returns a boolean indicating whether echo is likely present.
     *
     * @param nearEnd Input buffer from the "microphone" side (caller)
     * @param farEnd  Input buffer from the "speaker" side (called)
     *
     * @return bool   True if echo is detected, false otherwise
     */
    bool detectEcho(const short* nearEnd, const short* farEnd) {
        if (!nearEnd || !farEnd) {
            return false;
        }

        // Prepare buffers
        std::vector<short> out(m_frameSize, 0);

        // Speex echo cancellation: out = nearEnd - echo(farEnd)
        speex_echo_cancellation(
            m_echoState,
            nearEnd,   // near-end samples
            farEnd,    // far-end samples
            out.data() // output buffer
        );

        // Run preprocess on the output (this updates internal stats, can help with detection)
        int vad = speex_preprocess_run(m_preprocessState, out.data());
        (void)vad; // We don't strictly need VAD here, but it's part of standard flow

        // Get the current ERLE (Echo Return Loss Enhancement)
        float erle = 0.0f;
        speex_echo_ctl(m_echoState, SPEEX_ECHO_GET_ERLE, &erle);

        // Simple threshold approach: if ERLE is below a certain dB, we consider echo is present
        // Adjust threshold as needed. A typical "good" cancellation can exceed 20 dB ERLE or more.
        // If it's consistently under ~15 dB, there's likely a noticeable echo.
        const float ERLE_THRESHOLD = 15.0f;
        bool echoPresent = (erle < ERLE_THRESHOLD);

        return echoPresent;
    }

private:
    int m_frameSize;
    int m_sampleRate;
    int m_filterLength;

    SpeexEchoState*       m_echoState        = nullptr;
    SpeexPreprocessState* m_preprocessState  = nullptr;
};

#endif // ECHO_DETECTOR_HPP
// End of file.