Untitled
#ifndef ECHO_DETECTOR_HPP #define ECHO_DETECTOR_HPP // Comment in English as requested #include <speex/speex_echo.h> #include <speex/speex_preprocess.h> #include <vector> #include <stdexcept> #include <cmath> /** * EchoDetector class encapsulates SpeexDSP echo state and provides * a method to detect echo based on the estimated ERLE value. */ class EchoDetector { public: /** * Constructor * @param frameSize Number of samples per frame (e.g. 160 samples for 20ms @ 8kHz) * @param filterLength Length of echo buffer in samples (e.g. 1024 or more) * @param sampleRate Sampling rate (e.g. 8000 for 8 kHz) */ EchoDetector(int frameSize, int filterLength, int sampleRate) : m_frameSize(frameSize) , m_sampleRate(sampleRate) , m_filterLength(filterLength) { if (frameSize <= 0 || filterLength <= 0 || sampleRate <= 0) { throw std::runtime_error("Invalid EchoDetector constructor parameters."); } // Initialize Speex echo state m_echoState = speex_echo_state_init(m_frameSize, m_filterLength); if (!m_echoState) { throw std::runtime_error("Failed to initialize Speex echo state."); } // Set sampling rate speex_echo_ctl(m_echoState, SPEEX_ECHO_SET_SAMPLING_RATE, &m_sampleRate); // Initialize Speex preprocess state (for better echo analysis) m_preprocessState = speex_preprocess_state_init(m_frameSize, m_sampleRate); if (!m_preprocessState) { speex_echo_state_destroy(m_echoState); throw std::runtime_error("Failed to initialize Speex preprocess state."); } // Link the preprocess state with echo state speex_preprocess_ctl(m_preprocessState, SPEEX_PREPROCESS_SET_ECHO_STATE, m_echoState); } /** * Destructor */ ~EchoDetector() { if (m_echoState) { speex_echo_state_destroy(m_echoState); m_echoState = nullptr; } if (m_preprocessState) { speex_preprocess_state_destroy(m_preprocessState); m_preprocessState = nullptr; } } /** * Process one frame of near-end (caller) and far-end (called) samples. * Returns a boolean indicating whether echo is likely present. * * @param nearEnd Input buffer from the "microphone" side (caller) * @param farEnd Input buffer from the "speaker" side (called) * * @return bool True if echo is detected, false otherwise */ bool detectEcho(const short* nearEnd, const short* farEnd) { if (!nearEnd || !farEnd) { return false; } // Prepare buffers std::vector<short> out(m_frameSize, 0); // Speex echo cancellation: out = nearEnd - echo(farEnd) speex_echo_cancellation( m_echoState, nearEnd, // near-end samples farEnd, // far-end samples out.data() // output buffer ); // Run preprocess on the output (this updates internal stats, can help with detection) int vad = speex_preprocess_run(m_preprocessState, out.data()); (void)vad; // We don't strictly need VAD here, but it's part of standard flow // Get the current ERLE (Echo Return Loss Enhancement) float erle = 0.0f; speex_echo_ctl(m_echoState, SPEEX_ECHO_GET_ERLE, &erle); // Simple threshold approach: if ERLE is below a certain dB, we consider echo is present // Adjust threshold as needed. A typical "good" cancellation can exceed 20 dB ERLE or more. // If it's consistently under ~15 dB, there's likely a noticeable echo. const float ERLE_THRESHOLD = 15.0f; bool echoPresent = (erle < ERLE_THRESHOLD); return echoPresent; } private: int m_frameSize; int m_sampleRate; int m_filterLength; SpeexEchoState* m_echoState = nullptr; SpeexPreprocessState* m_preprocessState = nullptr; }; #endif // ECHO_DETECTOR_HPP
Leave a Comment