// pitch detection
//
// Paste-site metadata (not part of the program):
//   avatar, unknown, c_cpp, a month ago, 5.9 kB, 6, Indexable
#include <algorithm>
#include <cmath>
#include <complex>
#include <iostream>
#include <string>
#include <vector>

#include <portaudio.h>
#include <fftw3.h>

// Frequency-to-Note Conversion
//
// Maps a frequency in Hz to the name of the nearest equal-tempered note in
// scientific pitch notation, using A4 = 440 Hz as the reference
// (e.g. 440.0 -> "A4", 261.63 -> "C4", 466.16 -> "A#4").
//
// Returns "N/A" when `frequency` is not a positive finite number, because the
// logarithm is then not finite and cannot be converted to a note number.
std::string frequencyToNote(double frequency) {
    static const std::vector<std::string> notes = {
        "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"
    };
    const double A4 = 440.0;
    const int midiA4 = 69; // MIDI note number of A4; MIDI note 0 is C-1

    if (!(frequency > 0.0)) { // also rejects NaN
        return "N/A";
    }
    const double semitonesFromA4 = 12.0 * std::log2(frequency / A4);
    if (!std::isfinite(semitonesFromA4)) {
        return "N/A";
    }

    // Count semitones from C-1 (MIDI numbering) rather than from A4: the note
    // table starts at C and octave numbers change at C, so both the name and
    // the octave must be derived from a C-based index.
    const int midiNote = midiA4 + static_cast<int>(std::round(semitonesFromA4));

    const int noteIndex = ((midiNote % 12) + 12) % 12; // Wrap around 12 notes
    // (midiNote - noteIndex) is an exact multiple of 12, so this is a floor
    // division that stays correct for notes below C-1 as well.
    const int octave = (midiNote - noteIndex) / 12 - 1;
    return notes[noteIndex] + std::to_string(octave);
}

int main() {
    PaError err;

    // Initialize PortAudio
    err = Pa_Initialize();
    if (err != paNoError) {
        std::cerr << "PortAudio error: " << Pa_GetErrorText(err) << std::endl;
        return 1;
    }

    // Get default input device
    int defaultInputDeviceIndex = Pa_GetDefaultInputDevice();
    if (defaultInputDeviceIndex == paNoDevice) {
        std::cerr << "Error: No default input device found." << std::endl;
        Pa_Terminate();
        return 1;
    }

    const PaDeviceInfo* inputDeviceInfo = Pa_GetDeviceInfo(defaultInputDeviceIndex);
    if (!inputDeviceInfo) {
        std::cerr << "Error: Failed to retrieve input device info." << std::endl;
        Pa_Terminate();
        return 1;
    }

    std::cout << "Using input device: " << inputDeviceInfo->name << std::endl;

    // Configure input stream parameters
    PaStreamParameters inputParameters;
    inputParameters.device = defaultInputDeviceIndex;
    inputParameters.channelCount = 1; // Mono audio
    inputParameters.sampleFormat = paFloat32; // Floating-point samples
    inputParameters.suggestedLatency = inputDeviceInfo->defaultLowInputLatency;
    inputParameters.hostApiSpecificStreamInfo = NULL;

    // Open stream
    PaStream* stream;
    err = Pa_OpenStream(
        &stream,
        &inputParameters,
        NULL, // No output
        44100, // Sample rate
        512,   // Frames per buffer
        paClipOff, // Don't clip samples
        NULL, // No callback function
        NULL  // No user data
    );

    if (err != paNoError) {
        std::cerr << "Error opening stream: " << Pa_GetErrorText(err) << std::endl;
        Pa_Terminate();
        return 1;
    }

    // Start the stream
    err = Pa_StartStream(stream);
    if (err != paNoError) {
        std::cerr << "Error starting stream: " << Pa_GetErrorText(err) << std::endl;
        Pa_CloseStream(stream);
        Pa_Terminate();
        return 1;
    }

    std::cout << "Recording audio for 5 seconds..." << std::endl;

    // Record audio for 5 seconds
    const int sampleRate = 44100;
    const int numSamples = sampleRate * 5; // 5 seconds at 44.1 kHz
    std::vector<float> recordedSamples(numSamples, 0.0f);
    int framesToRead = 512; // Read in chunks of 512 frames
    int totalFramesRead = 0;

    while (totalFramesRead < numSamples) {
        int framesRemaining = numSamples - totalFramesRead;
        int framesToReadNow = std::min(framesToRead, framesRemaining);

        err = Pa_ReadStream(stream, recordedSamples.data() + totalFramesRead, framesToReadNow);
        if (err && err != paInputOverflowed) { // Allow input overflow warnings
            std::cerr << "Error reading stream: " << Pa_GetErrorText(err) << std::endl;
            break;
        }

        totalFramesRead += framesToReadNow;
    }

    // Stop the stream
    err = Pa_StopStream(stream);
    if (err != paNoError) {
        std::cerr << "Error stopping stream: " << Pa_GetErrorText(err) << std::endl;
    }

    // Close the stream
    err = Pa_CloseStream(stream);
    if (err != paNoError) {
        std::cerr << "Error closing stream: " << Pa_GetErrorText(err) << std::endl;
    }

    // Terminate PortAudio
    Pa_Terminate();

    std::cout << "Recording complete. Total frames: " << totalFramesRead << std::endl;

    // Perform FFT Analysis
    const int bufferSize = 2048; // Chunk size for FFT
    fftw_complex* in = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * bufferSize);
    fftw_complex* out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * bufferSize);
    fftw_plan plan = fftw_plan_dft_1d(bufferSize, in, out, FFTW_FORWARD, FFTW_ESTIMATE);

    for (int chunkStart = 0; chunkStart < numSamples; chunkStart += bufferSize) {
        // Fill FFT input buffer
        for (int i = 0; i < bufferSize; ++i) {
            in[i][0] = (chunkStart + i < numSamples) ? recordedSamples[chunkStart + i] : 0.0; // Real part
            in[i][1] = 0.0; // Imaginary part
        }

        // Execute FFT
        fftw_execute(plan);

        // Compute magnitude spectrum
        std::vector<double> magnitudes(bufferSize / 2);
        for (int i = 0; i < bufferSize / 2; ++i) {
            magnitudes[i] = sqrt(out[i][0] * out[i][0] + out[i][1] * out[i][1]);
        }

        // Find the frequency with the maximum magnitude
        double maxMagnitude = 0.0;
        int maxIndex = 0;
        for (int i = 0; i < bufferSize / 2; ++i) {
            if (magnitudes[i] > maxMagnitude) {
                maxMagnitude = magnitudes[i];
                maxIndex = i;
            }
        }

        // Calculate the fundamental frequency
        double fundamentalFrequency = maxIndex * (double)sampleRate / bufferSize;

        // Convert frequency to note
        if (fundamentalFrequency > 20.0) { // Ignore frequencies below 20 Hz
            std::string note = frequencyToNote(fundamentalFrequency);
            std::cout << "Chunk starting at " << (chunkStart / (double)sampleRate) << "s: "
                      << "Frequency: " << fundamentalFrequency << " Hz, Note: " << note << std::endl;
        }
    }

    // Clean up FFTW resources
    fftw_destroy_plan(plan);
    fftw_free(in);
    fftw_free(out);

    return 0; 

    // compile with: g++ -o pitch_detection pitch_detection.cpp -lportaudio -lfftw3 -lm

}
// Leave a Comment