// Pitch detection: record audio from the default input device and report the dominant note of each FFT chunk.
#include <iostream> #include <vector> #include <cmath> #include <complex> #include <portaudio.h> #include <fftw3.h> // Frequency-to-Note Conversion std::string frequencyToNote(double frequency) { static const std::vector<std::string> notes = { "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B" }; double A4 = 440.0; int semitonesFromA4 = std::round(12 * std::log2(frequency / A4)); int noteIndex = (semitonesFromA4 % 12 + 12) % 12; // Wrap around 12 notes int octave = 4 + (semitonesFromA4 / 12); return notes[noteIndex] + std::to_string(octave); } int main() { PaError err; // Initialize PortAudio err = Pa_Initialize(); if (err != paNoError) { std::cerr << "PortAudio error: " << Pa_GetErrorText(err) << std::endl; return 1; } // Get default input device int defaultInputDeviceIndex = Pa_GetDefaultInputDevice(); if (defaultInputDeviceIndex == paNoDevice) { std::cerr << "Error: No default input device found." << std::endl; Pa_Terminate(); return 1; } const PaDeviceInfo* inputDeviceInfo = Pa_GetDeviceInfo(defaultInputDeviceIndex); if (!inputDeviceInfo) { std::cerr << "Error: Failed to retrieve input device info." 
<< std::endl; Pa_Terminate(); return 1; } std::cout << "Using input device: " << inputDeviceInfo->name << std::endl; // Configure input stream parameters PaStreamParameters inputParameters; inputParameters.device = defaultInputDeviceIndex; inputParameters.channelCount = 1; // Mono audio inputParameters.sampleFormat = paFloat32; // Floating-point samples inputParameters.suggestedLatency = inputDeviceInfo->defaultLowInputLatency; inputParameters.hostApiSpecificStreamInfo = NULL; // Open stream PaStream* stream; err = Pa_OpenStream( &stream, &inputParameters, NULL, // No output 44100, // Sample rate 512, // Frames per buffer paClipOff, // Don't clip samples NULL, // No callback function NULL // No user data ); if (err != paNoError) { std::cerr << "Error opening stream: " << Pa_GetErrorText(err) << std::endl; Pa_Terminate(); return 1; } // Start the stream err = Pa_StartStream(stream); if (err != paNoError) { std::cerr << "Error starting stream: " << Pa_GetErrorText(err) << std::endl; Pa_CloseStream(stream); Pa_Terminate(); return 1; } std::cout << "Recording audio for 5 seconds..." 
<< std::endl; // Record audio for 5 seconds const int sampleRate = 44100; const int numSamples = sampleRate * 5; // 5 seconds at 44.1 kHz std::vector<float> recordedSamples(numSamples, 0.0f); int framesToRead = 512; // Read in chunks of 512 frames int totalFramesRead = 0; while (totalFramesRead < numSamples) { int framesRemaining = numSamples - totalFramesRead; int framesToReadNow = std::min(framesToRead, framesRemaining); err = Pa_ReadStream(stream, recordedSamples.data() + totalFramesRead, framesToReadNow); if (err && err != paInputOverflowed) { // Allow input overflow warnings std::cerr << "Error reading stream: " << Pa_GetErrorText(err) << std::endl; break; } totalFramesRead += framesToReadNow; } // Stop the stream err = Pa_StopStream(stream); if (err != paNoError) { std::cerr << "Error stopping stream: " << Pa_GetErrorText(err) << std::endl; } // Close the stream err = Pa_CloseStream(stream); if (err != paNoError) { std::cerr << "Error closing stream: " << Pa_GetErrorText(err) << std::endl; } // Terminate PortAudio Pa_Terminate(); std::cout << "Recording complete. Total frames: " << totalFramesRead << std::endl; // Perform FFT Analysis const int bufferSize = 2048; // Chunk size for FFT fftw_complex* in = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * bufferSize); fftw_complex* out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * bufferSize); fftw_plan plan = fftw_plan_dft_1d(bufferSize, in, out, FFTW_FORWARD, FFTW_ESTIMATE); for (int chunkStart = 0; chunkStart < numSamples; chunkStart += bufferSize) { // Fill FFT input buffer for (int i = 0; i < bufferSize; ++i) { in[i][0] = (chunkStart + i < numSamples) ? 
recordedSamples[chunkStart + i] : 0.0; // Real part in[i][1] = 0.0; // Imaginary part } // Execute FFT fftw_execute(plan); // Compute magnitude spectrum std::vector<double> magnitudes(bufferSize / 2); for (int i = 0; i < bufferSize / 2; ++i) { magnitudes[i] = sqrt(out[i][0] * out[i][0] + out[i][1] * out[i][1]); } // Find the frequency with the maximum magnitude double maxMagnitude = 0.0; int maxIndex = 0; for (int i = 0; i < bufferSize / 2; ++i) { if (magnitudes[i] > maxMagnitude) { maxMagnitude = magnitudes[i]; maxIndex = i; } } // Calculate the fundamental frequency double fundamentalFrequency = maxIndex * (double)sampleRate / bufferSize; // Convert frequency to note if (fundamentalFrequency > 20.0) { // Ignore frequencies below 20 Hz std::string note = frequencyToNote(fundamentalFrequency); std::cout << "Chunk starting at " << (chunkStart / (double)sampleRate) << "s: " << "Frequency: " << fundamentalFrequency << " Hz, Note: " << note << std::endl; } } // Clean up FFTW resources fftw_destroy_plan(plan); fftw_free(in); fftw_free(out); return 0; // compile with: g++ -o pitch_detection pitch_detection.cpp -lportaudio -lfftw3 -lm }
Leave a Comment