Untitled
unknown
plain_text
2 years ago
7.6 kB
6
Indexable
#include <stdio.h>
#include <stdlib.h>
#include "cuda_runtime.h"
#include <omp.h>
#include <opencv2/opencv.hpp>
using namespace cv; // Use OpenCV's namespace
using namespace std; // Use the standard namespace
// Declare global variables for performance timing
long double init, _end;
long double total_time;
unsigned char *d_videoFrames, *d_finalVideoFrames;
__global__ void reduce(unsigned char *videoFrames, unsigned char *finalVideoFrames, int n_blocks, int n_threads, int width, int height){
// loop into the matrix rows, the rows will be managed by the threads
for (int i = 0; i < height; i += n_threads) // step of 3*n_threads, a thread manages a 3X3 pixel grid
{ // loop into the matrix columns, the columns will be managed by the blocks
for (int j = 0; j < width; j += n_blocks) //step of 3*n_blocks a block manages n_trheadsx3 grid
{
//initialize the color variables
if((i+threadIdx.x)<height && (j+blockIdx.x)<width){
double blue = 0;
double green = 0;
double red = 0;
//each pixel of the finalFrames will be a mean of a 3x3 grid of the original video frame
for (int ik = 0; ik < 3; ik++)
{
for (int jk = 0; jk < 3; jk++)
{
blue += videoFrames[(i+threadIdx.x)*27*width+(j+blockIdx.x)*9+9*jk+ik*9*width+0]; //sum over the blue value of the originals pixels
green += videoFrames[(i+threadIdx.x)*27*width+(j+blockIdx.x)*9+9*jk+ik*9*width+1]; //sum over the green value of the originals pixels
red += videoFrames[(i+threadIdx.x)*27*width+(j+blockIdx.x)*9+9*jk+ik*9*width+2]; //sum over the red value of the originals pixels
}
}
//mean of the colors
red /= 9;
green /= 9;
blue /= 9;
finalVideoFrames[((int) ((i))+threadIdx.x)*width*3+ ((int) ((j))+blockIdx.x)*3+0] = blue; //asign the new color value to the final frame
finalVideoFrames[((int) ((i))+threadIdx.x)*width*3+ ((int) ((j))+blockIdx.x)*3+1] = green; //asign the new color value to the final frame
finalVideoFrames[((int) ((i))+threadIdx.x)*width*3+ ((int) ((j))+blockIdx.x)*3+2] = red; //asign the new color value to the final frame
}
}
}
}
void init_cuda_kernel(cudaError_t err){
// Verify that dev allocations succeeded
if (err != cudaSuccess) {
fprintf(stderr, "Failed to allocate device vector (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
}
int main(int argc, char *argv[])
{
// Initialize input and output video file names and number of threads from command-line arguments
string input = argv[1];
string output = argv[2];
int n_blocks = atoi(argv[3]); // Convert the third argument to an integer for number of blocks
int n_threads = atoi(argv[4]); // Convert the third argument to an integer for number of threads
cout << "Iniciando programa con " << n_blocks << " bloques..." << endl;
cout << "Iniciando programa con " << n_threads << " hilos..." << endl;
total_time = 0;
// Open a file to write the results
FILE *fp = fopen("results.txt", "a");
fprintf(fp, "%d \t", n_blocks);
fprintf(fp, "%d \t", n_threads);
// Open input video file using OpenCV's VideoCapture
VideoCapture cap(input);
// Check if the video file is successfully opened
if (!cap.isOpened())
{
cout << "Error opening video stream or file" << endl;
return 0;
}
// Retrieve properties of the input video
int fps = cap.get(CAP_PROP_FPS);
int fourcc = cap.get(CAP_PROP_FOURCC);
int frameCount = cap.get(CAP_PROP_FRAME_COUNT);
// Initialize output video file using OpenCV's VideoWriter
VideoWriter video(output, fourcc, fps, Size(640, 360));
// Start performance timing
cudaError_t err;
int height = 360;
int width = 640;
//Memory allocation of the host input and output arrays
int *finalVideoFrames = (int *)malloc(height*width*3*sizeof(int));
int *videoFramesArray = (int *)malloc(3*height*3*width*3*sizeof(int));
// Verify that allocations succeeded
if (finalVideoFrames == NULL) {
fprintf(stderr, "Failed to allocate host vector finalVideoFrames!\n");
exit(EXIT_FAILURE);
}
if (videoFramesArray == NULL) {
fprintf(stderr, "Failed to allocate host vector videoFramesArray!\n");
exit(EXIT_FAILURE);
}
Mat newFrame = Mat::zeros(height, width, CV_8UC3);
init_cuda_kernel(cudaMalloc<unsigned char>(&d_finalVideoFrames, newFrame.step*newFrame.rows));
init_cuda_kernel(cudaMalloc<unsigned char>(&d_videoFrames, 3*height*3*width*3*sizeof(unsigned char)));
printf("Iniciando procesamiento del video...\n");
init = omp_get_wtime();
int n = 0;
// Main loop to read and process video frames
while (true)
{
// Declare a vector to hold frames and their corresponding frame numbers
Mat videoFrames;
cap >> videoFrames;
if (videoFrames.empty())
{
break;
}
// Copy the content of the frame from Host to Device
err = cudaMemcpy(d_videoFrames, videoFrames.ptr(), 3*height*3*width*3*sizeof(unsigned char), cudaMemcpyHostToDevice);
// Verify that copy succeeded
if (err != cudaSuccess) {
fprintf(stderr, "Failed to copy vector videoFrames from host to device (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
// run cuda function
reduce<<<n_blocks,n_threads>>>(d_videoFrames, d_finalVideoFrames, n_blocks, n_threads, width, height);
//Write the video
//Create a new frame to allocate the new pixel's values
//loop over the pixel's values frame
err = cudaMemcpy(newFrame.ptr(), d_finalVideoFrames, newFrame.step*newFrame.rows, cudaMemcpyDeviceToHost);
// Verify that copy succeeded
if (err != cudaSuccess) {
fprintf(stderr, "Failed to copy vector finalVideoFrames from device to host (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
// write the video
video.write(newFrame);
char c = (char)waitKey(1);
if (c == 27){
break;
}
}
_end = omp_get_wtime();
total_time += _end - init;
// End performance timing and calculate total time
fprintf(fp, "%Lf \n", total_time);
cout << "Tiempo total: " << total_time << "s" << endl;
cout << "Resultado guardado en results.txt" << endl;
//clean the cuda memory
err = cudaFree(d_videoFrames);
if (err != cudaSuccess)
{
fprintf(stderr, "Failed to free device vector d_videoFrames (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
//clean the cuda memory
err = cudaFree(d_finalVideoFrames);
if (err != cudaSuccess)
{
fprintf(stderr, "Failed to free device vector d_finalVideoFrames (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
//clean the memory
free(videoFramesArray);
free(finalVideoFrames);
// Close the results file
fclose(fp);
// Write the final processed frames to the output video
// Release video resources
cap.release();
video.release();
return 0;
}
Editor is loading...
Leave a Comment