// Minimal CUDA example: computes d = (a + b) / c on the device for single float values.
#include <stdio.h>
|
||
|
|
||
|
/*
 * Kernel: stores (*a + *b) / *c into *d.
 *
 * a, b, c  pointers to the three input values; each is read once.
 * d        pointer to the output value; it is overwritten.
 *
 * The body uses no thread or block index, so every launched thread prints
 * the same line and writes the same value to *d.
 */
__global__ void calculate(float *a, float *b, float *c, float *d) {
    // Device-side printf.
    printf("test test\n");

    // Load the operands through their pointers first, then combine them.
    const float lhs = *a;
    const float rhs = *b;
    const float divisor = *c;

    const float sum = lhs + rhs;
    *d = sum / divisor;
}
|
||
|
|
||
|
/*
 * Returns true when `status` is cudaSuccess. Otherwise prints the failing
 * step and the runtime's error string to stderr and returns false.
 */
static bool cudaSucceeded(cudaError_t status, const char *step) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", step, cudaGetErrorString(status));
    return false;
}

/*
 * Computes d = (a + b) / c on the device for one set of host values and
 * prints the result.
 *
 * Returns 0 when every CUDA call succeeded and 1 otherwise. The device
 * buffers are released on both paths.
 */
int main(void) {
    // Host inputs and result. The f suffix keeps the literals single precision.
    const float a = 3.0f, b = 7.0f, c = 2.0f;
    float d = 0.0f;

    // Device pointers start out NULL so the cleanup loop below can free them
    // unconditionally, even if an allocation was never reached.
    float *a_dev = NULL, *b_dev = NULL, *c_dev = NULL, *d_dev = NULL;
    const size_t float_size = sizeof(float);

    // Allocate the device buffers and upload the inputs. The && chain stops
    // at the first failing call, so later steps never run on bad state.
    bool ok =
        cudaSucceeded(cudaMalloc((void **)&a_dev, float_size), "cudaMalloc(a_dev)") &&
        cudaSucceeded(cudaMalloc((void **)&b_dev, float_size), "cudaMalloc(b_dev)") &&
        cudaSucceeded(cudaMalloc((void **)&c_dev, float_size), "cudaMalloc(c_dev)") &&
        cudaSucceeded(cudaMalloc((void **)&d_dev, float_size), "cudaMalloc(d_dev)") &&
        cudaSucceeded(cudaMemcpy(a_dev, &a, float_size, cudaMemcpyHostToDevice),
                      "copy of a to device") &&
        cudaSucceeded(cudaMemcpy(b_dev, &b, float_size, cudaMemcpyHostToDevice),
                      "copy of b to device") &&
        cudaSucceeded(cudaMemcpy(c_dev, &c, float_size, cudaMemcpyHostToDevice),
                      "copy of c to device");

    if (ok) {
        // Launch kernel with a single thread.
        calculate<<<1, 1>>>(a_dev, b_dev, c_dev, d_dev);

        // A launch does not return a status: cudaGetLastError() reports
        // launch failures, and the synchronize reports errors raised while
        // the kernel was running.
        ok = cudaSucceeded(cudaGetLastError(), "kernel launch") &&
             cudaSucceeded(cudaDeviceSynchronize(), "kernel execution") &&
             cudaSucceeded(cudaMemcpy(&d, d_dev, float_size, cudaMemcpyDeviceToHost),
                           "copy of d to host");
    }

    // Only print a result that was actually produced by the device.
    if (ok) {
        printf("d is: %f\n", d);
    }

    // Clean up device memory on success and failure alike; cudaFree accepts
    // a NULL pointer and does nothing for it.
    float *const device_buffers[] = {a_dev, b_dev, c_dev, d_dev};
    for (size_t i = 0; i < sizeof(device_buffers) / sizeof(device_buffers[0]); ++i) {
        if (!cudaSucceeded(cudaFree(device_buffers[i]), "cudaFree")) {
            ok = false;
        }
    }

    return ok ? 0 : 1;
}
|