EBU6502_cloud_computing_notes/assets/code/test3.cu

43 lines
1.2 KiB
Plaintext
Raw Normal View History

2024-12-29 16:45:17 +08:00
#include <stdio.h>
#define N 6
/*
 * Element-wise addition kernel: c[i] = a[i] + b[i].
 *
 * The element index is taken from blockIdx.x alone, so each block handles
 * one element; threadIdx is not consulted, meaning every thread of a block
 * would repeat the same element. Blocks whose index is N or greater only
 * print their index and then exit without touching the arrays.
 *
 * a, b : input arrays, read at index blockIdx.x when it is below N
 * c    : output array, written at the same index
 *
 * The device-side printf calls trace which blocks ran and what they wrote.
 */
__global__ void add(int *a, int *b, int *c) {
    const int block = blockIdx.x;
    printf("bid: %d\n", block);

    // Surplus blocks (index past the end of the data) have nothing to do.
    // Note for students: a loop over the elements is an alternative to
    // assigning one block per element.
    if (block >= N) {
        return;
    }

    const int sum = a[block] + b[block];
    c[block] = sum;
    printf("c: %d\n", sum);
}
/*
 * Reports a failed CUDA runtime call on stderr.
 *
 * status : result returned by the CUDA runtime call
 * what   : short label naming the step, used in the error message
 *
 * Returns 1 when status is cudaSuccess, 0 otherwise.
 */
static int cudaOk(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return 1;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what,
            cudaGetErrorString(status));
    return 0;
}

/*
 * Host driver: fills a[i] = i and b[i] = i * i, adds them on the device
 * with the add kernel, and prints the N results, one per line.
 *
 * Every CUDA runtime call is checked; on the first failure a message is
 * written to stderr, the device buffers are released, and the program
 * exits with status 1. Returns 0 on success.
 */
int main(void) {
    int a[N], b[N], c[N];
    // Start as NULL so the cleanup path can free them unconditionally.
    int *dev_a = NULL, *dev_b = NULL, *dev_c = NULL;
    // All locals are declared before the first goto so no initialization
    // is jumped over; assume failure until every step has succeeded.
    int exitCode = 1;

    // allocate memory to device
    if (!cudaOk(cudaMalloc((void **)&dev_a, N * sizeof(int)), "cudaMalloc(dev_a)")) goto cleanup;
    if (!cudaOk(cudaMalloc((void **)&dev_b, N * sizeof(int)), "cudaMalloc(dev_b)")) goto cleanup;
    if (!cudaOk(cudaMalloc((void **)&dev_c, N * sizeof(int)), "cudaMalloc(dev_c)")) goto cleanup;

    // Fill arrays "a" and "b" with values on the host
    for (int i = 0; i < N; i++) {
        a[i] = i;
        b[i] = i * i;
    }

    // Copy arrays "a" and "b" to the device
    if (!cudaOk(cudaMemcpy(dev_a, a, N * sizeof(int), cudaMemcpyHostToDevice), "copy of a to device")) goto cleanup;
    if (!cudaOk(cudaMemcpy(dev_b, b, N * sizeof(int), cudaMemcpyHostToDevice), "copy of b to device")) goto cleanup;

    // Launch the kernel with 12 blocks of 1 thread each. That is more
    // blocks than elements; the kernel's bounds check makes the surplus
    // blocks skip the addition.
    add<<<12, 1>>>(dev_a, dev_b, dev_c);
    // A launch returns no status itself: query it explicitly, then wait for
    // the kernel so execution errors are attributed to it rather than to
    // the copy that follows.
    if (!cudaOk(cudaGetLastError(), "kernel launch")) goto cleanup;
    if (!cudaOk(cudaDeviceSynchronize(), "kernel execution")) goto cleanup;

    // Copy the array "c" from the device to the host
    if (!cudaOk(cudaMemcpy(c, dev_c, N * sizeof(int), cudaMemcpyDeviceToHost), "copy of c to host")) goto cleanup;

    // Print the array "c"
    for (int i = 0; i < N; i++) {
        printf("%d\n", c[i]);
    }
    exitCode = 0;

cleanup:
    // Free memory allocated to the device (freeing a NULL pointer is a no-op)
    cudaFree(dev_a);
    cudaFree(dev_b);
    cudaFree(dev_c);
    return exitCode;
} // End main