The following sample code performs an island exchange on the host.
/* island.cu simple example of island exchanges via host */

#include <cuda_runtime.h>  // CUDA runtime API (cudaMalloc, cudaMemcpy, ...)
#include <stdio.h>
#include <stdlib.h>

#define PES (1024*2)  /* number of islands; one GPU thread is used per island */
#define POP 50        /* number of values held by each island */

/*
 * Data for all islands, copied as a whole between host and device.
 *   num[j][i] - value j of island i
 *   best[i]   - row index of the largest value of island i (written by the kernel)
 *   worst[i]  - row index of the smallest value of island i (written by the kernel)
 */
typedef struct {
    int num[POP][PES];
    int best[PES];
    int worst[PES];
} gdata_t;

/*
 * For every island i < pes, find the row index of its largest and smallest
 * value and store them in p->best[i] and p->worst[i].
 *
 * Launch layout: 1-D grid of 1-D blocks, one thread per island; threads whose
 * global index is >= pes do nothing. No shared memory is used.
 *
 * Ties: the strict '<' keeps the first maximum, the '>=' keeps the last
 * minimum, so best and worst differ whenever POP > 1.
 *
 * p   - device pointer to the island data
 * pes - number of islands to process (must not exceed PES)
 */
__global__ void island(gdata_t *p, const int pes)
{
    // pick best & worst
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < pes) {
        int best = 0;
        int worst = 0;

        for (int j = 1; j < POP; ++j) {
            if (p->num[best][i] < p->num[j][i]) {
                best = j;
            }
            if (p->num[worst][i] >= p->num[j][i]) {
                worst = j;
            }
        }

        p->best[i] = best;
        p->worst[i] = worst;
    }
}

/*
 * Abort the program if a CUDA call failed.
 * Prints "<what> (error code <cuda error string>)!" to stderr and exits with
 * EXIT_FAILURE when err is not cudaSuccess; otherwise does nothing.
 */
static void checkCuda(cudaError_t err, const char *what)
{
    if (err != cudaSuccess) {
        fprintf(stderr, "%s (error code %s)!\n", what, cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
}

/**
 * Host main routine.
 *
 * Fills every island with random values, then repeats:
 *   1. copy all data to the device,
 *   2. run the island kernel to locate each island's best/worst value,
 *   3. copy all data back,
 *   4. on the host, overwrite each island's worst value with the best value
 *      of a randomly chosen island and check whether all islands share the
 *      same best value,
 * until every island reports the same best value.
 *
 * Returns 0 on success; exits with EXIT_FAILURE on any allocation or CUDA error.
 */
int main(void)
{
    int gbest, agree, pass = 0;

    int pes = PES;
    int pop = POP;
    size_t size = sizeof(gdata_t);

    // Allocate the host data. calloc (rather than malloc) so that the
    // best/worst arrays hold defined values when first copied to the device.
    gdata_t *hp = (gdata_t *)calloc(1, size);

    // Verify that allocations succeeded
    if (hp == NULL) {
        fprintf(stderr, "Failed to allocate host data!\n");
        exit(EXIT_FAILURE);
    }

    // Initialize the host data
    for (int i = 0; i < pop; ++i) {
        for (int j = 0; j < pes; ++j) {
            hp->num[i][j] = rand();
        }
    }

    // Allocate the device data
    gdata_t *dp = NULL;
    checkCuda(cudaMalloc((void **)&dp, size),
              "Failed to allocate device data");

    // Launch configuration: enough blocks to give every island one thread
    const int threadsPerBlock = 256;
    const int blocksPerGrid = (pes + threadsPerBlock - 1) / threadsPerBlock;

    // loop until all nodes agree
    do {
        // Copy the host data to device
        printf("Copy input data from the host memory to the CUDA device\n");
        checkCuda(cudaMemcpy(dp, hp, size, cudaMemcpyHostToDevice),
                  "Failed to copy data from host to device");

        // Launch the CUDA Kernel
        printf("CUDA kernel launch with %d blocks of %d threads\n",
               blocksPerGrid, threadsPerBlock);
        island<<<blocksPerGrid, threadsPerBlock>>>(dp, pes);
        // A launch returns no status itself; launch errors are read here
        checkCuda(cudaGetLastError(), "Failed to launch island kernel");

        // Copy the device result back to the host memory
        printf("Copy output data from the CUDA device to the host memory\n");
        checkCuda(cudaMemcpy(hp, dp, size, cudaMemcpyDeviceToHost),
                  "Failed to copy data back from device to host");

        // Communicate between islands
        // Pick global best & do all agree?
        gbest = hp->num[ hp->best[0] ][0];
        agree = 1;
        for (int i = 0; i < pes; ++i) {
            int whofrom = rand() % pes;

            // Communicate between islands: replace island i's worst value
            // with the best value of a randomly chosen island
            hp->num[ hp->worst[i] ][i] = hp->num[ hp->best[whofrom] ][whofrom];

            // Pick best. best[i] still indexes the value found by the kernel
            // before this pass's exchange.
            if (hp->num[ hp->best[i] ][i] != gbest) {
                agree = 0;
                if (hp->num[ hp->best[i] ][i] > gbest)
                    gbest = hp->num[ hp->best[i] ][i];
            }
        }
        fprintf(stderr, "Pass %d\n", ++pass);
    } while (!agree);

    printf("All agree!\n");

    // Free device memory
    checkCuda(cudaFree(dp), "Failed to free device data");

    // Free host memory
    free(hp);

    printf("Done\n");
    return 0;
}