// Example: zero a statically allocated __device__ array with a kernel launch.
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "cuda.h"
#include <device_functions.h>
#include <cuda_runtime_api.h>
#include<stdio.h>
#include <cmath>
#include<stdlib.h>
#include<iostream>
#include <iomanip>
// Shorthand alias for unsigned int, used for sizes and indices in this file.
typedef unsigned int uint;

// Bring standard-library names into the global namespace for this file.
using namespace std;

// Number of elements in the device array (one million).
const uint N = 1000000;

// File-scope array living in device memory, N elements long; written by set0.
__device__ uint Val2[N];
/*
 * set0 - write zero to every element of the device array Val2.
 *
 * Launch layout: 1-D grid of 1-D blocks. Any grid size is valid: each thread
 * starts at its flat global index and advances by the total number of
 * launched threads until it reaches N, so the whole array is covered even
 * when the grid holds fewer than N threads.
 *
 * Takes no arguments and uses no shared memory; the array and its length are
 * the file-scope symbols Val2 and N.
 */
__global__ void set0()
{
    // Plain multiply instead of __mul24: __mul24 only uses the low 24 bits of
    // each operand (and treats them as signed), so it silently yields a wrong
    // index once an operand no longer fits in 24 bits. Widening to size_t
    // first also keeps the product from wrapping at 32 bits.
    const size_t stride = (size_t)gridDim.x * blockDim.x;

    for (size_t index = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
         index < N;
         index += stride)
    {
        Val2[index] = 0;
    }
}
/*
 * Entry point: launches set0 with 512-thread blocks, using enough blocks to
 * give every one of the N elements its own thread, then waits for the kernel
 * and reports any CUDA error on stderr.
 *
 * Returns 0 on success, EXIT_FAILURE if the launch or the kernel execution
 * failed.
 */
int main()
{
    const uint numThreads = 512;
    // Integer ceiling division; avoids the round trip through double.
    const uint numBlocks = (N + numThreads - 1) / numThreads;

    set0<<<numBlocks, numThreads>>>();

    // A kernel launch returns no status itself; an invalid launch
    // configuration is only visible through cudaGetLastError().
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess)
    {
        fprintf(stderr, "set0 launch failed: %s\n", cudaGetErrorString(status));
        return EXIT_FAILURE;
    }

    // The launch is asynchronous. Wait here so the kernel has actually
    // finished before the process exits, and so that errors raised while it
    // was running are reported instead of being lost.
    status = cudaDeviceSynchronize();
    if (status != cudaSuccess)
    {
        fprintf(stderr, "set0 execution failed: %s\n", cudaGetErrorString(status));
        return EXIT_FAILURE;
    }

    return 0;
}