c++ - error: attribute "__global__" does not apply here

admin管理员组
文章数量:1025292

nvcc not recognizing a kernel on Windows 11 with cuda 12.5 (I can compile other *.cu file fine with powershell)

nvcc -arch=sm_89 .\simplest_kernel.cu

#include <cuda_runtime.h>
#include <iostream>
#include <vector>

__global__ void kernel(uint *A, uint *B, int row) {
  // Each thread splits its 1-D thread index into a pair (x, y) using a
  // hard-coded width of 4, then stores x into A and y into B at the same
  // offset x * row + y.
  //   A, B : output arrays (main passes device pointers from cudaMalloc)
  //   row  : multiplier applied to x when computing the output offset
  // There is no bounds check; presumably the launch must not supply more
  // threads than A and B can hold -- confirm against the caller.
  // NOTE(review): `uint` is not declared by the headers included in this
  // listing; the compiler output quoted below reports it as undefined.
  auto x = threadIdx.x / 4;
  auto y = threadIdx.x % 4;
  A[x * row + y] = x;
  B[x * row + y] = y;
}

int main(int argc, char **argv) {
  // Driver: runs `kernel` on a single block of 16 threads and prints, for each
  // cell of a SIZE x SIZE grid, the pair of values the kernel stored there.
  // argc and argv are not used.

  // Xs/Ys are host buffers; Xs_d/Ys_d are the device buffers the kernel fills.
  uint *Xs, *Ys;
  uint *Xs_d, *Ys_d;

  // Edge length of the grid; every buffer holds SIZE * SIZE elements.
  uint SIZE = 4;

  Xs = (uint *)malloc(SIZE * SIZE * sizeof(uint));
  Ys = (uint *)malloc(SIZE * SIZE * sizeof(uint));

  // None of the CUDA calls below have their return codes inspected.
  cudaMalloc((void **)&Xs_d, SIZE * SIZE * sizeof(uint));
  cudaMalloc((void **)&Ys_d, SIZE * SIZE * sizeof(uint));

  // One block, 16 threads along x.
  dim3 grid_size(1, 1, 1);
  dim3 block_size(4 * 4);

  // The literal 4 is the kernel's `row` argument; it equals SIZE here.
  kernel<<<grid_size, block_size>>>(Xs_d, Ys_d, 4);

  // Copy both result arrays from the device back to the host buffers.
  cudaMemcpy(Xs, Xs_d, SIZE * SIZE * sizeof(uint), cudaMemcpyDeviceToHost);
  cudaMemcpy(Ys, Ys_d, SIZE * SIZE * sizeof(uint), cudaMemcpyDeviceToHost);

  cudaDeviceSynchronize();

  // Print one "[x|y] " cell per element and one output line per grid row.
  for (int row = 0; row < SIZE; ++row) {
    for (int col = 0; col < SIZE; ++col) {
      std::cout << "[" << Xs[row * SIZE + col] << "|" << Ys[row * SIZE + col]
                << "] ";
    }
    std::cout << "\n";
  }

  // Release the device buffers, then the host buffers.
  cudaFree(Xs_d);
  cudaFree(Ys_d);
  free(Xs);
  free(Ys);
}

results in

PS D:\samples\api\SGEMM_CUDA> nvcc -arch=sm_89 .\simplest_kernel.cu
simplest_kernel.cu
D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: attribute "__global__" does not apply here
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
             ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: incomplete type "void" is not allowed
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                              ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: identifier "uint" is undefined
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                                     ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: identifier "A" is undefined
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                                           ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: identifier "B" is undefined
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                                                    ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: type name is not allowed
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                                                       ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: expected a ")"
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                                                           ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: expected a ";"
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {

nvcc not recognizing a kernel on Windows 11 with cuda 12.5 (I can compile other *.cu file fine with powershell)

nvcc -arch=sm_89 .\simplest_kernel.cu

#include <cuda_runtime.h>
#include <iostream>
#include <vector>

__global__ void kernel(uint *A, uint *B, int row) {
  // Each thread splits its 1-D thread index into a pair (x, y) using a
  // hard-coded width of 4, then stores x into A and y into B at the same
  // offset x * row + y.
  //   A, B : output arrays (main passes device pointers from cudaMalloc)
  //   row  : multiplier applied to x when computing the output offset
  // There is no bounds check; presumably the launch must not supply more
  // threads than A and B can hold -- confirm against the caller.
  // NOTE(review): `uint` is not declared by the headers included in this
  // listing; the compiler output quoted below reports it as undefined.
  auto x = threadIdx.x / 4;
  auto y = threadIdx.x % 4;
  A[x * row + y] = x;
  B[x * row + y] = y;
}

int main(int argc, char **argv) {
  // Driver: runs `kernel` on a single block of 16 threads and prints, for each
  // cell of a SIZE x SIZE grid, the pair of values the kernel stored there.
  // argc and argv are not used.

  // Xs/Ys are host buffers; Xs_d/Ys_d are the device buffers the kernel fills.
  uint *Xs, *Ys;
  uint *Xs_d, *Ys_d;

  // Edge length of the grid; every buffer holds SIZE * SIZE elements.
  uint SIZE = 4;

  Xs = (uint *)malloc(SIZE * SIZE * sizeof(uint));
  Ys = (uint *)malloc(SIZE * SIZE * sizeof(uint));

  // None of the CUDA calls below have their return codes inspected.
  cudaMalloc((void **)&Xs_d, SIZE * SIZE * sizeof(uint));
  cudaMalloc((void **)&Ys_d, SIZE * SIZE * sizeof(uint));

  // One block, 16 threads along x.
  dim3 grid_size(1, 1, 1);
  dim3 block_size(4 * 4);

  // The literal 4 is the kernel's `row` argument; it equals SIZE here.
  kernel<<<grid_size, block_size>>>(Xs_d, Ys_d, 4);

  // Copy both result arrays from the device back to the host buffers.
  cudaMemcpy(Xs, Xs_d, SIZE * SIZE * sizeof(uint), cudaMemcpyDeviceToHost);
  cudaMemcpy(Ys, Ys_d, SIZE * SIZE * sizeof(uint), cudaMemcpyDeviceToHost);

  cudaDeviceSynchronize();

  // Print one "[x|y] " cell per element and one output line per grid row.
  for (int row = 0; row < SIZE; ++row) {
    for (int col = 0; col < SIZE; ++col) {
      std::cout << "[" << Xs[row * SIZE + col] << "|" << Ys[row * SIZE + col]
                << "] ";
    }
    std::cout << "\n";
  }

  // Release the device buffers, then the host buffers.
  cudaFree(Xs_d);
  cudaFree(Ys_d);
  free(Xs);
  free(Ys);
}

results in

PS D:\samples\api\SGEMM_CUDA> nvcc -arch=sm_89 .\simplest_kernel.cu
simplest_kernel.cu
D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: attribute "__global__" does not apply here
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
             ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: incomplete type "void" is not allowed
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                              ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: identifier "uint" is undefined
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                                     ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: identifier "A" is undefined
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                                           ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: identifier "B" is undefined
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                                                    ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: type name is not allowed
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                                                       ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: expected a ")"
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                                                           ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: expected a ";"
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {

Share Improve this question asked Nov 18, 2024 at 13:40 He Huang 231 silver badge3 bronze badges

Could it be that your header files are messed up somehow? This really shouldn't happen... – einpoklum Commented Nov 19, 2024 at 7:32
This sometimes happens to me if there is a syntax error somewhere else in the file, this stops the parser from processing the CUDA extensions. Scroll further down the list of errors and see if there are any c++ (not cuda) issues, fix those and recompile. The uint looks suspect, try and replace it with unsigned or uint32_t – Johan Commented Nov 19, 2024 at 7:55
1 you have other errors on that line such as the use of uint on windows without a typedef. fix all other errors and this attribute error will go away stackoverflow/questions/3552094/c-uint-unsigned-int-int – Robert Crovella Commented Nov 19, 2024 at 14:37

Add a comment |

1 Answer 1

Sorted by: Reset to default 1

From your error message I can see that you are running on Windows.

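uint is not a standard C++ type. On Linux it is usually available as a typedef that comes in through the system headers; on Windows (MSVC) it is not defined at all.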

You can fix this easily, by using unsigned instead, or just define uint as needed. The following fix will make your code compile and run correctly.

Define uint yourself where it is missing (note that unsigned always means unsigned int, which is 32 bits wide on current platforms, i.e. the same size as uint32_t):

// `uint` is available on Linux but missing on Windows (MSVC), so declare it here.
// No `#ifndef uint` guard: the preprocessor only sees macros, never typedefs, so
// such a guard would not skip anything. Re-declaring a typedef with the same
// type is legal C++, so this line is also harmless where `uint` already exists.
typedef unsigned int uint;

The problem manifests the way it does because the nvcc preprocessor cannot translate the __global__, __device__, etc. directives in code that has faulty syntax. If there are (too many) syntax errors, the nvcc system will just feed the code into the C++ compiler as is, without processing the CUDA-specific extensions. That compiler will then fail on those extensions, resulting in the errors that you see.

In general when you see a lot of errors relating to CUDA stuff the odds are high that you have a simple syntax error somewhere tripping up the nvcc preprocessor.

The full code (see below) now compiles and runs correctly, outputting the correct result.

#include <cuda_runtime.h>
#include <iostream>
#include <vector>

// `uint` is available on Linux but missing on Windows (MSVC), so declare it here.
// No `#ifndef uint` guard: the preprocessor only sees macros, never typedefs, so
// such a guard would not skip anything. Re-declaring a typedef with the same
// type is legal C++, so this line is also harmless where `uint` already exists.
typedef unsigned int uint;

// Records each thread's 2-D coordinate.
//
// The 1-D thread index is split with a fixed width of 4 into a quotient and a
// remainder; the quotient is written to A and the remainder to B, both at
// offset quotient * row + remainder.
//
//   A, B : output arrays, written at the offset described above
//   row  : multiplier applied to the quotient when forming the offset
//
// Only threadIdx.x is consulted and there is no bounds check, so the caller
// must size A and B for every offset the launched threads produce.
__global__ void kernel(uint* A, uint* B, int row) {
    const auto tid = threadIdx.x;
    const auto quotient = tid / 4;
    const auto remainder = tid % 4;
    const auto offset = quotient * row + remainder;

    A[offset] = quotient;
    B[offset] = remainder;
}

// Host driver: runs `kernel` on one block of SIZE * SIZE threads, copies the
// two result arrays back and prints them as a SIZE x SIZE grid of "[x|y] "
// cells, one grid row per output line.
//
// Returns 0 on success and 1 if any CUDA call fails (the failing call and the
// CUDA error string are written to stderr). argc and argv are unused.
int main(int argc, char** argv) {
    (void)argc;
    (void)argv;

    const uint SIZE = 4;
    // The kernel splits the thread index with a hard-coded width of 4.
    static_assert(SIZE == 4, "kernel() hard-codes a width of 4");

    const std::size_t count = static_cast<std::size_t>(SIZE) * SIZE;
    const std::size_t bytes = count * sizeof(uint);

    // Reports a failed CUDA call on stderr; returns true when the call succeeded.
    auto check = [](cudaError_t status, const char* what) -> bool {
        if (status == cudaSuccess) {
            return true;
        }
        std::cerr << what << " failed: " << cudaGetErrorString(status) << "\n";
        return false;
    };

    // Host buffers are owned by vectors, so no manual free is needed.
    std::vector<uint> Xs(count);
    std::vector<uint> Ys(count);

    // Start as null so the cudaFree calls at the end are safe on every path.
    uint* Xs_d = nullptr;
    uint* Ys_d = nullptr;

    bool ok = check(cudaMalloc((void**)&Xs_d, bytes), "cudaMalloc(Xs_d)") &&
              check(cudaMalloc((void**)&Ys_d, bytes), "cudaMalloc(Ys_d)");

    if (ok) {
        dim3 grid_size(1, 1, 1);
        dim3 block_size(SIZE * SIZE);

        kernel<<<grid_size, block_size>>>(Xs_d, Ys_d, static_cast<int>(SIZE));

        // A launch returns no status: cudaGetLastError() reports a bad launch
        // configuration, and the synchronize reports faults raised while the
        // kernel ran. Both are checked before the results are copied back.
        ok = check(cudaGetLastError(), "kernel launch") &&
             check(cudaDeviceSynchronize(), "kernel execution") &&
             check(cudaMemcpy(Xs.data(), Xs_d, bytes, cudaMemcpyDeviceToHost),
                   "cudaMemcpy(Xs)") &&
             check(cudaMemcpy(Ys.data(), Ys_d, bytes, cudaMemcpyDeviceToHost),
                   "cudaMemcpy(Ys)");
    }

    if (ok) {
        for (uint row = 0; row < SIZE; ++row) {
            for (uint col = 0; col < SIZE; ++col) {
                std::cout << "[" << Xs[row * SIZE + col] << "|" << Ys[row * SIZE + col]
                          << "] ";
            }
            std::cout << "\n";
        }
    }

    // cudaFree on a null pointer performs no operation.
    cudaFree(Xs_d);
    cudaFree(Ys_d);
    return ok ? 0 : 1;
}

nvcc not recognizing a kernel on Windows 11 with cuda 12.5 (I can compile other *.cu file fine with powershell)

nvcc -arch=sm_89 .\simplest_kernel.cu

#include <cuda_runtime.h>
#include <iostream>
#include <vector>

__global__ void kernel(uint *A, uint *B, int row) {
  // Each thread splits its 1-D thread index into a pair (x, y) using a
  // hard-coded width of 4, then stores x into A and y into B at the same
  // offset x * row + y.
  //   A, B : output arrays (main passes device pointers from cudaMalloc)
  //   row  : multiplier applied to x when computing the output offset
  // There is no bounds check; presumably the launch must not supply more
  // threads than A and B can hold -- confirm against the caller.
  // NOTE(review): `uint` is not declared by the headers included in this
  // listing; the compiler output quoted below reports it as undefined.
  auto x = threadIdx.x / 4;
  auto y = threadIdx.x % 4;
  A[x * row + y] = x;
  B[x * row + y] = y;
}

int main(int argc, char **argv) {
  // Driver: runs `kernel` on a single block of 16 threads and prints, for each
  // cell of a SIZE x SIZE grid, the pair of values the kernel stored there.
  // argc and argv are not used.

  // Xs/Ys are host buffers; Xs_d/Ys_d are the device buffers the kernel fills.
  uint *Xs, *Ys;
  uint *Xs_d, *Ys_d;

  // Edge length of the grid; every buffer holds SIZE * SIZE elements.
  uint SIZE = 4;

  Xs = (uint *)malloc(SIZE * SIZE * sizeof(uint));
  Ys = (uint *)malloc(SIZE * SIZE * sizeof(uint));

  // None of the CUDA calls below have their return codes inspected.
  cudaMalloc((void **)&Xs_d, SIZE * SIZE * sizeof(uint));
  cudaMalloc((void **)&Ys_d, SIZE * SIZE * sizeof(uint));

  // One block, 16 threads along x.
  dim3 grid_size(1, 1, 1);
  dim3 block_size(4 * 4);

  // The literal 4 is the kernel's `row` argument; it equals SIZE here.
  kernel<<<grid_size, block_size>>>(Xs_d, Ys_d, 4);

  // Copy both result arrays from the device back to the host buffers.
  cudaMemcpy(Xs, Xs_d, SIZE * SIZE * sizeof(uint), cudaMemcpyDeviceToHost);
  cudaMemcpy(Ys, Ys_d, SIZE * SIZE * sizeof(uint), cudaMemcpyDeviceToHost);

  cudaDeviceSynchronize();

  // Print one "[x|y] " cell per element and one output line per grid row.
  for (int row = 0; row < SIZE; ++row) {
    for (int col = 0; col < SIZE; ++col) {
      std::cout << "[" << Xs[row * SIZE + col] << "|" << Ys[row * SIZE + col]
                << "] ";
    }
    std::cout << "\n";
  }

  // Release the device buffers, then the host buffers.
  cudaFree(Xs_d);
  cudaFree(Ys_d);
  free(Xs);
  free(Ys);
}

results in

PS D:\samples\api\SGEMM_CUDA> nvcc -arch=sm_89 .\simplest_kernel.cu
simplest_kernel.cu
D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: attribute "__global__" does not apply here
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
             ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: incomplete type "void" is not allowed
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                              ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: identifier "uint" is undefined
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                                     ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: identifier "A" is undefined
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                                           ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: identifier "B" is undefined
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                                                    ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: type name is not allowed
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                                                       ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: expected a ")"
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                                                           ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: expected a ";"
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {

nvcc not recognizing a kernel on Windows 11 with cuda 12.5 (I can compile other *.cu file fine with powershell)

nvcc -arch=sm_89 .\simplest_kernel.cu

#include <cuda_runtime.h>
#include <iostream>
#include <vector>

__global__ void kernel(uint *A, uint *B, int row) {
  // Each thread splits its 1-D thread index into a pair (x, y) using a
  // hard-coded width of 4, then stores x into A and y into B at the same
  // offset x * row + y.
  //   A, B : output arrays (main passes device pointers from cudaMalloc)
  //   row  : multiplier applied to x when computing the output offset
  // There is no bounds check; presumably the launch must not supply more
  // threads than A and B can hold -- confirm against the caller.
  // NOTE(review): `uint` is not declared by the headers included in this
  // listing; the compiler output quoted below reports it as undefined.
  auto x = threadIdx.x / 4;
  auto y = threadIdx.x % 4;
  A[x * row + y] = x;
  B[x * row + y] = y;
}

int main(int argc, char **argv) {
  // Driver: runs `kernel` on a single block of 16 threads and prints, for each
  // cell of a SIZE x SIZE grid, the pair of values the kernel stored there.
  // argc and argv are not used.

  // Xs/Ys are host buffers; Xs_d/Ys_d are the device buffers the kernel fills.
  uint *Xs, *Ys;
  uint *Xs_d, *Ys_d;

  // Edge length of the grid; every buffer holds SIZE * SIZE elements.
  uint SIZE = 4;

  Xs = (uint *)malloc(SIZE * SIZE * sizeof(uint));
  Ys = (uint *)malloc(SIZE * SIZE * sizeof(uint));

  // None of the CUDA calls below have their return codes inspected.
  cudaMalloc((void **)&Xs_d, SIZE * SIZE * sizeof(uint));
  cudaMalloc((void **)&Ys_d, SIZE * SIZE * sizeof(uint));

  // One block, 16 threads along x.
  dim3 grid_size(1, 1, 1);
  dim3 block_size(4 * 4);

  // The literal 4 is the kernel's `row` argument; it equals SIZE here.
  kernel<<<grid_size, block_size>>>(Xs_d, Ys_d, 4);

  // Copy both result arrays from the device back to the host buffers.
  cudaMemcpy(Xs, Xs_d, SIZE * SIZE * sizeof(uint), cudaMemcpyDeviceToHost);
  cudaMemcpy(Ys, Ys_d, SIZE * SIZE * sizeof(uint), cudaMemcpyDeviceToHost);

  cudaDeviceSynchronize();

  // Print one "[x|y] " cell per element and one output line per grid row.
  for (int row = 0; row < SIZE; ++row) {
    for (int col = 0; col < SIZE; ++col) {
      std::cout << "[" << Xs[row * SIZE + col] << "|" << Ys[row * SIZE + col]
                << "] ";
    }
    std::cout << "\n";
  }

  // Release the device buffers, then the host buffers.
  cudaFree(Xs_d);
  cudaFree(Ys_d);
  free(Xs);
  free(Ys);
}

results in

PS D:\samples\api\SGEMM_CUDA> nvcc -arch=sm_89 .\simplest_kernel.cu
simplest_kernel.cu
D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: attribute "__global__" does not apply here
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
             ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: incomplete type "void" is not allowed
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                              ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: identifier "uint" is undefined
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                                     ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: identifier "A" is undefined
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                                           ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: identifier "B" is undefined
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                                                    ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: type name is not allowed
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                                                       ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: expected a ")"
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {
                                                           ^

D:\samples\api\SGEMM_CUDA\simplest_kernel.cu(5): error: expected a ";"
  __declspec(__global__) void kernel(uint *A, uint *B, int row) {

Share Improve this question asked Nov 18, 2024 at 13:40 He Huang 231 silver badge3 bronze badges

Could it be that your header files are messed up somehow? This really shouldn't happen... – einpoklum Commented Nov 19, 2024 at 7:32
This sometimes happens to me if there is a syntax error somewhere else in the file, this stops the parser from processing the CUDA extensions. Scroll further down the list of errors and see if there are any c++ (not cuda) issues, fix those and recompile. The uint looks suspect, try and replace it with unsigned or uint32_t – Johan Commented Nov 19, 2024 at 7:55
1 you have other errors on that line such as the use of uint on windows without a typedef. fix all other errors and this attribute error will go away stackoverflow/questions/3552094/c-uint-unsigned-int-int – Robert Crovella Commented Nov 19, 2024 at 14:37

Add a comment |

1 Answer 1

Sorted by: Reset to default 1

From your error message I can see that you are running on Windows.

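uint is not a standard C++ type. On Linux it is usually available as a typedef that comes in through the system headers; on Windows (MSVC) it is not defined at all.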

You can fix this easily, by using unsigned instead, or just define uint as needed. The following fix will make your code compile and run correctly.

Define uint yourself where it is missing (note that unsigned always means unsigned int, which is 32 bits wide on current platforms, i.e. the same size as uint32_t):

// `uint` is available on Linux but missing on Windows (MSVC), so declare it here.
// No `#ifndef uint` guard: the preprocessor only sees macros, never typedefs, so
// such a guard would not skip anything. Re-declaring a typedef with the same
// type is legal C++, so this line is also harmless where `uint` already exists.
typedef unsigned int uint;

In general when you see a lot of errors relating to CUDA stuff the odds are high that you have a simple syntax error somewhere tripping up the nvcc preprocessor.

The full code (see below) now compiles and runs correctly, outputting the correct result.

#include <cuda_runtime.h>
#include <iostream>
#include <vector>

// `uint` is available on Linux but missing on Windows (MSVC), so declare it here.
// No `#ifndef uint` guard: the preprocessor only sees macros, never typedefs, so
// such a guard would not skip anything. Re-declaring a typedef with the same
// type is legal C++, so this line is also harmless where `uint` already exists.
typedef unsigned int uint;

// Records each thread's 2-D coordinate.
//
// The 1-D thread index is split with a fixed width of 4 into a quotient and a
// remainder; the quotient is written to A and the remainder to B, both at
// offset quotient * row + remainder.
//
//   A, B : output arrays, written at the offset described above
//   row  : multiplier applied to the quotient when forming the offset
//
// Only threadIdx.x is consulted and there is no bounds check, so the caller
// must size A and B for every offset the launched threads produce.
__global__ void kernel(uint* A, uint* B, int row) {
    const auto tid = threadIdx.x;
    const auto quotient = tid / 4;
    const auto remainder = tid % 4;
    const auto offset = quotient * row + remainder;

    A[offset] = quotient;
    B[offset] = remainder;
}

// Host driver: runs `kernel` on one block of SIZE * SIZE threads, copies the
// two result arrays back and prints them as a SIZE x SIZE grid of "[x|y] "
// cells, one grid row per output line.
//
// Returns 0 on success and 1 if any CUDA call fails (the failing call and the
// CUDA error string are written to stderr). argc and argv are unused.
int main(int argc, char** argv) {
    (void)argc;
    (void)argv;

    const uint SIZE = 4;
    // The kernel splits the thread index with a hard-coded width of 4.
    static_assert(SIZE == 4, "kernel() hard-codes a width of 4");

    const std::size_t count = static_cast<std::size_t>(SIZE) * SIZE;
    const std::size_t bytes = count * sizeof(uint);

    // Reports a failed CUDA call on stderr; returns true when the call succeeded.
    auto check = [](cudaError_t status, const char* what) -> bool {
        if (status == cudaSuccess) {
            return true;
        }
        std::cerr << what << " failed: " << cudaGetErrorString(status) << "\n";
        return false;
    };

    // Host buffers are owned by vectors, so no manual free is needed.
    std::vector<uint> Xs(count);
    std::vector<uint> Ys(count);

    // Start as null so the cudaFree calls at the end are safe on every path.
    uint* Xs_d = nullptr;
    uint* Ys_d = nullptr;

    bool ok = check(cudaMalloc((void**)&Xs_d, bytes), "cudaMalloc(Xs_d)") &&
              check(cudaMalloc((void**)&Ys_d, bytes), "cudaMalloc(Ys_d)");

    if (ok) {
        dim3 grid_size(1, 1, 1);
        dim3 block_size(SIZE * SIZE);

        kernel<<<grid_size, block_size>>>(Xs_d, Ys_d, static_cast<int>(SIZE));

        // A launch returns no status: cudaGetLastError() reports a bad launch
        // configuration, and the synchronize reports faults raised while the
        // kernel ran. Both are checked before the results are copied back.
        ok = check(cudaGetLastError(), "kernel launch") &&
             check(cudaDeviceSynchronize(), "kernel execution") &&
             check(cudaMemcpy(Xs.data(), Xs_d, bytes, cudaMemcpyDeviceToHost),
                   "cudaMemcpy(Xs)") &&
             check(cudaMemcpy(Ys.data(), Ys_d, bytes, cudaMemcpyDeviceToHost),
                   "cudaMemcpy(Ys)");
    }

    if (ok) {
        for (uint row = 0; row < SIZE; ++row) {
            for (uint col = 0; col < SIZE; ++col) {
                std::cout << "[" << Xs[row * SIZE + col] << "|" << Ys[row * SIZE + col]
                          << "] ";
            }
            std::cout << "\n";
        }
    }

    // cudaFree on a null pointer performs no operation.
    cudaFree(Xs_d);
    cudaFree(Ys_d);
    return ok ? 0 : 1;
}

本文标签： c++ - error: attribute "__global__" does not apply here - Stack Overflow

版权声明：本文标题：c++ - error: attribute "__global__" does not apply here - Stack Overflow 内容由热心网友自发贡献，该文观点仅代表作者本人，转载请联系作者并注明出处：http://it.en369.cn/questions/1745618494a2159433.html，本站仅提供信息存储空间服务，不拥有所有权，不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容，一经查实，本站将立刻删除。

369IT编程

c++ - error: attribute "__global__" does not apply here - Stack Overflow

1 Answer 1

1 Answer 1

更多相关文章