如何从Cudafy(C#)的GPU计算中返回一个值

本文关键字:一个 计算 GPU Cudafy 返回 | 更新日期: 2023-09-27 18:12:57

My Issue

嘿,我正在做一个简单的计算:求0到100度之间各角度的正弦值之和(我把它用作系统的基准测试)。计算本身不是问题;我的问题是,我刚接触Cudafy,不确定如何正确地传入和返回值,以便把结果打印出来。这是我的代码:

    // Declared but not referenced anywhere in the code shown below.
    public const int N = 33 * 1024;
    // Passed as the second argument to gpu.Launch in Main.
    public const int threadsPerBlock = 256;
    // Passed as the first argument to gpu.Launch in Main.
    public const int blocksPerGrid = 32;                                           
    // Entry point: launches SumOfSines through Cudafy, copies one double back
    // from the device buffer, and prints the elapsed time followed by that value.
    public static void Main()
    {
        // The stopwatch starts before CudafyTranslator.Cudafy(), so the time printed
        // below covers every call up to watch.Stop(), not just the kernel launch.
        Stopwatch watch = new Stopwatch();                                          
        watch.Start();                                                              
        string Text = "";
        int iterations = 1000000;
        CudafyModule km = CudafyTranslator.Cudafy();
        GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
        gpu.LoadModule(km);
        // NOTE(review): the parameterless Allocate<double>() presumably reserves a
        // single double on the device; SumOfSines only writes Value[0] - confirm.
        double[] dev_Value = gpu.Allocate<double>();
        // Launched with blocksPerGrid x threadsPerBlock; the kernel does not use its
        // GThread argument, so each launched thread runs the same loops.
        gpu.Launch(blocksPerGrid, threadsPerBlock).SumOfSines(iterations,dev_Value);                                                     
        double Value;
        // NOTE(review): presumably copies the first element of dev_Value into Value - confirm.
        gpu.CopyFromDevice(dev_Value, out Value);
        watch.Stop();                                                                                                
        Text = watch.Elapsed.TotalSeconds.ToString();                                                                
        Console.WriteLine("The process took a total of: " + Text + " Seconds");
        Console.WriteLine(Value);
        // FreeAll() runs only after Console.Read() returns.
        Console.Read();
        gpu.FreeAll();
    }
    // Kernel: writes into Value[0] the sum of Math.Sin over the whole-degree angles
    // 1..100 (converted to radians).
    //   thread     - not used by this body, so every thread that runs the kernel
    //                executes the same loops and stores to the same element.
    //   iterations - number of times the 100-term sum is recomputed.
    //   Value      - output array; only index 0 is written.
    [Cudafy]
    public static void SumOfSines(GThread thread,int iterations,double [] Value)
    {
        double total = new double();
        double degAsRad = Math.PI / 180.0;
        for (int i = 0; i < iterations; i++)
        {
            // total is reset on every pass, so the stored result is the sum from
            // a single pass; the repetitions only add work.
            total = 0.0;
            for (int z = 1; z < 101; z++)
            {
                double angle = (double)z * degAsRad;
                total += Math.Sin(angle);
            }
        }
        Value[0] = total;

    }

我想从CUDAfy部分取出的值是total(总和),然后把它和基准测试所用的时间一起打印出来。如果有人能提出建议,我将不胜感激(如果能指出哪些行是多余的、哪些部分效率低下,那就更好了)。

如何从Cudafy(C#)的GPU计算中返回一个值

没关系,我找到了答案,但我会把它贴在这里:

    // Declared but not referenced anywhere in the code shown below.
    public const int N = 33 * 1024;
    // Passed as the second argument to gpu.Launch in Main.
    public const int threadsPerBlock = 256;
    // Passed as the first argument to gpu.Launch in Main.
    public const int blocksPerGrid = 32;
    // Entry point: launches SumOfSines through Cudafy, copies one result value
    // back to the host, and prints the elapsed time followed by that value.
    public static void Main()
    {
        // The stopwatch starts before CudafyTranslator.Cudafy(), so the reported
        // time covers every call up to watch.Stop(), not just the kernel launch.
        Stopwatch watch = new Stopwatch();
        watch.Start();
        CudafyModule km = CudafyTranslator.Cudafy();
        GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
        gpu.LoadModule(km);
        try
        {
            int iterations = 1000000;
            double Value;
            // Allocate<T>(int) takes an element count, not a byte count. The kernel
            // writes indices 0 .. iterations - 1, so multiplying by sizeof(double)
            // only reserved eight times more device memory than is ever touched.
            double[] dev_Value = gpu.Allocate<double>(iterations);
            gpu.Launch(blocksPerGrid, threadsPerBlock).SumOfSines(iterations, dev_Value);
            // NOTE(review): presumably copies the first element of dev_Value; every
            // element the kernel writes is computed the same way - confirm.
            gpu.CopyFromDevice(dev_Value, out Value);
            watch.Stop();
            string Text = watch.Elapsed.TotalSeconds.ToString();
            Console.WriteLine("The process took a total of: " + Text + " Seconds");
            Console.WriteLine(Value);
            Console.Read();
        }
        finally
        {
            // Release device allocations even if a GPU call above throws.
            gpu.FreeAll();
        }
    }
    // Kernel: for every index i in [0, _iterations) stores in Value[i] the sum of
    // Math.Sin over the whole-degree angles 0..99 (converted to radians).
    //   thread      - supplies threadIdx / blockIdx / blockDim / gridDim, used to
    //                 derive this thread's first index and its stride.
    //   _iterations - number of elements of Value to fill.
    //   Value       - output array; must hold at least _iterations elements.
    // NOTE(review): the angle loop stops at 99 degrees; whether 100 degrees should
    // be included is not established by this code - confirm.
    [Cudafy]
    public static void SumOfSines(GThread thread, int _iterations, double[] Value)
    {
        int threadID = thread.threadIdx.x + thread.blockIdx.x * thread.blockDim.x;
        int numThreads = thread.blockDim.x * thread.gridDim.x;
        // Loop-invariant, so computed once per thread instead of once per element.
        double degAsRad = Math.PI / 180;

        // Grid-stride loop: start at this thread's global index and advance by the
        // total number of launched threads. The loop condition already skips
        // threads whose first index is out of range, so no separate guard is needed.
        for (int i = threadID; i < _iterations; i += numThreads)
        {
            // Accumulate in a local and store once, instead of reading and writing
            // Value[i] on every term.
            double sum = 0.0;
            for (int a = 0; a < 100; a++)
            {
                sum += Math.Sin((double)a * degAsRad);
            }
            Value[i] = sum;
        }
    }

杰克